gsd-trae 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (761)
  1. package/CHANGELOG.md +6 -0
  2. package/assets/screenshot.png +0 -0
  3. package/package.json +9 -2
  4. package/.claude/settings.local.json +0 -8
  5. package/.gitmodules +0 -6
  6. package/.trae/rules/project_rules.md +0 -56
  7. package/.vscode/code-counter/code-counter.db +0 -0
  8. package/.vscode/settings.json +0 -6
  9. package/refs/gsd/.github/CODEOWNERS +0 -2
  10. package/refs/gsd/.github/FUNDING.yml +0 -1
  11. package/refs/gsd/.github/ISSUE_TEMPLATE/bug_report.yml +0 -59
  12. package/refs/gsd/.github/ISSUE_TEMPLATE/feature_request.yml +0 -37
  13. package/refs/gsd/.github/pull_request_template.md +0 -24
  14. package/refs/gsd/.github/workflows/auto-label-issues.yml +0 -21
  15. package/refs/gsd/CHANGELOG.md +0 -1520
  16. package/refs/gsd/LICENSE +0 -21
  17. package/refs/gsd/README.md +0 -704
  18. package/refs/gsd/SECURITY.md +0 -33
  19. package/refs/gsd/agents/gsd-codebase-mapper.md +0 -764
  20. package/refs/gsd/agents/gsd-debugger.md +0 -1246
  21. package/refs/gsd/agents/gsd-executor.md +0 -469
  22. package/refs/gsd/agents/gsd-integration-checker.md +0 -443
  23. package/refs/gsd/agents/gsd-phase-researcher.md +0 -546
  24. package/refs/gsd/agents/gsd-plan-checker.md +0 -690
  25. package/refs/gsd/agents/gsd-planner.md +0 -1275
  26. package/refs/gsd/agents/gsd-project-researcher.md +0 -621
  27. package/refs/gsd/agents/gsd-research-synthesizer.md +0 -239
  28. package/refs/gsd/agents/gsd-roadmapper.md +0 -642
  29. package/refs/gsd/agents/gsd-verifier.md +0 -573
  30. package/refs/gsd/assets/gsd-logo-2000-transparent.png +0 -0
  31. package/refs/gsd/assets/gsd-logo-2000-transparent.svg +0 -17
  32. package/refs/gsd/assets/gsd-logo-2000.png +0 -0
  33. package/refs/gsd/assets/gsd-logo-2000.svg +0 -21
  34. package/refs/gsd/assets/terminal.svg +0 -68
  35. package/refs/gsd/bin/install.js +0 -2090
  36. package/refs/gsd/commands/gsd/add-phase.md +0 -43
  37. package/refs/gsd/commands/gsd/add-tests.md +0 -41
  38. package/refs/gsd/commands/gsd/add-todo.md +0 -47
  39. package/refs/gsd/commands/gsd/audit-milestone.md +0 -36
  40. package/refs/gsd/commands/gsd/check-todos.md +0 -45
  41. package/refs/gsd/commands/gsd/cleanup.md +0 -18
  42. package/refs/gsd/commands/gsd/complete-milestone.md +0 -136
  43. package/refs/gsd/commands/gsd/debug.md +0 -167
  44. package/refs/gsd/commands/gsd/discuss-phase.md +0 -83
  45. package/refs/gsd/commands/gsd/execute-phase.md +0 -41
  46. package/refs/gsd/commands/gsd/health.md +0 -22
  47. package/refs/gsd/commands/gsd/help.md +0 -22
  48. package/refs/gsd/commands/gsd/insert-phase.md +0 -32
  49. package/refs/gsd/commands/gsd/join-discord.md +0 -18
  50. package/refs/gsd/commands/gsd/list-phase-assumptions.md +0 -46
  51. package/refs/gsd/commands/gsd/map-codebase.md +0 -71
  52. package/refs/gsd/commands/gsd/new-milestone.md +0 -44
  53. package/refs/gsd/commands/gsd/new-project.md +0 -42
  54. package/refs/gsd/commands/gsd/new-project.md.bak +0 -1041
  55. package/refs/gsd/commands/gsd/pause-work.md +0 -38
  56. package/refs/gsd/commands/gsd/plan-milestone-gaps.md +0 -34
  57. package/refs/gsd/commands/gsd/plan-phase.md +0 -45
  58. package/refs/gsd/commands/gsd/progress.md +0 -24
  59. package/refs/gsd/commands/gsd/quick.md +0 -41
  60. package/refs/gsd/commands/gsd/reapply-patches.md +0 -110
  61. package/refs/gsd/commands/gsd/remove-phase.md +0 -31
  62. package/refs/gsd/commands/gsd/research-phase.md +0 -189
  63. package/refs/gsd/commands/gsd/resume-work.md +0 -40
  64. package/refs/gsd/commands/gsd/set-profile.md +0 -34
  65. package/refs/gsd/commands/gsd/settings.md +0 -36
  66. package/refs/gsd/commands/gsd/update.md +0 -37
  67. package/refs/gsd/commands/gsd/verify-work.md +0 -38
  68. package/refs/gsd/docs/USER-GUIDE.md +0 -471
  69. package/refs/gsd/docs/context-monitor.md +0 -96
  70. package/refs/gsd/get-shit-done/bin/gsd-tools.cjs +0 -585
  71. package/refs/gsd/get-shit-done/bin/lib/commands.cjs +0 -553
  72. package/refs/gsd/get-shit-done/bin/lib/config.cjs +0 -162
  73. package/refs/gsd/get-shit-done/bin/lib/core.cjs +0 -411
  74. package/refs/gsd/get-shit-done/bin/lib/frontmatter.cjs +0 -299
  75. package/refs/gsd/get-shit-done/bin/lib/init.cjs +0 -710
  76. package/refs/gsd/get-shit-done/bin/lib/milestone.cjs +0 -215
  77. package/refs/gsd/get-shit-done/bin/lib/phase.cjs +0 -870
  78. package/refs/gsd/get-shit-done/bin/lib/roadmap.cjs +0 -298
  79. package/refs/gsd/get-shit-done/bin/lib/state.cjs +0 -521
  80. package/refs/gsd/get-shit-done/bin/lib/template.cjs +0 -222
  81. package/refs/gsd/get-shit-done/bin/lib/verify.cjs +0 -772
  82. package/refs/gsd/get-shit-done/references/checkpoints.md +0 -776
  83. package/refs/gsd/get-shit-done/references/continuation-format.md +0 -249
  84. package/refs/gsd/get-shit-done/references/decimal-phase-calculation.md +0 -65
  85. package/refs/gsd/get-shit-done/references/git-integration.md +0 -248
  86. package/refs/gsd/get-shit-done/references/git-planning-commit.md +0 -38
  87. package/refs/gsd/get-shit-done/references/model-profile-resolution.md +0 -34
  88. package/refs/gsd/get-shit-done/references/model-profiles.md +0 -92
  89. package/refs/gsd/get-shit-done/references/phase-argument-parsing.md +0 -61
  90. package/refs/gsd/get-shit-done/references/planning-config.md +0 -196
  91. package/refs/gsd/get-shit-done/references/questioning.md +0 -145
  92. package/refs/gsd/get-shit-done/references/tdd.md +0 -263
  93. package/refs/gsd/get-shit-done/references/ui-brand.md +0 -160
  94. package/refs/gsd/get-shit-done/references/verification-patterns.md +0 -612
  95. package/refs/gsd/get-shit-done/templates/DEBUG.md +0 -164
  96. package/refs/gsd/get-shit-done/templates/UAT.md +0 -247
  97. package/refs/gsd/get-shit-done/templates/VALIDATION.md +0 -76
  98. package/refs/gsd/get-shit-done/templates/codebase/architecture.md +0 -255
  99. package/refs/gsd/get-shit-done/templates/codebase/concerns.md +0 -310
  100. package/refs/gsd/get-shit-done/templates/codebase/conventions.md +0 -307
  101. package/refs/gsd/get-shit-done/templates/codebase/integrations.md +0 -280
  102. package/refs/gsd/get-shit-done/templates/codebase/stack.md +0 -186
  103. package/refs/gsd/get-shit-done/templates/codebase/structure.md +0 -285
  104. package/refs/gsd/get-shit-done/templates/codebase/testing.md +0 -480
  105. package/refs/gsd/get-shit-done/templates/config.json +0 -37
  106. package/refs/gsd/get-shit-done/templates/context.md +0 -283
  107. package/refs/gsd/get-shit-done/templates/continue-here.md +0 -78
  108. package/refs/gsd/get-shit-done/templates/debug-subagent-prompt.md +0 -91
  109. package/refs/gsd/get-shit-done/templates/discovery.md +0 -146
  110. package/refs/gsd/get-shit-done/templates/milestone-archive.md +0 -123
  111. package/refs/gsd/get-shit-done/templates/milestone.md +0 -115
  112. package/refs/gsd/get-shit-done/templates/phase-prompt.md +0 -569
  113. package/refs/gsd/get-shit-done/templates/planner-subagent-prompt.md +0 -117
  114. package/refs/gsd/get-shit-done/templates/project.md +0 -184
  115. package/refs/gsd/get-shit-done/templates/requirements.md +0 -231
  116. package/refs/gsd/get-shit-done/templates/research-project/ARCHITECTURE.md +0 -204
  117. package/refs/gsd/get-shit-done/templates/research-project/FEATURES.md +0 -147
  118. package/refs/gsd/get-shit-done/templates/research-project/PITFALLS.md +0 -200
  119. package/refs/gsd/get-shit-done/templates/research-project/STACK.md +0 -120
  120. package/refs/gsd/get-shit-done/templates/research-project/SUMMARY.md +0 -170
  121. package/refs/gsd/get-shit-done/templates/research.md +0 -552
  122. package/refs/gsd/get-shit-done/templates/retrospective.md +0 -54
  123. package/refs/gsd/get-shit-done/templates/roadmap.md +0 -202
  124. package/refs/gsd/get-shit-done/templates/state.md +0 -176
  125. package/refs/gsd/get-shit-done/templates/summary-complex.md +0 -59
  126. package/refs/gsd/get-shit-done/templates/summary-minimal.md +0 -41
  127. package/refs/gsd/get-shit-done/templates/summary-standard.md +0 -48
  128. package/refs/gsd/get-shit-done/templates/summary.md +0 -248
  129. package/refs/gsd/get-shit-done/templates/user-setup.md +0 -311
  130. package/refs/gsd/get-shit-done/templates/verification-report.md +0 -322
  131. package/refs/gsd/get-shit-done/workflows/add-phase.md +0 -111
  132. package/refs/gsd/get-shit-done/workflows/add-tests.md +0 -350
  133. package/refs/gsd/get-shit-done/workflows/add-todo.md +0 -157
  134. package/refs/gsd/get-shit-done/workflows/audit-milestone.md +0 -297
  135. package/refs/gsd/get-shit-done/workflows/check-todos.md +0 -176
  136. package/refs/gsd/get-shit-done/workflows/cleanup.md +0 -152
  137. package/refs/gsd/get-shit-done/workflows/complete-milestone.md +0 -763
  138. package/refs/gsd/get-shit-done/workflows/diagnose-issues.md +0 -219
  139. package/refs/gsd/get-shit-done/workflows/discovery-phase.md +0 -289
  140. package/refs/gsd/get-shit-done/workflows/discuss-phase.md +0 -542
  141. package/refs/gsd/get-shit-done/workflows/execute-phase.md +0 -449
  142. package/refs/gsd/get-shit-done/workflows/execute-plan.md +0 -448
  143. package/refs/gsd/get-shit-done/workflows/health.md +0 -156
  144. package/refs/gsd/get-shit-done/workflows/help.md +0 -489
  145. package/refs/gsd/get-shit-done/workflows/insert-phase.md +0 -129
  146. package/refs/gsd/get-shit-done/workflows/list-phase-assumptions.md +0 -178
  147. package/refs/gsd/get-shit-done/workflows/map-codebase.md +0 -315
  148. package/refs/gsd/get-shit-done/workflows/new-milestone.md +0 -382
  149. package/refs/gsd/get-shit-done/workflows/new-project.md +0 -1116
  150. package/refs/gsd/get-shit-done/workflows/pause-work.md +0 -122
  151. package/refs/gsd/get-shit-done/workflows/plan-milestone-gaps.md +0 -274
  152. package/refs/gsd/get-shit-done/workflows/plan-phase.md +0 -569
  153. package/refs/gsd/get-shit-done/workflows/progress.md +0 -381
  154. package/refs/gsd/get-shit-done/workflows/quick.md +0 -453
  155. package/refs/gsd/get-shit-done/workflows/remove-phase.md +0 -154
  156. package/refs/gsd/get-shit-done/workflows/research-phase.md +0 -73
  157. package/refs/gsd/get-shit-done/workflows/resume-project.md +0 -306
  158. package/refs/gsd/get-shit-done/workflows/set-profile.md +0 -80
  159. package/refs/gsd/get-shit-done/workflows/settings.md +0 -213
  160. package/refs/gsd/get-shit-done/workflows/transition.md +0 -544
  161. package/refs/gsd/get-shit-done/workflows/update.md +0 -219
  162. package/refs/gsd/get-shit-done/workflows/verify-phase.md +0 -242
  163. package/refs/gsd/get-shit-done/workflows/verify-work.md +0 -569
  164. package/refs/gsd/hooks/gsd-check-update.js +0 -62
  165. package/refs/gsd/hooks/gsd-context-monitor.js +0 -122
  166. package/refs/gsd/hooks/gsd-statusline.js +0 -108
  167. package/refs/gsd/package.json +0 -50
  168. package/refs/gsd/scripts/build-hooks.js +0 -43
  169. package/refs/gsd/tests/commands.test.cjs +0 -661
  170. package/refs/gsd/tests/helpers.cjs +0 -40
  171. package/refs/gsd/tests/init.test.cjs +0 -205
  172. package/refs/gsd/tests/milestone.test.cjs +0 -98
  173. package/refs/gsd/tests/phase.test.cjs +0 -1241
  174. package/refs/gsd/tests/roadmap.test.cjs +0 -265
  175. package/refs/gsd/tests/state.test.cjs +0 -302
  176. package/refs/gsd/tests/verify.test.cjs +0 -80
  177. package/refs/vbenchmark/.agent/agents/codebase-explorer.md +0 -224
  178. package/refs/vbenchmark/.agent/agents/debugger.md +0 -180
  179. package/refs/vbenchmark/.agent/agents/documenter.md +0 -166
  180. package/refs/vbenchmark/.agent/agents/implementer.md +0 -70
  181. package/refs/vbenchmark/.agent/agents/orchestrator.md +0 -212
  182. package/refs/vbenchmark/.agent/agents/researcher.md +0 -80
  183. package/refs/vbenchmark/.agent/agents/reviewer.md +0 -184
  184. package/refs/vbenchmark/.agent/agents/tester.md +0 -170
  185. package/refs/vbenchmark/.agent/commands/commit.md +0 -29
  186. package/refs/vbenchmark/.agent/commands/debug.md +0 -59
  187. package/refs/vbenchmark/.agent/commands/document.md +0 -52
  188. package/refs/vbenchmark/.agent/commands/gather-context.md +0 -58
  189. package/refs/vbenchmark/.agent/commands/init.md +0 -56
  190. package/refs/vbenchmark/.agent/commands/preset-help.md +0 -50
  191. package/refs/vbenchmark/.agent/commands/refactor.md +0 -71
  192. package/refs/vbenchmark/.agent/commands/research.md +0 -37
  193. package/refs/vbenchmark/.agent/commands/review.md +0 -38
  194. package/refs/vbenchmark/.agent/commands/test.md +0 -61
  195. package/refs/vbenchmark/.agent/rules/01-code-quality.md +0 -33
  196. package/refs/vbenchmark/.agent/rules/02-typescript-go.md +0 -46
  197. package/refs/vbenchmark/.agent/rules/03-security-git.md +0 -34
  198. package/refs/vbenchmark/.agent/rules/04-architecture.md +0 -40
  199. package/refs/vbenchmark/.agent/sync.js +0 -536
  200. package/refs/vbenchmark/.agent/workflows/commit.md +0 -29
  201. package/refs/vbenchmark/.agent/workflows/debug.md +0 -59
  202. package/refs/vbenchmark/.agent/workflows/document.md +0 -52
  203. package/refs/vbenchmark/.agent/workflows/gather-context.md +0 -58
  204. package/refs/vbenchmark/.agent/workflows/init.md +0 -56
  205. package/refs/vbenchmark/.agent/workflows/preset-help.md +0 -50
  206. package/refs/vbenchmark/.agent/workflows/refactor.md +0 -71
  207. package/refs/vbenchmark/.agent/workflows/research.md +0 -37
  208. package/refs/vbenchmark/.agent/workflows/review.md +0 -38
  209. package/refs/vbenchmark/.agent/workflows/test.md +0 -61
  210. package/refs/vbenchmark/.claude/commands/agentic-dev/apply.md +0 -222
  211. package/refs/vbenchmark/.claude/commands/agentic-dev/done.md +0 -166
  212. package/refs/vbenchmark/.claude/commands/agentic-dev/proposal.md +0 -220
  213. package/refs/vbenchmark/.claude/commands/openspec/apply.md +0 -23
  214. package/refs/vbenchmark/.claude/commands/openspec/archive.md +0 -27
  215. package/refs/vbenchmark/.claude/commands/openspec/proposal.md +0 -28
  216. package/refs/vbenchmark/.clinerules/01-rules.md +0 -73
  217. package/refs/vbenchmark/.clinerules/02-agents.md +0 -34
  218. package/refs/vbenchmark/.cursor/commands/commit.md +0 -29
  219. package/refs/vbenchmark/.cursor/commands/debug.md +0 -59
  220. package/refs/vbenchmark/.cursor/commands/document.md +0 -52
  221. package/refs/vbenchmark/.cursor/commands/gather-context.md +0 -58
  222. package/refs/vbenchmark/.cursor/commands/init.md +0 -56
  223. package/refs/vbenchmark/.cursor/commands/preset-help.md +0 -50
  224. package/refs/vbenchmark/.cursor/commands/refactor.md +0 -71
  225. package/refs/vbenchmark/.cursor/commands/research.md +0 -37
  226. package/refs/vbenchmark/.cursor/commands/review.md +0 -38
  227. package/refs/vbenchmark/.cursor/commands/test.md +0 -61
  228. package/refs/vbenchmark/.cursor/rules/agents.mdc +0 -1357
  229. package/refs/vbenchmark/.factory/droids/codebase-explorer.md +0 -224
  230. package/refs/vbenchmark/.factory/droids/debugger.md +0 -180
  231. package/refs/vbenchmark/.factory/droids/documenter.md +0 -166
  232. package/refs/vbenchmark/.factory/droids/implementer.md +0 -70
  233. package/refs/vbenchmark/.factory/droids/orchestrator.md +0 -212
  234. package/refs/vbenchmark/.factory/droids/researcher.md +0 -80
  235. package/refs/vbenchmark/.factory/droids/reviewer.md +0 -184
  236. package/refs/vbenchmark/.factory/droids/tester.md +0 -170
  237. package/refs/vbenchmark/.gemini/workflows/commit.md +0 -29
  238. package/refs/vbenchmark/.gemini/workflows/debug.md +0 -59
  239. package/refs/vbenchmark/.gemini/workflows/document.md +0 -52
  240. package/refs/vbenchmark/.gemini/workflows/gather-context.md +0 -58
  241. package/refs/vbenchmark/.gemini/workflows/init.md +0 -56
  242. package/refs/vbenchmark/.gemini/workflows/preset-help.md +0 -50
  243. package/refs/vbenchmark/.gemini/workflows/refactor.md +0 -71
  244. package/refs/vbenchmark/.gemini/workflows/research.md +0 -37
  245. package/refs/vbenchmark/.gemini/workflows/review.md +0 -38
  246. package/refs/vbenchmark/.gemini/workflows/test.md +0 -61
  247. package/refs/vbenchmark/.github/CODEOWNERS +0 -20
  248. package/refs/vbenchmark/.github/FUNDING.yml +0 -4
  249. package/refs/vbenchmark/.github/ISSUE_TEMPLATE/bug-report.yml +0 -76
  250. package/refs/vbenchmark/.github/ISSUE_TEMPLATE/new-task.yml +0 -106
  251. package/refs/vbenchmark/.github/PULL_REQUEST_TEMPLATE.md +0 -38
  252. package/refs/vbenchmark/.github/copilot-instructions.md +0 -73
  253. package/refs/vbenchmark/.github/workflows/ci.yaml +0 -33
  254. package/refs/vbenchmark/.github/workflows/vercel-auto-pr.yml +0 -478
  255. package/refs/vbenchmark/.github/workflows/vercel-deploy.yaml +0 -487
  256. package/refs/vbenchmark/.github/workflows/vercel-pr-command.yaml +0 -337
  257. package/refs/vbenchmark/.github/workflows/vercel-project-init.yaml +0 -208
  258. package/refs/vbenchmark/.opencode/agent/codebase-explorer.md +0 -224
  259. package/refs/vbenchmark/.opencode/agent/debugger.md +0 -180
  260. package/refs/vbenchmark/.opencode/agent/documenter.md +0 -166
  261. package/refs/vbenchmark/.opencode/agent/implementer.md +0 -70
  262. package/refs/vbenchmark/.opencode/agent/orchestrator.md +0 -212
  263. package/refs/vbenchmark/.opencode/agent/researcher.md +0 -80
  264. package/refs/vbenchmark/.opencode/agent/reviewer.md +0 -184
  265. package/refs/vbenchmark/.opencode/agent/tester.md +0 -170
  266. package/refs/vbenchmark/.opencode/command/commit.md +0 -29
  267. package/refs/vbenchmark/.opencode/command/debug.md +0 -59
  268. package/refs/vbenchmark/.opencode/command/document.md +0 -52
  269. package/refs/vbenchmark/.opencode/command/gather-context.md +0 -58
  270. package/refs/vbenchmark/.opencode/command/init.md +0 -56
  271. package/refs/vbenchmark/.opencode/command/preset-help.md +0 -50
  272. package/refs/vbenchmark/.opencode/command/refactor.md +0 -71
  273. package/refs/vbenchmark/.opencode/command/research.md +0 -37
  274. package/refs/vbenchmark/.opencode/command/review.md +0 -38
  275. package/refs/vbenchmark/.opencode/command/test.md +0 -61
  276. package/refs/vbenchmark/.trae/project_rules.md +0 -73
  277. package/refs/vbenchmark/.windsurf/rules/rules.md +0 -85
  278. package/refs/vbenchmark/AGENTS.md +0 -73
  279. package/refs/vbenchmark/CONTRIBUTING.md +0 -332
  280. package/refs/vbenchmark/Caddyfile +0 -3
  281. package/refs/vbenchmark/LICENSE +0 -47
  282. package/refs/vbenchmark/README.md +0 -354
  283. package/refs/vbenchmark/docker-compose.prod.yaml +0 -35
  284. package/refs/vbenchmark/docker-compose.yaml +0 -53
  285. package/refs/vbenchmark/docs/TASK_EXPANSION_PLAN.md +0 -211
  286. package/refs/vbenchmark/docs/THESIS.md +0 -441
  287. package/refs/vbenchmark/docs/categories/code-evolution.md +0 -138
  288. package/refs/vbenchmark/openspec/changes/init-vibecodingbench/design.md +0 -111
  289. package/refs/vbenchmark/openspec/changes/init-vibecodingbench/proposal.md +0 -15
  290. package/refs/vbenchmark/openspec/changes/init-vibecodingbench/specs/evaluation/spec.md +0 -105
  291. package/refs/vbenchmark/openspec/changes/init-vibecodingbench/specs/leaderboard/spec.md +0 -68
  292. package/refs/vbenchmark/openspec/changes/init-vibecodingbench/specs/task-definition/spec.md +0 -45
  293. package/refs/vbenchmark/openspec/changes/init-vibecodingbench/specs/task-runner/spec.md +0 -49
  294. package/refs/vbenchmark/openspec/changes/init-vibecodingbench/tasks.md +0 -413
  295. package/refs/vbenchmark/package.json +0 -51
  296. package/refs/vbenchmark/packages/cli/eslint.config.js +0 -16
  297. package/refs/vbenchmark/packages/cli/package.json +0 -35
  298. package/refs/vbenchmark/packages/cli/src/agents/index.ts +0 -655
  299. package/refs/vbenchmark/packages/cli/src/commands/eval.ts +0 -197
  300. package/refs/vbenchmark/packages/cli/src/commands/list.ts +0 -63
  301. package/refs/vbenchmark/packages/cli/src/commands/run.ts +0 -147
  302. package/refs/vbenchmark/packages/cli/src/evaluator.ts +0 -125
  303. package/refs/vbenchmark/packages/cli/src/index.ts +0 -21
  304. package/refs/vbenchmark/packages/cli/src/lib/task-variation.ts +0 -153
  305. package/refs/vbenchmark/packages/cli/src/loader.ts +0 -258
  306. package/refs/vbenchmark/packages/cli/src/reporter.ts +0 -222
  307. package/refs/vbenchmark/packages/cli/src/runtime/docker.ts +0 -385
  308. package/refs/vbenchmark/packages/cli/tsconfig.json +0 -8
  309. package/refs/vbenchmark/packages/dashboard/Dockerfile +0 -42
  310. package/refs/vbenchmark/packages/dashboard/index.html +0 -21
  311. package/refs/vbenchmark/packages/dashboard/package.json +0 -29
  312. package/refs/vbenchmark/packages/dashboard/postcss.config.js +0 -6
  313. package/refs/vbenchmark/packages/dashboard/public/favicon.svg +0 -24
  314. package/refs/vbenchmark/packages/dashboard/public/logo.png +0 -0
  315. package/refs/vbenchmark/packages/dashboard/public/logo.svg +0 -39
  316. package/refs/vbenchmark/packages/dashboard/src/App.tsx +0 -1468
  317. package/refs/vbenchmark/packages/dashboard/src/data/category-performance.json +0 -1
  318. package/refs/vbenchmark/packages/dashboard/src/data/leaderboard.json +0 -1
  319. package/refs/vbenchmark/packages/dashboard/src/data/task-results.json +0 -1
  320. package/refs/vbenchmark/packages/dashboard/src/data/tasks.json +0 -1
  321. package/refs/vbenchmark/packages/dashboard/src/index.css +0 -3
  322. package/refs/vbenchmark/packages/dashboard/src/main.tsx +0 -13
  323. package/refs/vbenchmark/packages/dashboard/src/vite-env.d.ts +0 -9
  324. package/refs/vbenchmark/packages/dashboard/tailwind.config.js +0 -11
  325. package/refs/vbenchmark/packages/dashboard/tsconfig.json +0 -21
  326. package/refs/vbenchmark/packages/dashboard/tsconfig.node.json +0 -11
  327. package/refs/vbenchmark/packages/dashboard/vercel.json +0 -6
  328. package/refs/vbenchmark/packages/dashboard/vite.config.ts +0 -28
  329. package/refs/vbenchmark/packages/evaluator/eslint.config.js +0 -16
  330. package/refs/vbenchmark/packages/evaluator/package.json +0 -24
  331. package/refs/vbenchmark/packages/evaluator/src/index.ts +0 -15
  332. package/refs/vbenchmark/packages/evaluator/src/runners/functional.ts +0 -88
  333. package/refs/vbenchmark/packages/evaluator/src/runners/quality.ts +0 -140
  334. package/refs/vbenchmark/packages/evaluator/src/runners/security.ts +0 -94
  335. package/refs/vbenchmark/packages/evaluator/src/runners/visual.ts +0 -108
  336. package/refs/vbenchmark/packages/evaluator/src/types.d.ts +0 -19
  337. package/refs/vbenchmark/packages/evaluator/tsconfig.json +0 -8
  338. package/refs/vbenchmark/packages/leaderboard/Dockerfile +0 -38
  339. package/refs/vbenchmark/packages/leaderboard/drizzle.config.ts +0 -10
  340. package/refs/vbenchmark/packages/leaderboard/eslint.config.js +0 -16
  341. package/refs/vbenchmark/packages/leaderboard/fly.toml +0 -29
  342. package/refs/vbenchmark/packages/leaderboard/package.json +0 -36
  343. package/refs/vbenchmark/packages/leaderboard/src/app.ts +0 -29
  344. package/refs/vbenchmark/packages/leaderboard/src/components/BrowserPreview.tsx +0 -190
  345. package/refs/vbenchmark/packages/leaderboard/src/components/ComparisonView.tsx +0 -205
  346. package/refs/vbenchmark/packages/leaderboard/src/components/LeaderboardTable.tsx +0 -150
  347. package/refs/vbenchmark/packages/leaderboard/src/components/LiveRunCard.tsx +0 -133
  348. package/refs/vbenchmark/packages/leaderboard/src/components/SubmissionForm.tsx +0 -406
  349. package/refs/vbenchmark/packages/leaderboard/src/components/SubmitForm.tsx +0 -293
  350. package/refs/vbenchmark/packages/leaderboard/src/components/TerminalStream.tsx +0 -111
  351. package/refs/vbenchmark/packages/leaderboard/src/config/pricing.ts +0 -206
  352. package/refs/vbenchmark/packages/leaderboard/src/db/index.ts +0 -31
  353. package/refs/vbenchmark/packages/leaderboard/src/db/schema.ts +0 -125
  354. package/refs/vbenchmark/packages/leaderboard/src/index.ts +0 -13
  355. package/refs/vbenchmark/packages/leaderboard/src/lib/websocket.ts +0 -124
  356. package/refs/vbenchmark/packages/leaderboard/src/routes/leaderboard.ts +0 -698
  357. package/refs/vbenchmark/packages/leaderboard/src/routes/live.ts +0 -175
  358. package/refs/vbenchmark/packages/leaderboard/src/routes/submissions.ts +0 -183
  359. package/refs/vbenchmark/packages/leaderboard/src/routes/tasks.ts +0 -215
  360. package/refs/vbenchmark/packages/leaderboard/tests/api.test.ts +0 -228
  361. package/refs/vbenchmark/packages/leaderboard/tsconfig.json +0 -9
  362. package/refs/vbenchmark/scripts/deploy.sh +0 -70
  363. package/refs/vbenchmark/tasks/ai-integration/advanced/context-management/PROMPT.md +0 -15
  364. package/refs/vbenchmark/tasks/ai-integration/advanced/context-management/task.yaml +0 -16
  365. package/refs/vbenchmark/tasks/ai-integration/advanced/evaluation-framework/PROMPT.md +0 -15
  366. package/refs/vbenchmark/tasks/ai-integration/advanced/evaluation-framework/task.yaml +0 -16
  367. package/refs/vbenchmark/tasks/ai-integration/advanced/guardrails-safety/PROMPT.md +0 -15
  368. package/refs/vbenchmark/tasks/ai-integration/advanced/guardrails-safety/task.yaml +0 -16
  369. package/refs/vbenchmark/tasks/ai-integration/advanced/memory-system/PROMPT.md +0 -15
  370. package/refs/vbenchmark/tasks/ai-integration/advanced/memory-system/task.yaml +0 -16
  371. package/refs/vbenchmark/tasks/ai-integration/advanced/model-routing/PROMPT.md +0 -15
  372. package/refs/vbenchmark/tasks/ai-integration/advanced/model-routing/task.yaml +0 -16
  373. package/refs/vbenchmark/tasks/ai-integration/advanced/multi-agent-system/PROMPT.md +0 -15
  374. package/refs/vbenchmark/tasks/ai-integration/advanced/multi-agent-system/task.yaml +0 -16
  375. package/refs/vbenchmark/tasks/ai-integration/advanced/prompt-optimization/PROMPT.md +0 -15
  376. package/refs/vbenchmark/tasks/ai-integration/advanced/prompt-optimization/task.yaml +0 -16
  377. package/refs/vbenchmark/tasks/ai-integration/advanced/reasoning-chain/PROMPT.md +0 -15
  378. package/refs/vbenchmark/tasks/ai-integration/advanced/reasoning-chain/task.yaml +0 -16
  379. package/refs/vbenchmark/tasks/ai-integration/advanced/streaming-pipeline/PROMPT.md +0 -15
  380. package/refs/vbenchmark/tasks/ai-integration/advanced/streaming-pipeline/task.yaml +0 -16
  381. package/refs/vbenchmark/tasks/ai-integration/advanced/tool-use-orchestration/PROMPT.md +0 -15
  382. package/refs/vbenchmark/tasks/ai-integration/advanced/tool-use-orchestration/task.yaml +0 -16
  383. package/refs/vbenchmark/tasks/ai-integration/agents/code-review-agent/PROMPT.md +0 -64
  384. package/refs/vbenchmark/tasks/ai-integration/agents/code-review-agent/task.yaml +0 -24
  385. package/refs/vbenchmark/tasks/ai-integration/agents/research-agent/PROMPT.md +0 -61
  386. package/refs/vbenchmark/tasks/ai-integration/agents/research-agent/task.yaml +0 -24
  387. package/refs/vbenchmark/tasks/ai-integration/agents/web-scraper-agent/PROMPT.md +0 -57
  388. package/refs/vbenchmark/tasks/ai-integration/agents/web-scraper-agent/task.yaml +0 -24
  389. package/refs/vbenchmark/tasks/ai-integration/embeddings/duplicate-detection/PROMPT.md +0 -50
  390. package/refs/vbenchmark/tasks/ai-integration/embeddings/duplicate-detection/task.yaml +0 -24
  391. package/refs/vbenchmark/tasks/ai-integration/embeddings/recommendation-engine/PROMPT.md +0 -51
  392. package/refs/vbenchmark/tasks/ai-integration/embeddings/recommendation-engine/task.yaml +0 -24
  393. package/refs/vbenchmark/tasks/ai-integration/embeddings/semantic-search/PROMPT.md +0 -50
  394. package/refs/vbenchmark/tasks/ai-integration/embeddings/semantic-search/task.yaml +0 -24
  395. package/refs/vbenchmark/tasks/ai-integration/fine-tuning/classification-model/PROMPT.md +0 -50
  396. package/refs/vbenchmark/tasks/ai-integration/fine-tuning/classification-model/task.yaml +0 -24
  397. package/refs/vbenchmark/tasks/ai-integration/function-calling/api-orchestrator/PROMPT.md +0 -60
  398. package/refs/vbenchmark/tasks/ai-integration/function-calling/api-orchestrator/task.yaml +0 -24
  399. package/refs/vbenchmark/tasks/ai-integration/function-calling/calendar-assistant/PROMPT.md +0 -50
  400. package/refs/vbenchmark/tasks/ai-integration/function-calling/calendar-assistant/task.yaml +0 -24
  401. package/refs/vbenchmark/tasks/ai-integration/function-calling/database-query/PROMPT.md +0 -62
  402. package/refs/vbenchmark/tasks/ai-integration/function-calling/database-query/task.yaml +0 -24
  403. package/refs/vbenchmark/tasks/ai-integration/multimodal/chart-interpreter/PROMPT.md +0 -60
  404. package/refs/vbenchmark/tasks/ai-integration/multimodal/chart-interpreter/task.yaml +0 -24
  405. package/refs/vbenchmark/tasks/ai-integration/multimodal/image-captioning/PROMPT.md +0 -49
  406. package/refs/vbenchmark/tasks/ai-integration/multimodal/image-captioning/task.yaml +0 -24
  407. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/code-assistant/PROMPT.md +0 -51
  408. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/code-assistant/task.yaml +0 -24
  409. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/doc-search/PROMPT.md +0 -51
  410. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/doc-search/task.yaml +0 -24
  411. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/pdf-qa/PROMPT.md +0 -76
  412. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/pdf-qa/docker-compose.yaml +0 -30
  413. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/pdf-qa/task.yaml +0 -30
  414. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/pdf-qa/tests/functional/qa.test.py +0 -146
  415. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/support-bot/PROMPT.md +0 -51
  416. package/refs/vbenchmark/tasks/ai-integration/rag-chatbot/support-bot/task.yaml +0 -24
  417. package/refs/vbenchmark/tasks/ai-integration/structured-output/contract-analyzer/PROMPT.md +0 -67
  418. package/refs/vbenchmark/tasks/ai-integration/structured-output/contract-analyzer/task.yaml +0 -24
  419. package/refs/vbenchmark/tasks/ai-integration/structured-output/invoice-parser/PROMPT.md +0 -61
  420. package/refs/vbenchmark/tasks/ai-integration/structured-output/invoice-parser/task.yaml +0 -27
  421. package/refs/vbenchmark/tasks/ai-integration/structured-output/receipt-scanner/PROMPT.md +0 -65
  422. package/refs/vbenchmark/tasks/ai-integration/structured-output/receipt-scanner/task.yaml +0 -24
  423. package/refs/vbenchmark/tasks/ai-integration/structured-output/resume-parser/PROMPT.md +0 -70
  424. package/refs/vbenchmark/tasks/ai-integration/structured-output/resume-parser/task.yaml +0 -24
  425. package/refs/vbenchmark/tasks/api-integrations/advanced/api-analytics/PROMPT.md +0 -15
  426. package/refs/vbenchmark/tasks/api-integrations/advanced/api-analytics/task.yaml +0 -16
  427. package/refs/vbenchmark/tasks/api-integrations/advanced/api-gateway/PROMPT.md +0 -15
  428. package/refs/vbenchmark/tasks/api-integrations/advanced/api-gateway/task.yaml +0 -16
  429. package/refs/vbenchmark/tasks/api-integrations/advanced/api-mocking/PROMPT.md +0 -15
  430. package/refs/vbenchmark/tasks/api-integrations/advanced/api-mocking/task.yaml +0 -16
  431. package/refs/vbenchmark/tasks/api-integrations/advanced/contract-testing/PROMPT.md +0 -15
  432. package/refs/vbenchmark/tasks/api-integrations/advanced/contract-testing/task.yaml +0 -16
  433. package/refs/vbenchmark/tasks/api-integrations/advanced/graphql-federation/PROMPT.md +0 -15
  434. package/refs/vbenchmark/tasks/api-integrations/advanced/graphql-federation/task.yaml +0 -16
  435. package/refs/vbenchmark/tasks/api-integrations/advanced/grpc-gateway/PROMPT.md +0 -15
  436. package/refs/vbenchmark/tasks/api-integrations/advanced/grpc-gateway/task.yaml +0 -16
  437. package/refs/vbenchmark/tasks/api-integrations/advanced/rate-limiter/PROMPT.md +0 -15
  438. package/refs/vbenchmark/tasks/api-integrations/advanced/rate-limiter/task.yaml +0 -16
  439. package/refs/vbenchmark/tasks/api-integrations/advanced/request-validator/PROMPT.md +0 -15
  440. package/refs/vbenchmark/tasks/api-integrations/advanced/request-validator/task.yaml +0 -16
  441. package/refs/vbenchmark/tasks/api-integrations/advanced/sdk-generator/PROMPT.md +0 -15
  442. package/refs/vbenchmark/tasks/api-integrations/advanced/sdk-generator/task.yaml +0 -16
  443. package/refs/vbenchmark/tasks/api-integrations/advanced/webhook-processor/PROMPT.md +0 -15
  444. package/refs/vbenchmark/tasks/api-integrations/advanced/webhook-processor/task.yaml +0 -16
  445. package/refs/vbenchmark/tasks/api-integrations/analytics/mixpanel-events/PROMPT.md +0 -42
  446. package/refs/vbenchmark/tasks/api-integrations/analytics/mixpanel-events/task.yaml +0 -24
  447. package/refs/vbenchmark/tasks/api-integrations/analytics/segment-tracking/PROMPT.md +0 -42
  448. package/refs/vbenchmark/tasks/api-integrations/analytics/segment-tracking/task.yaml +0 -24
  449. package/refs/vbenchmark/tasks/api-integrations/auth-provider/oauth2-github/PROMPT.md +0 -42
  450. package/refs/vbenchmark/tasks/api-integrations/auth-provider/oauth2-github/task.yaml +0 -24
  451. package/refs/vbenchmark/tasks/api-integrations/auth-provider/okta-integration/PROMPT.md +0 -44
  452. package/refs/vbenchmark/tasks/api-integrations/auth-provider/okta-integration/task.yaml +0 -24
  453. package/refs/vbenchmark/tasks/api-integrations/auth-provider/saml-sso/PROMPT.md +0 -42
  454. package/refs/vbenchmark/tasks/api-integrations/auth-provider/saml-sso/task.yaml +0 -24
  455. package/refs/vbenchmark/tasks/api-integrations/communication/discord-webhook/PROMPT.md +0 -44
  456. package/refs/vbenchmark/tasks/api-integrations/communication/discord-webhook/task.yaml +0 -24
  457. package/refs/vbenchmark/tasks/api-integrations/communication/slack-bot/PROMPT.md +0 -42
  458. package/refs/vbenchmark/tasks/api-integrations/communication/slack-bot/task.yaml +0 -24
  459. package/refs/vbenchmark/tasks/api-integrations/communication/twilio-sms/PROMPT.md +0 -42
  460. package/refs/vbenchmark/tasks/api-integrations/communication/twilio-sms/task.yaml +0 -24
  461. package/refs/vbenchmark/tasks/api-integrations/email/transactional/PROMPT.md +0 -82
  462. package/refs/vbenchmark/tasks/api-integrations/email/transactional/task.yaml +0 -27
  463. package/refs/vbenchmark/tasks/api-integrations/maps/google-maps-geocoding/PROMPT.md +0 -41
  464. package/refs/vbenchmark/tasks/api-integrations/maps/google-maps-geocoding/task.yaml +0 -24
  465. package/refs/vbenchmark/tasks/api-integrations/maps/mapbox-directions/PROMPT.md +0 -41
  466. package/refs/vbenchmark/tasks/api-integrations/maps/mapbox-directions/task.yaml +0 -24
  467. package/refs/vbenchmark/tasks/api-integrations/payment/crypto-payments/PROMPT.md +0 -43
  468. package/refs/vbenchmark/tasks/api-integrations/payment/crypto-payments/task.yaml +0 -24
  469. package/refs/vbenchmark/tasks/api-integrations/payment/paypal-integration/PROMPT.md +0 -41
  470. package/refs/vbenchmark/tasks/api-integrations/payment/paypal-integration/task.yaml +0 -24
  471. package/refs/vbenchmark/tasks/api-integrations/social/twitter-api/PROMPT.md +0 -41
  472. package/refs/vbenchmark/tasks/api-integrations/social/twitter-api/task.yaml +0 -24
  473. package/refs/vbenchmark/tasks/api-integrations/storage/cloudinary-upload/PROMPT.md +0 -43
  474. package/refs/vbenchmark/tasks/api-integrations/storage/cloudinary-upload/task.yaml +0 -24
  475. package/refs/vbenchmark/tasks/api-integrations/storage/gcs-streaming/PROMPT.md +0 -43
  476. package/refs/vbenchmark/tasks/api-integrations/storage/gcs-streaming/task.yaml +0 -24
  477. package/refs/vbenchmark/tasks/api-integrations/storage/s3-presigned-urls/PROMPT.md +0 -41
  478. package/refs/vbenchmark/tasks/api-integrations/storage/s3-presigned-urls/task.yaml +0 -24
  479. package/refs/vbenchmark/tasks/api-integrations/stripe/checkout-session/PROMPT.md +0 -41
  480. package/refs/vbenchmark/tasks/api-integrations/stripe/checkout-session/task.yaml +0 -24
  481. package/refs/vbenchmark/tasks/api-integrations/stripe/payment-webhook/PROMPT.md +0 -60
  482. package/refs/vbenchmark/tasks/api-integrations/stripe/payment-webhook/docker-compose.yaml +0 -38
  483. package/refs/vbenchmark/tasks/api-integrations/stripe/payment-webhook/task.yaml +0 -31
  484. package/refs/vbenchmark/tasks/api-integrations/stripe/payment-webhook/tests/webhook.test.ts +0 -193
  485. package/refs/vbenchmark/tasks/api-integrations/stripe/subscription-portal/PROMPT.md +0 -41
  486. package/refs/vbenchmark/tasks/api-integrations/stripe/subscription-portal/task.yaml +0 -24
  487. package/refs/vbenchmark/tasks/code-evolution/advanced/api-deprecation/PROMPT.md +0 -15
  488. package/refs/vbenchmark/tasks/code-evolution/advanced/api-deprecation/task.yaml +0 -16
  489. package/refs/vbenchmark/tasks/code-evolution/advanced/ast-refactoring/PROMPT.md +0 -15
  490. package/refs/vbenchmark/tasks/code-evolution/advanced/ast-refactoring/task.yaml +0 -16
  491. package/refs/vbenchmark/tasks/code-evolution/advanced/concurrency-fix/PROMPT.md +0 -15
  492. package/refs/vbenchmark/tasks/code-evolution/advanced/concurrency-fix/task.yaml +0 -16
  493. package/refs/vbenchmark/tasks/code-evolution/advanced/database-schema-migration/PROMPT.md +0 -15
  494. package/refs/vbenchmark/tasks/code-evolution/advanced/database-schema-migration/task.yaml +0 -16
  495. package/refs/vbenchmark/tasks/code-evolution/advanced/dead-code-elimination/PROMPT.md +0 -15
  496. package/refs/vbenchmark/tasks/code-evolution/advanced/dead-code-elimination/task.yaml +0 -16
  497. package/refs/vbenchmark/tasks/code-evolution/advanced/dependency-upgrade/PROMPT.md +0 -15
  498. package/refs/vbenchmark/tasks/code-evolution/advanced/dependency-upgrade/task.yaml +0 -16
  499. package/refs/vbenchmark/tasks/code-evolution/advanced/memory-optimization/PROMPT.md +0 -15
  500. package/refs/vbenchmark/tasks/code-evolution/advanced/memory-optimization/task.yaml +0 -16
  501. package/refs/vbenchmark/tasks/code-evolution/advanced/monorepo-extraction/PROMPT.md +0 -15
  502. package/refs/vbenchmark/tasks/code-evolution/advanced/monorepo-extraction/task.yaml +0 -16
  503. package/refs/vbenchmark/tasks/code-evolution/advanced/performance-profiling/PROMPT.md +0 -15
  504. package/refs/vbenchmark/tasks/code-evolution/advanced/performance-profiling/task.yaml +0 -16
  505. package/refs/vbenchmark/tasks/code-evolution/advanced/type-migration/PROMPT.md +0 -15
  506. package/refs/vbenchmark/tasks/code-evolution/advanced/type-migration/task.yaml +0 -16
  507. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/callback-to-async/PROMPT.md +0 -47
  508. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/callback-to-async/task.yaml +0 -24
  509. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/express-to-fastify/PROMPT.md +0 -49
  510. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/express-to-fastify/base-code/src/app.ts +0 -22
  511. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/express-to-fastify/task.yaml +0 -37
  512. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/express-to-fastify/tests/api.test.ts +0 -70
  513. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/flask-to-fastapi/PROMPT.md +0 -46
  514. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/flask-to-fastapi/task.yaml +0 -24
  515. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/java-to-kotlin/PROMPT.md +0 -45
  516. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/java-to-kotlin/task.yaml +0 -24
  517. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/jquery-to-react/PROMPT.md +0 -47
  518. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/jquery-to-react/task.yaml +0 -24
  519. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/rest-to-grpc/PROMPT.md +0 -47
  520. package/refs/vbenchmark/tasks/code-evolution/legacy-migration/rest-to-grpc/task.yaml +0 -24
  521. package/refs/vbenchmark/tasks/code-evolution/performance/async-refactor/PROMPT.md +0 -47
  522. package/refs/vbenchmark/tasks/code-evolution/performance/async-refactor/task.yaml +0 -24
  523. package/refs/vbenchmark/tasks/code-evolution/performance/memory-leak-fix/PROMPT.md +0 -47
  524. package/refs/vbenchmark/tasks/code-evolution/performance/memory-leak-fix/task.yaml +0 -24
  525. package/refs/vbenchmark/tasks/code-evolution/performance/query-optimization/PROMPT.md +0 -49
  526. package/refs/vbenchmark/tasks/code-evolution/performance/query-optimization/task.yaml +0 -24
  527. package/refs/vbenchmark/tasks/code-evolution/refactoring/class-to-hooks/PROMPT.md +0 -96
  528. package/refs/vbenchmark/tasks/code-evolution/refactoring/class-to-hooks/task.yaml +0 -27
  529. package/refs/vbenchmark/tasks/code-evolution/refactoring/dependency-injection/PROMPT.md +0 -47
  530. package/refs/vbenchmark/tasks/code-evolution/refactoring/dependency-injection/task.yaml +0 -24
  531. package/refs/vbenchmark/tasks/code-evolution/refactoring/error-handling/PROMPT.md +0 -48
  532. package/refs/vbenchmark/tasks/code-evolution/refactoring/error-handling/task.yaml +0 -24
  533. package/refs/vbenchmark/tasks/code-evolution/refactoring/monolith-to-modules/PROMPT.md +0 -50
  534. package/refs/vbenchmark/tasks/code-evolution/refactoring/monolith-to-modules/task.yaml +0 -24
  535. package/refs/vbenchmark/tasks/code-evolution/refactoring/orm-migration/PROMPT.md +0 -47
  536. package/refs/vbenchmark/tasks/code-evolution/refactoring/orm-migration/task.yaml +0 -24
  537. package/refs/vbenchmark/tasks/code-evolution/security/secrets-rotation/PROMPT.md +0 -49
  538. package/refs/vbenchmark/tasks/code-evolution/security/secrets-rotation/task.yaml +0 -24
  539. package/refs/vbenchmark/tasks/code-evolution/security/sql-injection-fix/PROMPT.md +0 -50
  540. package/refs/vbenchmark/tasks/code-evolution/security/sql-injection-fix/task.yaml +0 -24
  541. package/refs/vbenchmark/tasks/code-evolution/security/xss-prevention/PROMPT.md +0 -47
  542. package/refs/vbenchmark/tasks/code-evolution/security/xss-prevention/task.yaml +0 -24
  543. package/refs/vbenchmark/tasks/code-evolution/testing/add-unit-tests/PROMPT.md +0 -48
  544. package/refs/vbenchmark/tasks/code-evolution/testing/add-unit-tests/task.yaml +0 -24
  545. package/refs/vbenchmark/tasks/code-evolution/testing/e2e-playwright/PROMPT.md +0 -50
  546. package/refs/vbenchmark/tasks/code-evolution/testing/e2e-playwright/task.yaml +0 -24
  547. package/refs/vbenchmark/tasks/code-evolution/testing/pytest-fixtures/PROMPT.md +0 -47
  548. package/refs/vbenchmark/tasks/code-evolution/testing/pytest-fixtures/task.yaml +0 -24
  549. package/refs/vbenchmark/tasks/frontend/accessibility/keyboard-shortcuts/PROMPT.md +0 -44
  550. package/refs/vbenchmark/tasks/frontend/accessibility/keyboard-shortcuts/task.yaml +0 -24
  551. package/refs/vbenchmark/tasks/frontend/accessibility/screen-reader-nav/PROMPT.md +0 -44
  552. package/refs/vbenchmark/tasks/frontend/accessibility/screen-reader-nav/task.yaml +0 -24
  553. package/refs/vbenchmark/tasks/frontend/advanced/canvas-editor/PROMPT.md +0 -15
  554. package/refs/vbenchmark/tasks/frontend/advanced/canvas-editor/task.yaml +0 -16
  555. package/refs/vbenchmark/tasks/frontend/advanced/micro-frontend/PROMPT.md +0 -15
  556. package/refs/vbenchmark/tasks/frontend/advanced/micro-frontend/task.yaml +0 -16
  557. package/refs/vbenchmark/tasks/frontend/advanced/offline-first/PROMPT.md +0 -15
  558. package/refs/vbenchmark/tasks/frontend/advanced/offline-first/task.yaml +0 -16
  559. package/refs/vbenchmark/tasks/frontend/advanced/realtime-collab/PROMPT.md +0 -15
  560. package/refs/vbenchmark/tasks/frontend/advanced/realtime-collab/task.yaml +0 -16
  561. package/refs/vbenchmark/tasks/frontend/advanced/service-worker/PROMPT.md +0 -15
  562. package/refs/vbenchmark/tasks/frontend/advanced/service-worker/task.yaml +0 -16
  563. package/refs/vbenchmark/tasks/frontend/advanced/state-machine/PROMPT.md +0 -15
  564. package/refs/vbenchmark/tasks/frontend/advanced/state-machine/task.yaml +0 -16
  565. package/refs/vbenchmark/tasks/frontend/advanced/virtual-list/PROMPT.md +0 -15
  566. package/refs/vbenchmark/tasks/frontend/advanced/virtual-list/task.yaml +0 -16
  567. package/refs/vbenchmark/tasks/frontend/advanced/wasm-integration/PROMPT.md +0 -15
  568. package/refs/vbenchmark/tasks/frontend/advanced/wasm-integration/task.yaml +0 -16
  569. package/refs/vbenchmark/tasks/frontend/advanced/web-worker/PROMPT.md +0 -15
  570. package/refs/vbenchmark/tasks/frontend/advanced/web-worker/task.yaml +0 -16
  571. package/refs/vbenchmark/tasks/frontend/advanced/webgl-visualization/PROMPT.md +0 -15
  572. package/refs/vbenchmark/tasks/frontend/advanced/webgl-visualization/task.yaml +0 -16
  573. package/refs/vbenchmark/tasks/frontend/animation/page-transitions/PROMPT.md +0 -44
  574. package/refs/vbenchmark/tasks/frontend/animation/page-transitions/task.yaml +0 -24
  575. package/refs/vbenchmark/tasks/frontend/components/data-grid/PROMPT.md +0 -59
  576. package/refs/vbenchmark/tasks/frontend/components/data-grid/task.yaml +0 -24
  577. package/refs/vbenchmark/tasks/frontend/components/date-range-picker/PROMPT.md +0 -57
  578. package/refs/vbenchmark/tasks/frontend/components/date-range-picker/task.yaml +0 -24
  579. package/refs/vbenchmark/tasks/frontend/components/file-uploader/PROMPT.md +0 -55
  580. package/refs/vbenchmark/tasks/frontend/components/file-uploader/task.yaml +0 -24
  581. package/refs/vbenchmark/tasks/frontend/components/form-builder/PROMPT.md +0 -96
  582. package/refs/vbenchmark/tasks/frontend/components/form-builder/task.yaml +0 -28
  583. package/refs/vbenchmark/tasks/frontend/components/rich-text-editor/PROMPT.md +0 -45
  584. package/refs/vbenchmark/tasks/frontend/components/rich-text-editor/task.yaml +0 -24
  585. package/refs/vbenchmark/tasks/frontend/figma-to-code/dashboard-layout/PROMPT.md +0 -50
  586. package/refs/vbenchmark/tasks/frontend/figma-to-code/dashboard-layout/task.yaml +0 -25
  587. package/refs/vbenchmark/tasks/frontend/figma-to-code/landing-page/PROMPT.md +0 -49
  588. package/refs/vbenchmark/tasks/frontend/figma-to-code/landing-page/task.yaml +0 -25
  589. package/refs/vbenchmark/tasks/frontend/figma-to-code/mobile-app-screen/PROMPT.md +0 -51
  590. package/refs/vbenchmark/tasks/frontend/figma-to-code/mobile-app-screen/task.yaml +0 -24
  591. package/refs/vbenchmark/tasks/frontend/figma-to-code/pricing-card/PROMPT.md +0 -93
  592. package/refs/vbenchmark/tasks/frontend/figma-to-code/pricing-card/docker-compose.yaml +0 -23
  593. package/refs/vbenchmark/tasks/frontend/figma-to-code/pricing-card/task.yaml +0 -30
  594. package/refs/vbenchmark/tasks/frontend/figma-to-code/pricing-card/tests/visual/diff.test.ts +0 -107
  595. package/refs/vbenchmark/tasks/frontend/figma-to-code/pricing-card/tests/visual/interaction.test.ts +0 -88
  596. package/refs/vbenchmark/tasks/frontend/performance/image-lazy-load/PROMPT.md +0 -43
  597. package/refs/vbenchmark/tasks/frontend/performance/image-lazy-load/task.yaml +0 -24
  598. package/refs/vbenchmark/tasks/frontend/performance/infinite-scroll/PROMPT.md +0 -44
  599. package/refs/vbenchmark/tasks/frontend/performance/infinite-scroll/task.yaml +0 -24
  600. package/refs/vbenchmark/tasks/frontend/state-management/collaborative-editor/PROMPT.md +0 -44
  601. package/refs/vbenchmark/tasks/frontend/state-management/collaborative-editor/task.yaml +0 -24
  602. package/refs/vbenchmark/tasks/frontend/state-management/shopping-cart/PROMPT.md +0 -53
  603. package/refs/vbenchmark/tasks/frontend/state-management/shopping-cart/task.yaml +0 -24
  604. package/refs/vbenchmark/tasks/frontend/visualization/chart-dashboard/PROMPT.md +0 -83
  605. package/refs/vbenchmark/tasks/frontend/visualization/chart-dashboard/task.yaml +0 -28
  606. package/refs/vbenchmark/tasks/frontend/visualization/gantt-chart/PROMPT.md +0 -57
  607. package/refs/vbenchmark/tasks/frontend/visualization/gantt-chart/task.yaml +0 -24
  608. package/refs/vbenchmark/tasks/frontend/visualization/map-dashboard/PROMPT.md +0 -44
  609. package/refs/vbenchmark/tasks/frontend/visualization/map-dashboard/task.yaml +0 -24
  610. package/refs/vbenchmark/tasks/frontend/visualization/realtime-charts/PROMPT.md +0 -43
  611. package/refs/vbenchmark/tasks/frontend/visualization/realtime-charts/task.yaml +0 -24
  612. package/refs/vbenchmark/tasks/glue-code/advanced/blue-green-deploy/PROMPT.md +0 -15
  613. package/refs/vbenchmark/tasks/glue-code/advanced/blue-green-deploy/task.yaml +0 -16
  614. package/refs/vbenchmark/tasks/glue-code/advanced/canary-release/PROMPT.md +0 -15
  615. package/refs/vbenchmark/tasks/glue-code/advanced/canary-release/task.yaml +0 -16
  616. package/refs/vbenchmark/tasks/glue-code/advanced/change-data-capture/PROMPT.md +0 -15
  617. package/refs/vbenchmark/tasks/glue-code/advanced/change-data-capture/task.yaml +0 -16
  618. package/refs/vbenchmark/tasks/glue-code/advanced/config-management/PROMPT.md +0 -15
  619. package/refs/vbenchmark/tasks/glue-code/advanced/config-management/task.yaml +0 -16
  620. package/refs/vbenchmark/tasks/glue-code/advanced/data-pipeline/PROMPT.md +0 -15
  621. package/refs/vbenchmark/tasks/glue-code/advanced/data-pipeline/task.yaml +0 -16
  622. package/refs/vbenchmark/tasks/glue-code/advanced/distributed-tracing/PROMPT.md +0 -15
  623. package/refs/vbenchmark/tasks/glue-code/advanced/distributed-tracing/task.yaml +0 -16
  624. package/refs/vbenchmark/tasks/glue-code/advanced/log-aggregation/PROMPT.md +0 -15
  625. package/refs/vbenchmark/tasks/glue-code/advanced/log-aggregation/task.yaml +0 -16
  626. package/refs/vbenchmark/tasks/glue-code/advanced/schema-registry/PROMPT.md +0 -15
  627. package/refs/vbenchmark/tasks/glue-code/advanced/schema-registry/task.yaml +0 -16
  628. package/refs/vbenchmark/tasks/glue-code/advanced/secret-rotation/PROMPT.md +0 -15
  629. package/refs/vbenchmark/tasks/glue-code/advanced/secret-rotation/task.yaml +0 -16
  630. package/refs/vbenchmark/tasks/glue-code/advanced/stream-processing/PROMPT.md +0 -15
  631. package/refs/vbenchmark/tasks/glue-code/advanced/stream-processing/task.yaml +0 -16
  632. package/refs/vbenchmark/tasks/glue-code/api-sync/rest-to-graphql/PROMPT.md +0 -66
  633. package/refs/vbenchmark/tasks/glue-code/api-sync/rest-to-graphql/task.yaml +0 -27
  634. package/refs/vbenchmark/tasks/glue-code/caching/redis-cache/PROMPT.md +0 -82
  635. package/refs/vbenchmark/tasks/glue-code/caching/redis-cache/task.yaml +0 -27
  636. package/refs/vbenchmark/tasks/glue-code/data-transform/avro-schema-evolution/PROMPT.md +0 -51
  637. package/refs/vbenchmark/tasks/glue-code/data-transform/avro-schema-evolution/task.yaml +0 -24
  638. package/refs/vbenchmark/tasks/glue-code/data-transform/csv-normalizer/PROMPT.md +0 -49
  639. package/refs/vbenchmark/tasks/glue-code/data-transform/csv-normalizer/task.yaml +0 -24
  640. package/refs/vbenchmark/tasks/glue-code/data-transform/excel-to-json/PROMPT.md +0 -67
  641. package/refs/vbenchmark/tasks/glue-code/data-transform/excel-to-json/task.yaml +0 -28
  642. package/refs/vbenchmark/tasks/glue-code/data-transform/excel-to-json/tests/transform.test.py +0 -137
  643. package/refs/vbenchmark/tasks/glue-code/data-transform/json-to-xml/PROMPT.md +0 -45
  644. package/refs/vbenchmark/tasks/glue-code/data-transform/json-to-xml/task.yaml +0 -24
  645. package/refs/vbenchmark/tasks/glue-code/data-transform/protobuf-converter/PROMPT.md +0 -44
  646. package/refs/vbenchmark/tasks/glue-code/data-transform/protobuf-converter/task.yaml +0 -24
  647. package/refs/vbenchmark/tasks/glue-code/etl/cdc-pipeline/PROMPT.md +0 -52
  648. package/refs/vbenchmark/tasks/glue-code/etl/cdc-pipeline/task.yaml +0 -27
  649. package/refs/vbenchmark/tasks/glue-code/etl/database-sync/PROMPT.md +0 -51
  650. package/refs/vbenchmark/tasks/glue-code/etl/database-sync/task.yaml +0 -24
  651. package/refs/vbenchmark/tasks/glue-code/etl/s3-to-warehouse/PROMPT.md +0 -50
  652. package/refs/vbenchmark/tasks/glue-code/etl/s3-to-warehouse/task.yaml +0 -24
  653. package/refs/vbenchmark/tasks/glue-code/file-processing/image-resizer/PROMPT.md +0 -52
  654. package/refs/vbenchmark/tasks/glue-code/file-processing/image-resizer/task.yaml +0 -24
  655. package/refs/vbenchmark/tasks/glue-code/file-processing/pdf-merger/PROMPT.md +0 -50
  656. package/refs/vbenchmark/tasks/glue-code/file-processing/pdf-merger/task.yaml +0 -24
  657. package/refs/vbenchmark/tasks/glue-code/file-processing/video-transcoder/PROMPT.md +0 -50
  658. package/refs/vbenchmark/tasks/glue-code/file-processing/video-transcoder/task.yaml +0 -27
  659. package/refs/vbenchmark/tasks/glue-code/migration/data-backfill/PROMPT.md +0 -50
  660. package/refs/vbenchmark/tasks/glue-code/migration/data-backfill/task.yaml +0 -24
  661. package/refs/vbenchmark/tasks/glue-code/migration/database-versioning/PROMPT.md +0 -50
  662. package/refs/vbenchmark/tasks/glue-code/migration/database-versioning/task.yaml +0 -24
  663. package/refs/vbenchmark/tasks/glue-code/queue/kafka-producer/PROMPT.md +0 -49
  664. package/refs/vbenchmark/tasks/glue-code/queue/kafka-producer/task.yaml +0 -27
  665. package/refs/vbenchmark/tasks/glue-code/queue/rabbitmq-consumer/PROMPT.md +0 -50
  666. package/refs/vbenchmark/tasks/glue-code/queue/rabbitmq-consumer/task.yaml +0 -27
  667. package/refs/vbenchmark/tasks/glue-code/queue/sqs-batch-processor/PROMPT.md +0 -47
  668. package/refs/vbenchmark/tasks/glue-code/queue/sqs-batch-processor/task.yaml +0 -24
  669. package/refs/vbenchmark/tasks/glue-code/scheduler/cron-job-manager/PROMPT.md +0 -52
  670. package/refs/vbenchmark/tasks/glue-code/scheduler/cron-job-manager/task.yaml +0 -27
  671. package/refs/vbenchmark/tasks/glue-code/scheduler/delayed-tasks/PROMPT.md +0 -51
  672. package/refs/vbenchmark/tasks/glue-code/scheduler/delayed-tasks/task.yaml +0 -27
  673. package/refs/vbenchmark/tasks/saas-core/advanced/api-versioning/PROMPT.md +0 -15
  674. package/refs/vbenchmark/tasks/saas-core/advanced/api-versioning/task.yaml +0 -16
  675. package/refs/vbenchmark/tasks/saas-core/advanced/circuit-breaker/PROMPT.md +0 -13
  676. package/refs/vbenchmark/tasks/saas-core/advanced/circuit-breaker/task.yaml +0 -16
  677. package/refs/vbenchmark/tasks/saas-core/advanced/compliance-gdpr/PROMPT.md +0 -15
  678. package/refs/vbenchmark/tasks/saas-core/advanced/compliance-gdpr/task.yaml +0 -16
  679. package/refs/vbenchmark/tasks/saas-core/advanced/cqrs-pattern/PROMPT.md +0 -13
  680. package/refs/vbenchmark/tasks/saas-core/advanced/cqrs-pattern/task.yaml +0 -16
  681. package/refs/vbenchmark/tasks/saas-core/advanced/data-encryption/PROMPT.md +0 -15
  682. package/refs/vbenchmark/tasks/saas-core/advanced/data-encryption/task.yaml +0 -16
  683. package/refs/vbenchmark/tasks/saas-core/advanced/distributed-locking/PROMPT.md +0 -46
  684. package/refs/vbenchmark/tasks/saas-core/advanced/distributed-locking/task.yaml +0 -24
  685. package/refs/vbenchmark/tasks/saas-core/advanced/event-sourcing/PROMPT.md +0 -23
  686. package/refs/vbenchmark/tasks/saas-core/advanced/event-sourcing/task.yaml +0 -16
  687. package/refs/vbenchmark/tasks/saas-core/advanced/feature-flags-ab/PROMPT.md +0 -15
  688. package/refs/vbenchmark/tasks/saas-core/advanced/feature-flags-ab/task.yaml +0 -16
  689. package/refs/vbenchmark/tasks/saas-core/advanced/saga-orchestration/PROMPT.md +0 -13
  690. package/refs/vbenchmark/tasks/saas-core/advanced/saga-orchestration/task.yaml +0 -16
  691. package/refs/vbenchmark/tasks/saas-core/advanced/webhook-delivery/PROMPT.md +0 -15
  692. package/refs/vbenchmark/tasks/saas-core/advanced/webhook-delivery/task.yaml +0 -16
  693. package/refs/vbenchmark/tasks/saas-core/audit/activity-logging/PROMPT.md +0 -50
  694. package/refs/vbenchmark/tasks/saas-core/audit/activity-logging/task.yaml +0 -27
  695. package/refs/vbenchmark/tasks/saas-core/auth/jwt-refresh-tokens/PROMPT.md +0 -50
  696. package/refs/vbenchmark/tasks/saas-core/auth/jwt-refresh-tokens/task.yaml +0 -27
  697. package/refs/vbenchmark/tasks/saas-core/auth/magic-link-email/PROMPT.md +0 -53
  698. package/refs/vbenchmark/tasks/saas-core/auth/magic-link-email/task.yaml +0 -27
  699. package/refs/vbenchmark/tasks/saas-core/auth/mfa-totp/PROMPT.md +0 -79
  700. package/refs/vbenchmark/tasks/saas-core/auth/mfa-totp/task.yaml +0 -27
  701. package/refs/vbenchmark/tasks/saas-core/auth/rbac-permissions/PROMPT.md +0 -51
  702. package/refs/vbenchmark/tasks/saas-core/auth/rbac-permissions/task.yaml +0 -27
  703. package/refs/vbenchmark/tasks/saas-core/auth/session-management/PROMPT.md +0 -52
  704. package/refs/vbenchmark/tasks/saas-core/auth/session-management/task.yaml +0 -27
  705. package/refs/vbenchmark/tasks/saas-core/auth/supabase-oauth/PROMPT.md +0 -45
  706. package/refs/vbenchmark/tasks/saas-core/auth/supabase-oauth/docker-compose.yaml +0 -47
  707. package/refs/vbenchmark/tasks/saas-core/auth/supabase-oauth/task.yaml +0 -32
  708. package/refs/vbenchmark/tasks/saas-core/auth/supabase-oauth/tests/auth.test.ts +0 -59
  709. package/refs/vbenchmark/tasks/saas-core/billing/invoice-generation/PROMPT.md +0 -53
  710. package/refs/vbenchmark/tasks/saas-core/billing/invoice-generation/task.yaml +0 -27
  711. package/refs/vbenchmark/tasks/saas-core/billing/stripe-subscriptions/PROMPT.md +0 -51
  712. package/refs/vbenchmark/tasks/saas-core/billing/stripe-subscriptions/task.yaml +0 -27
  713. package/refs/vbenchmark/tasks/saas-core/billing/usage-metering/PROMPT.md +0 -52
  714. package/refs/vbenchmark/tasks/saas-core/billing/usage-metering/task.yaml +0 -27
  715. package/refs/vbenchmark/tasks/saas-core/crud/dashboard-table/PROMPT.md +0 -48
  716. package/refs/vbenchmark/tasks/saas-core/crud/dashboard-table/task.yaml +0 -28
  717. package/refs/vbenchmark/tasks/saas-core/multi-tenant/org-isolation/PROMPT.md +0 -50
  718. package/refs/vbenchmark/tasks/saas-core/multi-tenant/org-isolation/task.yaml +0 -27
  719. package/refs/vbenchmark/tasks/saas-core/multi-tenant/subdomain-routing/PROMPT.md +0 -50
  720. package/refs/vbenchmark/tasks/saas-core/multi-tenant/subdomain-routing/task.yaml +0 -27
  721. package/refs/vbenchmark/tasks/saas-core/notifications/email-queue/PROMPT.md +0 -53
  722. package/refs/vbenchmark/tasks/saas-core/notifications/email-queue/task.yaml +0 -27
  723. package/refs/vbenchmark/tasks/saas-core/notifications/in-app-alerts/PROMPT.md +0 -51
  724. package/refs/vbenchmark/tasks/saas-core/notifications/in-app-alerts/task.yaml +0 -27
  725. package/refs/vbenchmark/tasks/saas-core/notifications/push-notifications/PROMPT.md +0 -51
  726. package/refs/vbenchmark/tasks/saas-core/notifications/push-notifications/task.yaml +0 -27
  727. package/refs/vbenchmark/tasks/saas-core/realtime/websocket-chat/PROMPT.md +0 -80
  728. package/refs/vbenchmark/tasks/saas-core/realtime/websocket-chat/task.yaml +0 -27
  729. package/refs/vbenchmark/tasks/saas-core/search/full-text-search/PROMPT.md +0 -51
  730. package/refs/vbenchmark/tasks/saas-core/search/full-text-search/task.yaml +0 -27
  731. package/refs/vbenchmark/tasks/saas-core/security/rate-limiter/PROMPT.md +0 -99
  732. package/refs/vbenchmark/tasks/saas-core/security/rate-limiter/task.yaml +0 -27
  733. package/refs/vbenchmark/tasks/saas-core/settings/user-preferences/PROMPT.md +0 -78
  734. package/refs/vbenchmark/tasks/saas-core/settings/user-preferences/task.yaml +0 -27
  735. package/refs/vbenchmark/templates/fastapi-postgres/docker-compose.yaml +0 -36
  736. package/refs/vbenchmark/templates/fastapi-postgres/pyproject.toml +0 -34
  737. package/refs/vbenchmark/templates/fastapi-postgres/src/__init__.py +0 -0
  738. package/refs/vbenchmark/templates/fastapi-postgres/src/config.py +0 -12
  739. package/refs/vbenchmark/templates/fastapi-postgres/src/database.py +0 -15
  740. package/refs/vbenchmark/templates/fastapi-postgres/src/main.py +0 -51
  741. package/refs/vbenchmark/templates/fastapi-postgres/src/models.py +0 -12
  742. package/refs/vbenchmark/templates/fastapi-postgres/src/schemas.py +0 -20
  743. package/refs/vbenchmark/templates/go-fiber/docker-compose.yaml +0 -34
  744. package/refs/vbenchmark/templates/go-fiber/go.mod +0 -33
  745. package/refs/vbenchmark/templates/go-fiber/go.sum +0 -68
  746. package/refs/vbenchmark/templates/go-fiber/main.go +0 -98
  747. package/refs/vbenchmark/templates/nextjs-supabase/.env.example +0 -3
  748. package/refs/vbenchmark/templates/nextjs-supabase/docker-compose.yaml +0 -68
  749. package/refs/vbenchmark/templates/nextjs-supabase/src/app/globals.css +0 -13
  750. package/refs/vbenchmark/templates/nextjs-supabase/src/app/layout.tsx +0 -19
  751. package/refs/vbenchmark/templates/nextjs-supabase/src/app/page.tsx +0 -38
  752. package/refs/vbenchmark/templates/nextjs-supabase/src/lib/supabase/client.ts +0 -8
  753. package/refs/vbenchmark/templates/nextjs-supabase/src/lib/supabase/server.ts +0 -32
  754. package/refs/vbenchmark/templates/rust-axum/Cargo.lock +0 -2371
  755. package/refs/vbenchmark/templates/rust-axum/Cargo.toml +0 -16
  756. package/refs/vbenchmark/templates/rust-axum/docker-compose.yaml +0 -34
  757. package/refs/vbenchmark/templates/rust-axum/migrations/20240101000000_init.sql +0 -20
  758. package/refs/vbenchmark/templates/rust-axum/src/main.rs +0 -121
  759. package/refs/vbenchmark/tsconfig.base.json +0 -18
  760. package/refs/vbenchmark/turbo.json +0 -23
  761. package/refs/vbenchmark/vercel.json +0 -10
@@ -1 +0,0 @@
1
- {"tasks":[{"taskId":"saas-core/auth/task-1","category":"saas-core","subcategory":"auth","results":[{"modelName":"GLM 4-Plus","score":88.8,"functional":84.4,"quality":79.6,"passed":true,"tokens":5151,"timeMs":107297,"cost":0.0061},{"modelName":"MiniMax M2.1","score":91.5,"functional":86.9,"quality":80.6,"passed":true,"tokens":17546,"timeMs":189875,"cost":0.0144},{"modelName":"GLM-4.7","score":80.9,"functional":76.9,"quality":78.1,"passed":true,"tokens":3237,"timeMs":62125,"cost":0.0042},{"modelName":"Gemini 3 Flash","score":82.8,"functional":78.6,"quality":74.3,"passed":true,"tokens":2150,"timeMs":33164,"cost":0.004},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4567,"timeMs":39531,"cost":0.064},{"modelName":"Claude Sonnet 4.5","score":85.8,"functional":81.5,"quality":78.6,"passed":true,"tokens":3887,"timeMs":32147,"cost":0.0424},{"modelName":"Claude Opus 4.5","score":95.4,"functional":90.6,"quality":81.3,"passed":true,"tokens":3158,"timeMs":31710,"cost":0.059},{"modelName":"Claude Haiku 4.5","score":95.5,"functional":90.7,"quality":80.9,"passed":true,"tokens":4693,"timeMs":25658,"cost":0.0173},{"modelName":"DeepSeek v3.2","score":93.5,"functional":88.8,"quality":81,"passed":true,"tokens":3581,"timeMs":80516,"cost":0.003},{"modelName":"OpenAI GPT-5.2","score":87,"functional":82.6,"quality":78.4,"passed":true,"tokens":2532,"timeMs":25236,"cost":0.028},{"modelName":"GLM 4.7 Flash","score":83,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":91.5,"functional":86.9,"quality":80.2,"passed":true,"tokens":2622,"timeMs":78633,"cost":0.0013},{"modelName":"Grok 4","score":94.9,"functional":90.2,"quality":81.5,"passed":true,"tokens":2437,"timeMs":64475,"cost":0.0257},{"modelName":"Grok 4.1 Fast","score":86.1,"functional":81.8,"quality":77.9,"passed":true,"tokens":3298,"timeMs":94315,"cost":0.0015},{"modelName":"Qwen3 
Max","score":88.3,"functional":85,"quality":80,"passed":true,"tokens":5474,"timeMs":101653,"cost":0.0307}]},{"taskId":"saas-core/auth/task-2","category":"saas-core","subcategory":"auth","results":[{"modelName":"GLM 4-Plus","score":91.3,"functional":86.7,"quality":80.3,"passed":true,"tokens":3665,"timeMs":92725,"cost":0.0055},{"modelName":"MiniMax M2.1","score":89,"functional":84.6,"quality":79.9,"passed":true,"tokens":16777,"timeMs":195125,"cost":0.0116},{"modelName":"GLM-4.7","score":81.5,"functional":77.4,"quality":78.2,"passed":true,"tokens":4045,"timeMs":60739,"cost":0.0033},{"modelName":"Gemini 3 Flash","score":85.1,"functional":80.9,"quality":75,"passed":true,"tokens":2175,"timeMs":22960,"cost":0.0047},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4567,"timeMs":39531,"cost":0.064},{"modelName":"Claude Sonnet 4.5","score":87,"functional":82.7,"quality":78.9,"passed":true,"tokens":3051,"timeMs":49659,"cost":0.0454},{"modelName":"Claude Opus 4.5","score":96.1,"functional":91.3,"quality":81.5,"passed":true,"tokens":3091,"timeMs":54885,"cost":0.0551},{"modelName":"Claude Haiku 4.5","score":94,"functional":89.3,"quality":80.5,"passed":true,"tokens":4695,"timeMs":20649,"cost":0.0176},{"modelName":"DeepSeek v3.2","score":94.9,"functional":90.1,"quality":81.4,"passed":true,"tokens":2531,"timeMs":107104,"cost":0.0032},{"modelName":"OpenAI GPT-5.2","score":85.9,"functional":81.6,"quality":78.1,"passed":true,"tokens":2168,"timeMs":26089,"cost":0.027},{"modelName":"GLM 4.7 Flash","score":83,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":89,"functional":84.6,"quality":79.5,"passed":true,"tokens":3387,"timeMs":49867,"cost":0.0012},{"modelName":"Grok 4","score":94,"functional":89.3,"quality":81.2,"passed":true,"tokens":2937,"timeMs":95394,"cost":0.0259},{"modelName":"Grok 4.1 
Fast","score":84.4,"functional":80.2,"quality":77.4,"passed":true,"tokens":3643,"timeMs":114391,"cost":0.0011},{"modelName":"Qwen3 Max","score":88.3,"functional":85,"quality":80,"passed":true,"tokens":5474,"timeMs":101653,"cost":0.0307}]},{"taskId":"saas-core/auth/task-3","category":"saas-core","subcategory":"auth","results":[{"modelName":"GLM 4-Plus","score":93.5,"functional":88.8,"quality":81,"passed":true,"tokens":4284,"timeMs":96131,"cost":0.0044},{"modelName":"MiniMax M2.1","score":86.7,"functional":82.4,"quality":79.2,"passed":true,"tokens":17115,"timeMs":129047,"cost":0.0147},{"modelName":"GLM-4.7","score":83.2,"functional":79.1,"quality":78.8,"passed":true,"tokens":3027,"timeMs":54671,"cost":0.0044},{"modelName":"Gemini 3 Flash","score":87.5,"functional":83.2,"quality":75.7,"passed":true,"tokens":2346,"timeMs":27377,"cost":0.0045},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4567,"timeMs":39531,"cost":0.064},{"modelName":"Claude Sonnet 4.5","score":89.2,"functional":84.7,"quality":79.6,"passed":true,"tokens":3247,"timeMs":53738,"cost":0.0355},{"modelName":"Claude Opus 4.5","score":95.7,"functional":90.9,"quality":81.4,"passed":true,"tokens":2968,"timeMs":54051,"cost":0.0734},{"modelName":"Claude Haiku 4.5","score":91.7,"functional":87.1,"quality":79.8,"passed":true,"tokens":3988,"timeMs":17398,"cost":0.0195},{"modelName":"DeepSeek v3.2","score":95.1,"functional":90.4,"quality":81.5,"passed":true,"tokens":2612,"timeMs":112849,"cost":0.0029},{"modelName":"OpenAI GPT-5.2","score":86,"functional":81.7,"quality":78.1,"passed":true,"tokens":2338,"timeMs":20566,"cost":0.0303},{"modelName":"GLM 4.7 Flash","score":83,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":87,"functional":82.7,"quality":78.9,"passed":true,"tokens":3045,"timeMs":49015,"cost":0.0014},{"modelName":"Grok 
4","score":92.1,"functional":87.5,"quality":80.6,"passed":true,"tokens":2751,"timeMs":92157,"cost":0.0324},{"modelName":"Grok 4.1 Fast","score":83.8,"functional":79.6,"quality":77.2,"passed":true,"tokens":3483,"timeMs":104802,"cost":0.0011},{"modelName":"Qwen3 Max","score":88.3,"functional":85,"quality":80,"passed":true,"tokens":5474,"timeMs":101653,"cost":0.0307}]},{"taskId":"saas-core/auth/task-4","category":"saas-core","subcategory":"auth","results":[{"modelName":"GLM 4-Plus","score":94.9,"functional":90.1,"quality":81.4,"passed":true,"tokens":4530,"timeMs":116378,"cost":0.0061},{"modelName":"MiniMax M2.1","score":85,"functional":80.8,"quality":78.7,"passed":true,"tokens":17881,"timeMs":213996,"cost":0.0146},{"modelName":"GLM-4.7","score":85.6,"functional":81.3,"quality":79.5,"passed":true,"tokens":3998,"timeMs":46659,"cost":0.0033},{"modelName":"Gemini 3 Flash","score":89.5,"functional":85,"quality":76.3,"passed":true,"tokens":2058,"timeMs":26138,"cost":0.0055},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4567,"timeMs":39531,"cost":0.064},{"modelName":"Claude Sonnet 4.5","score":91.6,"functional":87.1,"quality":80.3,"passed":true,"tokens":3933,"timeMs":33067,"cost":0.0353},{"modelName":"Claude Opus 4.5","score":94.1,"functional":89.4,"quality":80.9,"passed":true,"tokens":3752,"timeMs":41125,"cost":0.0634},{"modelName":"Claude Haiku 4.5","score":89.2,"functional":84.8,"quality":79,"passed":true,"tokens":4713,"timeMs":15510,"cost":0.0175},{"modelName":"DeepSeek v3.2","score":94.2,"functional":89.5,"quality":81.2,"passed":true,"tokens":3368,"timeMs":116343,"cost":0.0026},{"modelName":"OpenAI GPT-5.2","score":87.2,"functional":82.9,"quality":78.5,"passed":true,"tokens":2182,"timeMs":25159,"cost":0.0276},{"modelName":"GLM 4.7 Flash","score":83,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 
Fast","score":85.9,"functional":81.6,"quality":78.5,"passed":true,"tokens":3197,"timeMs":85469,"cost":0.0011},{"modelName":"Grok 4","score":89.6,"functional":85.1,"quality":79.9,"passed":true,"tokens":2282,"timeMs":76602,"cost":0.0291},{"modelName":"Grok 4.1 Fast","score":84.4,"functional":80.2,"quality":77.4,"passed":true,"tokens":2653,"timeMs":92696,"cost":0.0014},{"modelName":"Qwen3 Max","score":88.3,"functional":85,"quality":80,"passed":true,"tokens":5474,"timeMs":101653,"cost":0.0307}]},{"taskId":"saas-core/auth/task-5","category":"saas-core","subcategory":"auth","results":[{"modelName":"GLM 4-Plus","score":95.1,"functional":90.4,"quality":81.5,"passed":true,"tokens":4713,"timeMs":72726,"cost":0.0047},{"modelName":"MiniMax M2.1","score":84.4,"functional":80.2,"quality":78.5,"passed":true,"tokens":15063,"timeMs":157264,"cost":0.0139},{"modelName":"GLM-4.7","score":88,"functional":83.6,"quality":80.2,"passed":true,"tokens":3654,"timeMs":62594,"cost":0.0033},{"modelName":"Gemini 3 Flash","score":90.4,"functional":85.9,"quality":76.6,"passed":true,"tokens":2244,"timeMs":27659,"cost":0.0055},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4567,"timeMs":39531,"cost":0.064},{"modelName":"Claude Sonnet 4.5","score":93.8,"functional":89.2,"quality":81,"passed":true,"tokens":3470,"timeMs":42443,"cost":0.0331},{"modelName":"Claude Opus 4.5","score":91.9,"functional":87.3,"quality":80.2,"passed":true,"tokens":3980,"timeMs":42654,"cost":0.0638},{"modelName":"Claude Haiku 4.5","score":87.2,"functional":82.8,"quality":78.4,"passed":true,"tokens":5158,"timeMs":19944,"cost":0.0182},{"modelName":"DeepSeek v3.2","score":92.3,"functional":87.6,"quality":80.6,"passed":true,"tokens":3307,"timeMs":106121,"cost":0.0029},{"modelName":"OpenAI GPT-5.2","score":89.4,"functional":84.9,"quality":79.1,"passed":true,"tokens":2833,"timeMs":20156,"cost":0.0324},{"modelName":"GLM 4.7 
Flash","score":83,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":86,"functional":81.7,"quality":78.6,"passed":true,"tokens":2559,"timeMs":88413,"cost":0.0013},{"modelName":"Grok 4","score":87.3,"functional":82.9,"quality":79.2,"passed":true,"tokens":2949,"timeMs":86658,"cost":0.034},{"modelName":"Grok 4.1 Fast","score":86.1,"functional":81.8,"quality":77.9,"passed":true,"tokens":2673,"timeMs":94134,"cost":0.0015},{"modelName":"Qwen3 Max","score":88.3,"functional":85,"quality":80,"passed":true,"tokens":5474,"timeMs":101653,"cost":0.0307}]},{"taskId":"saas-core/billing/task-1","category":"saas-core","subcategory":"billing","results":[{"modelName":"GLM 4-Plus","score":94.2,"functional":89.5,"quality":81.2,"passed":true,"tokens":5101,"timeMs":103171,"cost":0.0052},{"modelName":"MiniMax M2.1","score":85,"functional":80.8,"quality":78.7,"passed":true,"tokens":16508,"timeMs":200570,"cost":0.0157},{"modelName":"GLM-4.7","score":89.9,"functional":85.4,"quality":80.8,"passed":true,"tokens":3252,"timeMs":55987,"cost":0.0046},{"modelName":"Gemini 3 Flash","score":90.1,"functional":85.6,"quality":76.5,"passed":true,"tokens":1749,"timeMs":32839,"cost":0.005},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4582,"timeMs":39859,"cost":0.0642},{"modelName":"Claude Sonnet 4.5","score":95.2,"functional":90.5,"quality":81.4,"passed":true,"tokens":3767,"timeMs":50994,"cost":0.0362},{"modelName":"Claude Opus 4.5","score":89.4,"functional":84.9,"quality":79.5,"passed":true,"tokens":4202,"timeMs":42043,"cost":0.0723},{"modelName":"Claude Haiku 4.5","score":86.1,"functional":81.8,"quality":78.1,"passed":true,"tokens":5059,"timeMs":19162,"cost":0.017},{"modelName":"DeepSeek v3.2","score":89.8,"functional":85.3,"quality":79.9,"passed":true,"tokens":2558,"timeMs":62807,"cost":0.0031},{"modelName":"OpenAI 
GPT-5.2","score":91.8,"functional":87.2,"quality":79.9,"passed":true,"tokens":2628,"timeMs":26897,"cost":0.0333},{"modelName":"GLM 4.7 Flash","score":54.6,"functional":0,"quality":80,"passed":false,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":87.3,"functional":82.9,"quality":78.9,"passed":true,"tokens":2736,"timeMs":68558,"cost":0.0014},{"modelName":"Grok 4","score":85.6,"functional":81.3,"quality":78.7,"passed":true,"tokens":2700,"timeMs":66588,"cost":0.029},{"modelName":"Grok 4.1 Fast","score":88.5,"functional":84,"quality":78.6,"passed":true,"tokens":3865,"timeMs":79211,"cost":0.0014},{"modelName":"Qwen3 Max","score":87.6,"functional":85,"quality":80,"passed":true,"tokens":7548,"timeMs":142951,"cost":0.043}]},{"taskId":"saas-core/billing/task-2","category":"saas-core","subcategory":"billing","results":[{"modelName":"GLM 4-Plus","score":92.3,"functional":87.6,"quality":80.6,"passed":true,"tokens":4464,"timeMs":70088,"cost":0.0055},{"modelName":"MiniMax M2.1","score":86.7,"functional":82.4,"quality":79.2,"passed":true,"tokens":15051,"timeMs":167057,"cost":0.0137},{"modelName":"GLM-4.7","score":90.9,"functional":86.3,"quality":81,"passed":true,"tokens":3619,"timeMs":63835,"cost":0.0047},{"modelName":"Gemini 3 Flash","score":88.7,"functional":84.3,"quality":76.1,"passed":true,"tokens":1741,"timeMs":25172,"cost":0.0044},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4582,"timeMs":39859,"cost":0.0642},{"modelName":"Claude Sonnet 4.5","score":95.5,"functional":90.7,"quality":81.5,"passed":true,"tokens":3602,"timeMs":45100,"cost":0.0415},{"modelName":"Claude Opus 4.5","score":87.4,"functional":83,"quality":78.9,"passed":true,"tokens":3973,"timeMs":52748,"cost":0.061},{"modelName":"Claude Haiku 4.5","score":86.2,"functional":81.9,"quality":78.1,"passed":true,"tokens":4724,"timeMs":27335,"cost":0.015},{"modelName":"DeepSeek 
v3.2","score":87.5,"functional":83.1,"quality":79.2,"passed":true,"tokens":2864,"timeMs":110875,"cost":0.0026},{"modelName":"OpenAI GPT-5.2","score":94,"functional":89.3,"quality":80.5,"passed":true,"tokens":3191,"timeMs":26185,"cost":0.0226},{"modelName":"GLM 4.7 Flash","score":54.6,"functional":0,"quality":80,"passed":false,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":89.4,"functional":84.9,"quality":79.6,"passed":true,"tokens":3448,"timeMs":72608,"cost":0.001},{"modelName":"Grok 4","score":85,"functional":80.8,"quality":78.5,"passed":true,"tokens":2405,"timeMs":90040,"cost":0.0278},{"modelName":"Grok 4.1 Fast","score":90.9,"functional":86.4,"quality":79.3,"passed":true,"tokens":3028,"timeMs":68263,"cost":0.0011},{"modelName":"Qwen3 Max","score":87.6,"functional":85,"quality":80,"passed":true,"tokens":7548,"timeMs":142951,"cost":0.043}]},{"taskId":"saas-core/billing/task-3","category":"saas-core","subcategory":"billing","results":[{"modelName":"GLM 4-Plus","score":89.8,"functional":85.3,"quality":79.9,"passed":true,"tokens":4377,"timeMs":92381,"cost":0.005},{"modelName":"MiniMax M2.1","score":89.1,"functional":84.6,"quality":79.9,"passed":true,"tokens":17304,"timeMs":150056,"cost":0.0109},{"modelName":"GLM-4.7","score":90.6,"functional":86,"quality":81,"passed":true,"tokens":3738,"timeMs":50945,"cost":0.0041},{"modelName":"Gemini 3 Flash","score":86.5,"functional":82.1,"quality":75.4,"passed":true,"tokens":1924,"timeMs":24741,"cost":0.0053},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4582,"timeMs":39859,"cost":0.0642},{"modelName":"Claude Sonnet 4.5","score":94.6,"functional":89.8,"quality":81.2,"passed":true,"tokens":3838,"timeMs":53830,"cost":0.0316},{"modelName":"Claude Opus 4.5","score":86.3,"functional":81.9,"quality":78.5,"passed":true,"tokens":3059,"timeMs":55316,"cost":0.0639},{"modelName":"Claude Haiku 
4.5","score":87.4,"functional":83.1,"quality":78.5,"passed":true,"tokens":3981,"timeMs":19068,"cost":0.0161},{"modelName":"DeepSeek v3.2","score":85.8,"functional":81.5,"quality":78.7,"passed":true,"tokens":2936,"timeMs":96895,"cost":0.003},{"modelName":"OpenAI GPT-5.2","score":95.4,"functional":90.7,"quality":81,"passed":true,"tokens":2404,"timeMs":25505,"cost":0.0238},{"modelName":"GLM 4.7 Flash","score":54.6,"functional":0,"quality":80,"passed":false,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":91.9,"functional":87.3,"quality":80.3,"passed":true,"tokens":3081,"timeMs":49960,"cost":0.0012},{"modelName":"Grok 4","score":85.6,"functional":81.3,"quality":78.7,"passed":true,"tokens":2445,"timeMs":89606,"cost":0.0329},{"modelName":"Grok 4.1 Fast","score":92.8,"functional":88.2,"quality":79.9,"passed":true,"tokens":2650,"timeMs":72960,"cost":0.0014},{"modelName":"Qwen3 Max","score":87.6,"functional":85,"quality":80,"passed":true,"tokens":7548,"timeMs":142951,"cost":0.043}]},{"taskId":"saas-core/billing/task-4","category":"saas-core","subcategory":"billing","results":[{"modelName":"GLM 4-Plus","score":87.5,"functional":83.1,"quality":79.2,"passed":true,"tokens":4968,"timeMs":81446,"cost":0.0045},{"modelName":"MiniMax M2.1","score":91.5,"functional":86.9,"quality":80.6,"passed":true,"tokens":15209,"timeMs":130622,"cost":0.015},{"modelName":"GLM-4.7","score":89.2,"functional":84.7,"quality":80.5,"passed":true,"tokens":2885,"timeMs":65321,"cost":0.0043},{"modelName":"Gemini 3 Flash","score":84,"functional":79.8,"quality":74.7,"passed":true,"tokens":2249,"timeMs":28737,"cost":0.0054},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4582,"timeMs":39859,"cost":0.0642},{"modelName":"Claude Sonnet 4.5","score":92.6,"functional":88,"quality":80.6,"passed":true,"tokens":3660,"timeMs":30634,"cost":0.0462},{"modelName":"Claude Opus 
4.5","score":86.4,"functional":82,"quality":78.6,"passed":true,"tokens":3903,"timeMs":31878,"cost":0.0767},{"modelName":"Claude Haiku 4.5","score":89.6,"functional":85.1,"quality":79.1,"passed":true,"tokens":5138,"timeMs":25300,"cost":0.0183},{"modelName":"DeepSeek v3.2","score":85.2,"functional":80.9,"quality":78.5,"passed":true,"tokens":3463,"timeMs":92773,"cost":0.0033},{"modelName":"OpenAI GPT-5.2","score":95.7,"functional":90.9,"quality":81,"passed":true,"tokens":3018,"timeMs":28410,"cost":0.0333},{"modelName":"GLM 4.7 Flash","score":54.6,"functional":0,"quality":80,"passed":false,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":94.1,"functional":89.4,"quality":81,"passed":true,"tokens":2735,"timeMs":61654,"cost":0.0012},{"modelName":"Grok 4","score":87.3,"functional":83,"quality":79.2,"passed":true,"tokens":2422,"timeMs":62332,"cost":0.0356},{"modelName":"Grok 4.1 Fast","score":93.8,"functional":89.1,"quality":80.2,"passed":true,"tokens":2640,"timeMs":108766,"cost":0.0013},{"modelName":"Qwen3 Max","score":87.6,"functional":85,"quality":80,"passed":true,"tokens":7548,"timeMs":142951,"cost":0.043}]},{"taskId":"saas-core/billing/task-5","category":"saas-core","subcategory":"billing","results":[{"modelName":"GLM 4-Plus","score":85.8,"functional":81.5,"quality":78.7,"passed":true,"tokens":5236,"timeMs":91973,"cost":0.0047},{"modelName":"MiniMax M2.1","score":93.4,"functional":88.8,"quality":81.2,"passed":true,"tokens":18257,"timeMs":210842,"cost":0.0107},{"modelName":"GLM-4.7","score":86.9,"functional":82.6,"quality":79.9,"passed":true,"tokens":3430,"timeMs":60078,"cost":0.0046},{"modelName":"Gemini 3 Flash","score":81.9,"functional":77.8,"quality":74,"passed":true,"tokens":2157,"timeMs":24572,"cost":0.0043},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4582,"timeMs":39859,"cost":0.0642},{"modelName":"Claude Sonnet 
4.5","score":90.2,"functional":85.7,"quality":79.9,"passed":true,"tokens":3008,"timeMs":37606,"cost":0.0362},{"modelName":"Claude Opus 4.5","score":87.6,"functional":83.2,"quality":78.9,"passed":true,"tokens":2879,"timeMs":53973,"cost":0.0734},{"modelName":"Claude Haiku 4.5","score":92,"functional":87.4,"quality":79.9,"passed":true,"tokens":4973,"timeMs":18816,"cost":0.0143},{"modelName":"DeepSeek v3.2","score":85.8,"functional":81.5,"quality":78.7,"passed":true,"tokens":2482,"timeMs":98061,"cost":0.0025},{"modelName":"OpenAI GPT-5.2","score":94.7,"functional":90,"quality":80.8,"passed":true,"tokens":2971,"timeMs":34277,"cost":0.0294},{"modelName":"GLM 4.7 Flash","score":54.6,"functional":0,"quality":80,"passed":false,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":95.5,"functional":90.7,"quality":81.4,"passed":true,"tokens":2519,"timeMs":69142,"cost":0.0011},{"modelName":"Grok 4","score":89.7,"functional":85.2,"quality":79.9,"passed":true,"tokens":2757,"timeMs":58625,"cost":0.0336},{"modelName":"Grok 4.1 Fast","score":93.5,"functional":88.8,"quality":80.1,"passed":true,"tokens":3416,"timeMs":105382,"cost":0.0015},{"modelName":"Qwen3 Max","score":87.6,"functional":85,"quality":80,"passed":true,"tokens":7548,"timeMs":142951,"cost":0.043}]},{"taskId":"saas-core/multi-tenant/task-1","category":"saas-core","subcategory":"multi-tenant","results":[{"modelName":"GLM 4-Plus","score":85.2,"functional":80.9,"quality":78.5,"passed":true,"tokens":4466,"timeMs":84355,"cost":0.0047},{"modelName":"MiniMax M2.1","score":94.4,"functional":89.7,"quality":81.5,"passed":true,"tokens":14425,"timeMs":122306,"cost":0.0141},{"modelName":"GLM-4.7","score":84.5,"functional":80.2,"quality":79.1,"passed":true,"tokens":3317,"timeMs":46956,"cost":0.0047},{"modelName":"Gemini 3 Flash","score":80.6,"functional":76.6,"quality":73.7,"passed":true,"tokens":2109,"timeMs":34874,"cost":0.0042},{"modelName":"Gemini 3 Pro 
Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4543,"timeMs":38370,"cost":0.0636},{"modelName":"Claude Sonnet 4.5","score":87.8,"functional":83.4,"quality":79.2,"passed":true,"tokens":3846,"timeMs":29459,"cost":0.0345},{"modelName":"Claude Opus 4.5","score":89.8,"functional":85.3,"quality":79.6,"passed":true,"tokens":3212,"timeMs":49058,"cost":0.08},{"modelName":"Claude Haiku 4.5","score":94.3,"functional":89.5,"quality":80.5,"passed":true,"tokens":3634,"timeMs":21615,"cost":0.0169},{"modelName":"DeepSeek v3.2","score":87.5,"functional":83.1,"quality":79.2,"passed":true,"tokens":2885,"timeMs":91815,"cost":0.0024},{"modelName":"OpenAI GPT-5.2","score":92.8,"functional":88.2,"quality":80.2,"passed":true,"tokens":2356,"timeMs":21502,"cost":0.0232},{"modelName":"GLM 4.7 Flash","score":82.3,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":95.7,"functional":91,"quality":81.5,"passed":true,"tokens":3362,"timeMs":68983,"cost":0.0014},{"modelName":"Grok 4","score":92.1,"functional":87.5,"quality":80.6,"passed":true,"tokens":2874,"timeMs":65327,"cost":0.0251},{"modelName":"Grok 4.1 Fast","score":92.1,"functional":87.4,"quality":79.7,"passed":true,"tokens":3671,"timeMs":78472,"cost":0.0013},{"modelName":"Qwen3 Max","score":87.6,"functional":85,"quality":80,"passed":true,"tokens":7203,"timeMs":143429,"cost":0.0411}]},{"taskId":"saas-core/multi-tenant/task-2","category":"saas-core","subcategory":"multi-tenant","results":[{"modelName":"GLM 4-Plus","score":85.8,"functional":81.5,"quality":78.7,"passed":true,"tokens":4079,"timeMs":85499,"cost":0.0061},{"modelName":"MiniMax M2.1","score":94.1,"functional":89.4,"quality":81.4,"passed":true,"tokens":14304,"timeMs":194790,"cost":0.0154},{"modelName":"GLM-4.7","score":82.3,"functional":78.2,"quality":78.5,"passed":true,"tokens":3479,"timeMs":68662,"cost":0.0043},{"modelName":"Gemini 3 
Flash","score":80.6,"functional":76.5,"quality":73.6,"passed":true,"tokens":2118,"timeMs":31529,"cost":0.0046},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4543,"timeMs":38370,"cost":0.0636},{"modelName":"Claude Sonnet 4.5","score":86.2,"functional":81.9,"quality":78.7,"passed":true,"tokens":3642,"timeMs":36046,"cost":0.0345},{"modelName":"Claude Opus 4.5","score":92.2,"functional":87.6,"quality":80.3,"passed":true,"tokens":4211,"timeMs":56196,"cost":0.061},{"modelName":"Claude Haiku 4.5","score":95.7,"functional":90.9,"quality":81,"passed":true,"tokens":5029,"timeMs":25552,"cost":0.0202},{"modelName":"DeepSeek v3.2","score":89.9,"functional":85.4,"quality":79.9,"passed":true,"tokens":2574,"timeMs":95001,"cost":0.0026},{"modelName":"OpenAI GPT-5.2","score":90.4,"functional":85.9,"quality":79.4,"passed":true,"tokens":2272,"timeMs":24238,"cost":0.0259},{"modelName":"GLM 4.7 Flash","score":82.3,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":94.8,"functional":90.1,"quality":81.2,"passed":true,"tokens":2814,"timeMs":71443,"cost":0.0011},{"modelName":"Grok 4","score":94,"functional":89.3,"quality":81.2,"passed":true,"tokens":3088,"timeMs":59601,"cost":0.0286},{"modelName":"Grok 4.1 Fast","score":89.8,"functional":85.3,"quality":79,"passed":true,"tokens":2935,"timeMs":72020,"cost":0.0013},{"modelName":"Qwen3 Max","score":87.6,"functional":85,"quality":80,"passed":true,"tokens":7203,"timeMs":143429,"cost":0.0411}]},{"taskId":"saas-core/multi-tenant/task-3","category":"saas-core","subcategory":"multi-tenant","results":[{"modelName":"GLM 4-Plus","score":87.5,"functional":83.1,"quality":79.2,"passed":true,"tokens":4476,"timeMs":87865,"cost":0.0055},{"modelName":"MiniMax 
M2.1","score":92.7,"functional":88,"quality":81,"passed":true,"tokens":15374,"timeMs":134853,"cost":0.0144},{"modelName":"GLM-4.7","score":81.1,"functional":77,"quality":78.1,"passed":true,"tokens":3273,"timeMs":63495,"cost":0.0034},{"modelName":"Gemini 3 Flash","score":81.7,"functional":77.6,"quality":74,"passed":true,"tokens":1857,"timeMs":35831,"cost":0.0051},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4543,"timeMs":38370,"cost":0.0636},{"modelName":"Claude Sonnet 4.5","score":85.6,"functional":81.3,"quality":78.5,"passed":true,"tokens":3253,"timeMs":38633,"cost":0.0436},{"modelName":"Claude Opus 4.5","score":94.4,"functional":89.7,"quality":81,"passed":true,"tokens":2929,"timeMs":34207,"cost":0.0748},{"modelName":"Claude Haiku 4.5","score":95.9,"functional":91.1,"quality":81,"passed":true,"tokens":3744,"timeMs":16131,"cost":0.0181},{"modelName":"DeepSeek v3.2","score":92.3,"functional":87.7,"quality":80.6,"passed":true,"tokens":2774,"timeMs":73973,"cost":0.003},{"modelName":"OpenAI GPT-5.2","score":88,"functional":83.6,"quality":78.7,"passed":true,"tokens":3221,"timeMs":23043,"cost":0.0327},{"modelName":"GLM 4.7 Flash","score":82.3,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":92.9,"functional":88.2,"quality":80.6,"passed":true,"tokens":2787,"timeMs":57024,"cost":0.0012},{"modelName":"Grok 4","score":95,"functional":90.2,"quality":81.5,"passed":true,"tokens":2308,"timeMs":74400,"cost":0.0332},{"modelName":"Grok 4.1 Fast","score":87.4,"functional":83,"quality":78.3,"passed":true,"tokens":3479,"timeMs":96987,"cost":0.0013},{"modelName":"Qwen3 Max","score":87.6,"functional":85,"quality":80,"passed":true,"tokens":7203,"timeMs":143429,"cost":0.0411}]},{"taskId":"saas-core/multi-tenant/task-4","category":"saas-core","subcategory":"multi-tenant","results":[{"modelName":"GLM 
4-Plus","score":89.9,"functional":85.4,"quality":79.9,"passed":true,"tokens":3575,"timeMs":109157,"cost":0.0051},{"modelName":"MiniMax M2.1","score":90.5,"functional":85.9,"quality":80.3,"passed":true,"tokens":14163,"timeMs":164649,"cost":0.0147},{"modelName":"GLM-4.7","score":81,"functional":77,"quality":78.1,"passed":true,"tokens":3938,"timeMs":50608,"cost":0.0046},{"modelName":"Gemini 3 Flash","score":83.7,"functional":79.5,"quality":74.6,"passed":true,"tokens":2050,"timeMs":24711,"cost":0.0041},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4543,"timeMs":38370,"cost":0.0636},{"modelName":"Claude Sonnet 4.5","score":86.2,"functional":81.9,"quality":78.7,"passed":true,"tokens":3649,"timeMs":45848,"cost":0.0438},{"modelName":"Claude Opus 4.5","score":95.8,"functional":91,"quality":81.4,"passed":true,"tokens":4102,"timeMs":42202,"cost":0.0771},{"modelName":"Claude Haiku 4.5","score":95,"functional":90.2,"quality":80.8,"passed":true,"tokens":3671,"timeMs":20618,"cost":0.0146},{"modelName":"DeepSeek v3.2","score":94.2,"functional":89.5,"quality":81.2,"passed":true,"tokens":3132,"timeMs":100818,"cost":0.0026},{"modelName":"OpenAI GPT-5.2","score":86.4,"functional":82,"quality":78.2,"passed":true,"tokens":3157,"timeMs":28312,"cost":0.032},{"modelName":"GLM 4.7 Flash","score":82.3,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":90.4,"functional":85.9,"quality":79.9,"passed":true,"tokens":2533,"timeMs":65615,"cost":0.0014},{"modelName":"Grok 4","score":94.7,"functional":89.9,"quality":81.4,"passed":true,"tokens":2701,"timeMs":53510,"cost":0.0255},{"modelName":"Grok 4.1 Fast","score":85.2,"functional":81,"quality":77.6,"passed":true,"tokens":2932,"timeMs":74716,"cost":0.0013},{"modelName":"Qwen3 
Max","score":87.6,"functional":85,"quality":80,"passed":true,"tokens":7203,"timeMs":143429,"cost":0.0411}]},{"taskId":"saas-core/multi-tenant/task-5","category":"saas-core","subcategory":"multi-tenant","results":[{"modelName":"GLM 4-Plus","score":92.3,"functional":87.7,"quality":80.6,"passed":true,"tokens":4736,"timeMs":89073,"cost":0.005},{"modelName":"MiniMax M2.1","score":88,"functional":83.6,"quality":79.6,"passed":true,"tokens":16391,"timeMs":151642,"cost":0.016},{"modelName":"GLM-4.7","score":82.1,"functional":78,"quality":78.4,"passed":true,"tokens":2982,"timeMs":69921,"cost":0.0033},{"modelName":"Gemini 3 Flash","score":86.2,"functional":81.9,"quality":75.3,"passed":true,"tokens":2347,"timeMs":35887,"cost":0.0055},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4543,"timeMs":38370,"cost":0.0636},{"modelName":"Claude Sonnet 4.5","score":87.9,"functional":83.5,"quality":79.2,"passed":true,"tokens":3562,"timeMs":34747,"cost":0.0378},{"modelName":"Claude Opus 4.5","score":96.1,"functional":91.3,"quality":81.5,"passed":true,"tokens":2952,"timeMs":43301,"cost":0.0761},{"modelName":"Claude Haiku 4.5","score":93,"functional":88.4,"quality":80.2,"passed":true,"tokens":4635,"timeMs":19718,"cost":0.02},{"modelName":"DeepSeek v3.2","score":95.1,"functional":90.4,"quality":81.5,"passed":true,"tokens":3474,"timeMs":66508,"cost":0.0031},{"modelName":"OpenAI GPT-5.2","score":85.8,"functional":81.5,"quality":78.1,"passed":true,"tokens":2437,"timeMs":35312,"cost":0.0245},{"modelName":"GLM 4.7 Flash","score":82.3,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":88.1,"functional":83.7,"quality":79.2,"passed":true,"tokens":2666,"timeMs":62471,"cost":0.0012},{"modelName":"Grok 4","score":93.3,"functional":88.6,"quality":81,"passed":true,"tokens":2850,"timeMs":79964,"cost":0.0243},{"modelName":"Grok 4.1 
Fast","score":84,"functional":79.8,"quality":77.3,"passed":true,"tokens":2894,"timeMs":69865,"cost":0.0015},{"modelName":"Qwen3 Max","score":87.6,"functional":85,"quality":80,"passed":true,"tokens":7203,"timeMs":143429,"cost":0.0411}]},{"taskId":"saas-core/realtime/task-1","category":"saas-core","subcategory":"realtime","results":[{"modelName":"GLM 4-Plus","score":94.2,"functional":89.5,"quality":81.2,"passed":true,"tokens":5076,"timeMs":114512,"cost":0.0062},{"modelName":"MiniMax M2.1","score":85.9,"functional":81.6,"quality":78.9,"passed":true,"tokens":15277,"timeMs":161694,"cost":0.0128},{"modelName":"GLM-4.7","score":84.2,"functional":80,"quality":79,"passed":true,"tokens":3147,"timeMs":51132,"cost":0.0037},{"modelName":"Gemini 3 Flash","score":88.5,"functional":84,"quality":76,"passed":true,"tokens":2236,"timeMs":34506,"cost":0.004},{"modelName":"Gemini 3 Pro Preview","score":89.4,"functional":85,"quality":80,"passed":true,"tokens":4613,"timeMs":41170,"cost":0.0646},{"modelName":"Claude Sonnet 4.5","score":90.2,"functional":85.7,"quality":79.9,"passed":true,"tokens":3896,"timeMs":50920,"cost":0.0374},{"modelName":"Claude Opus 4.5","score":95.1,"functional":90.4,"quality":81.2,"passed":true,"tokens":4280,"timeMs":36651,"cost":0.0796},{"modelName":"Claude Haiku 4.5","score":90.6,"functional":86.1,"quality":79.4,"passed":true,"tokens":4885,"timeMs":19691,"cost":0.0137},{"modelName":"DeepSeek v3.2","score":94.9,"functional":90.1,"quality":81.4,"passed":true,"tokens":2599,"timeMs":77163,"cost":0.003},{"modelName":"OpenAI GPT-5.2","score":86.4,"functional":82.1,"quality":78.2,"passed":true,"tokens":2974,"timeMs":19812,"cost":0.0302},{"modelName":"GLM 4.7 Flash","score":86.5,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":86.4,"functional":82.1,"quality":78.7,"passed":true,"tokens":2673,"timeMs":79186,"cost":0.0011},{"modelName":"Grok 
4","score":91,"functional":86.5,"quality":80.3,"passed":true,"tokens":2859,"timeMs":52774,"cost":0.0312},{"modelName":"Grok 4.1 Fast","score":83.9,"functional":79.7,"quality":77.2,"passed":true,"tokens":2956,"timeMs":94374,"cost":0.0012},{"modelName":"Qwen3 Max","score":89,"functional":85,"quality":80,"passed":true,"tokens":4622,"timeMs":91155,"cost":0.0254}]},{"taskId":"saas-core/realtime/task-2","category":"saas-core","subcategory":"realtime","results":[{"modelName":"GLM 4-Plus","score":95.2,"functional":90.4,"quality":81.5,"passed":true,"tokens":4235,"timeMs":99780,"cost":0.0058},{"modelName":"MiniMax M2.1","score":84.6,"functional":80.4,"quality":78.6,"passed":true,"tokens":17643,"timeMs":194658,"cost":0.0139},{"modelName":"GLM-4.7","score":86.6,"functional":82.3,"quality":79.8,"passed":true,"tokens":3792,"timeMs":56317,"cost":0.0038},{"modelName":"Gemini 3 Flash","score":90,"functional":85.5,"quality":76.5,"passed":true,"tokens":1767,"timeMs":28124,"cost":0.0045},{"modelName":"Gemini 3 Pro Preview","score":89.4,"functional":85,"quality":80,"passed":true,"tokens":4613,"timeMs":41170,"cost":0.0646},{"modelName":"Claude Sonnet 4.5","score":92.7,"functional":88,"quality":80.6,"passed":true,"tokens":3674,"timeMs":39391,"cost":0.0367},{"modelName":"Claude Opus 4.5","score":93.2,"functional":88.6,"quality":80.6,"passed":true,"tokens":2960,"timeMs":33457,"cost":0.0726},{"modelName":"Claude Haiku 4.5","score":88.2,"functional":83.8,"quality":78.7,"passed":true,"tokens":4917,"timeMs":19917,"cost":0.0198},{"modelName":"DeepSeek v3.2","score":93.4,"functional":88.8,"quality":81,"passed":true,"tokens":2470,"timeMs":107584,"cost":0.0028},{"modelName":"OpenAI GPT-5.2","score":88.1,"functional":83.7,"quality":78.8,"passed":true,"tokens":3056,"timeMs":27723,"cost":0.0294},{"modelName":"GLM 4.7 Flash","score":86.5,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 
Fast","score":85.8,"functional":81.5,"quality":78.5,"passed":true,"tokens":2443,"timeMs":80415,"cost":0.0012},{"modelName":"Grok 4","score":88.6,"functional":84.1,"quality":79.6,"passed":true,"tokens":2739,"timeMs":92130,"cost":0.0252},{"modelName":"Grok 4.1 Fast","score":85,"functional":80.8,"quality":77.6,"passed":true,"tokens":3655,"timeMs":110877,"cost":0.0015},{"modelName":"Qwen3 Max","score":89,"functional":85,"quality":80,"passed":true,"tokens":4622,"timeMs":91155,"cost":0.0254}]},{"taskId":"saas-core/realtime/task-3","category":"saas-core","subcategory":"realtime","results":[{"modelName":"GLM 4-Plus","score":94.9,"functional":90.1,"quality":81.4,"passed":true,"tokens":4375,"timeMs":113308,"cost":0.0046},{"modelName":"MiniMax M2.1","score":84.5,"functional":80.3,"quality":78.5,"passed":true,"tokens":15399,"timeMs":124709,"cost":0.0142},{"modelName":"GLM-4.7","score":88.9,"functional":84.5,"quality":80.5,"passed":true,"tokens":2839,"timeMs":46408,"cost":0.0046},{"modelName":"Gemini 3 Flash","score":90.4,"functional":85.9,"quality":76.6,"passed":true,"tokens":2414,"timeMs":21088,"cost":0.0042},{"modelName":"Gemini 3 Pro Preview","score":89.4,"functional":85,"quality":80,"passed":true,"tokens":4613,"timeMs":41170,"cost":0.0646},{"modelName":"Claude Sonnet 4.5","score":94.6,"functional":89.8,"quality":81.2,"passed":true,"tokens":2722,"timeMs":37170,"cost":0.0426},{"modelName":"Claude Opus 4.5","score":90.8,"functional":86.2,"quality":79.9,"passed":true,"tokens":3949,"timeMs":55439,"cost":0.0693},{"modelName":"Claude Haiku 4.5","score":86.6,"functional":82.2,"quality":78.2,"passed":true,"tokens":4518,"timeMs":24370,"cost":0.016},{"modelName":"DeepSeek v3.2","score":91.2,"functional":86.7,"quality":80.3,"passed":true,"tokens":3096,"timeMs":76369,"cost":0.0025},{"modelName":"OpenAI GPT-5.2","score":90.4,"functional":85.9,"quality":79.5,"passed":true,"tokens":2278,"timeMs":30811,"cost":0.0315},{"modelName":"GLM 4.7 
Flash","score":86.5,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":86.4,"functional":82.1,"quality":78.7,"passed":true,"tokens":2512,"timeMs":55722,"cost":0.001},{"modelName":"Grok 4","score":86.4,"functional":82.1,"quality":78.9,"passed":true,"tokens":2768,"timeMs":80096,"cost":0.0252},{"modelName":"Grok 4.1 Fast","score":87.1,"functional":82.7,"quality":78.2,"passed":true,"tokens":2805,"timeMs":78487,"cost":0.0014},{"modelName":"Qwen3 Max","score":89,"functional":85,"quality":80,"passed":true,"tokens":4622,"timeMs":91155,"cost":0.0254}]},{"taskId":"saas-core/realtime/task-4","category":"saas-core","subcategory":"realtime","results":[{"modelName":"GLM 4-Plus","score":93.5,"functional":88.8,"quality":81,"passed":true,"tokens":4725,"timeMs":79584,"cost":0.006},{"modelName":"MiniMax M2.1","score":85.7,"functional":81.4,"quality":78.9,"passed":true,"tokens":16973,"timeMs":175112,"cost":0.0127},{"modelName":"GLM-4.7","score":90.5,"functional":85.9,"quality":80.9,"passed":true,"tokens":3835,"timeMs":44492,"cost":0.0034},{"modelName":"Gemini 3 Flash","score":89.6,"functional":85.1,"quality":76.4,"passed":true,"tokens":2386,"timeMs":34956,"cost":0.0049},{"modelName":"Gemini 3 Pro Preview","score":89.4,"functional":85,"quality":80,"passed":true,"tokens":4613,"timeMs":41170,"cost":0.0646},{"modelName":"Claude Sonnet 4.5","score":95.5,"functional":90.7,"quality":81.5,"passed":true,"tokens":3506,"timeMs":49141,"cost":0.0382},{"modelName":"Claude Opus 4.5","score":88.4,"functional":84,"quality":79.2,"passed":true,"tokens":3306,"timeMs":32596,"cost":0.0747},{"modelName":"Claude Haiku 4.5","score":86,"functional":81.7,"quality":78.1,"passed":true,"tokens":3996,"timeMs":24348,"cost":0.0166},{"modelName":"DeepSeek v3.2","score":88.8,"functional":84.3,"quality":79.6,"passed":true,"tokens":2430,"timeMs":107453,"cost":0.0025},{"modelName":"OpenAI 
GPT-5.2","score":92.9,"functional":88.2,"quality":80.2,"passed":true,"tokens":2975,"timeMs":31602,"cost":0.0251},{"modelName":"GLM 4.7 Flash","score":86.5,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":88.1,"functional":83.7,"quality":79.2,"passed":true,"tokens":2923,"timeMs":72601,"cost":0.0011},{"modelName":"Grok 4","score":85.2,"functional":80.9,"quality":78.6,"passed":true,"tokens":2842,"timeMs":68175,"cost":0.0276},{"modelName":"Grok 4.1 Fast","score":89.5,"functional":85.1,"quality":78.9,"passed":true,"tokens":2938,"timeMs":92706,"cost":0.0014},{"modelName":"Qwen3 Max","score":89,"functional":85,"quality":80,"passed":true,"tokens":4622,"timeMs":91155,"cost":0.0254}]},{"taskId":"saas-core/realtime/task-5","category":"saas-core","subcategory":"realtime","results":[{"modelName":"GLM 4-Plus","score":91.2,"functional":86.7,"quality":80.3,"passed":true,"tokens":4302,"timeMs":97304,"cost":0.0057},{"modelName":"MiniMax M2.1","score":87.7,"functional":83.3,"quality":79.5,"passed":true,"tokens":17648,"timeMs":200191,"cost":0.0145},{"modelName":"GLM-4.7","score":90.9,"functional":86.3,"quality":81.1,"passed":true,"tokens":2988,"timeMs":58059,"cost":0.0039},{"modelName":"Gemini 3 Flash","score":87.8,"functional":83.4,"quality":75.8,"passed":true,"tokens":2314,"timeMs":22627,"cost":0.0053},{"modelName":"Gemini 3 Pro Preview","score":89.4,"functional":85,"quality":80,"passed":true,"tokens":4613,"timeMs":41170,"cost":0.0646},{"modelName":"Claude Sonnet 4.5","score":95.2,"functional":90.5,"quality":81.4,"passed":true,"tokens":3295,"timeMs":50557,"cost":0.0455},{"modelName":"Claude Opus 4.5","score":86.8,"functional":82.4,"quality":78.7,"passed":true,"tokens":4096,"timeMs":55280,"cost":0.0774},{"modelName":"Claude Haiku 4.5","score":86.6,"functional":82.3,"quality":78.2,"passed":true,"tokens":4925,"timeMs":27930,"cost":0.02},{"modelName":"DeepSeek 
v3.2","score":86.6,"functional":82.3,"quality":78.9,"passed":true,"tokens":2477,"timeMs":63832,"cost":0.0027},{"modelName":"OpenAI GPT-5.2","score":94.8,"functional":90,"quality":80.8,"passed":true,"tokens":2931,"timeMs":24228,"cost":0.0325},{"modelName":"GLM 4.7 Flash","score":86.5,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":90.5,"functional":85.9,"quality":79.9,"passed":true,"tokens":2443,"timeMs":87090,"cost":0.0012},{"modelName":"Grok 4","score":85.1,"functional":80.9,"quality":78.5,"passed":true,"tokens":2757,"timeMs":91549,"cost":0.0289},{"modelName":"Grok 4.1 Fast","score":91.8,"functional":87.2,"quality":79.6,"passed":true,"tokens":3577,"timeMs":92693,"cost":0.0015},{"modelName":"Qwen3 Max","score":89,"functional":85,"quality":80,"passed":true,"tokens":4622,"timeMs":91155,"cost":0.0254}]},{"taskId":"saas-core/security/task-1","category":"saas-core","subcategory":"security","results":[{"modelName":"GLM 4-Plus","score":88.8,"functional":84.3,"quality":79.6,"passed":true,"tokens":4736,"timeMs":96868,"cost":0.0055},{"modelName":"MiniMax M2.1","score":90.2,"functional":85.7,"quality":80.2,"passed":true,"tokens":14567,"timeMs":131407,"cost":0.0125},{"modelName":"GLM-4.7","score":90.1,"functional":85.6,"quality":80.8,"passed":true,"tokens":3888,"timeMs":47803,"cost":0.0033},{"modelName":"Gemini 3 Flash","score":85.4,"functional":81.1,"quality":75.1,"passed":true,"tokens":2182,"timeMs":29502,"cost":0.0045},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4827,"timeMs":42600,"cost":0.0676},{"modelName":"Claude Sonnet 4.5","score":93.8,"functional":89.1,"quality":81,"passed":true,"tokens":3167,"timeMs":45266,"cost":0.0377},{"modelName":"Claude Opus 4.5","score":86.2,"functional":81.8,"quality":78.5,"passed":true,"tokens":4130,"timeMs":34198,"cost":0.0736},{"modelName":"Claude Haiku 
4.5","score":88.3,"functional":83.9,"quality":78.8,"passed":true,"tokens":3572,"timeMs":23822,"cost":0.0146},{"modelName":"DeepSeek v3.2","score":85.4,"functional":81.1,"quality":78.6,"passed":true,"tokens":3401,"timeMs":93659,"cost":0.0029},{"modelName":"OpenAI GPT-5.2","score":95.7,"functional":90.9,"quality":81,"passed":true,"tokens":2897,"timeMs":29841,"cost":0.0279},{"modelName":"GLM 4.7 Flash","score":83,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":92.9,"functional":88.3,"quality":80.6,"passed":true,"tokens":3032,"timeMs":70472,"cost":0.001},{"modelName":"Grok 4","score":86.2,"functional":81.9,"quality":78.9,"passed":true,"tokens":3051,"timeMs":87793,"cost":0.0325},{"modelName":"Grok 4.1 Fast","score":93.4,"functional":88.7,"quality":80.1,"passed":true,"tokens":3228,"timeMs":105171,"cost":0.0013},{"modelName":"Qwen3 Max","score":88.7,"functional":85,"quality":80,"passed":true,"tokens":4177,"timeMs":77228,"cost":0.0217}]},{"taskId":"saas-core/security/task-2","category":"saas-core","subcategory":"security","results":[{"modelName":"GLM 4-Plus","score":86.6,"functional":82.3,"quality":78.9,"passed":true,"tokens":4647,"timeMs":107916,"cost":0.0044},{"modelName":"MiniMax M2.1","score":92.4,"functional":87.8,"quality":80.9,"passed":true,"tokens":13061,"timeMs":192197,"cost":0.011},{"modelName":"GLM-4.7","score":88.3,"functional":83.8,"quality":80.3,"passed":true,"tokens":2989,"timeMs":47056,"cost":0.0048},{"modelName":"Gemini 3 Flash","score":83,"functional":78.9,"quality":74.4,"passed":true,"tokens":2165,"timeMs":30293,"cost":0.005},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4827,"timeMs":42600,"cost":0.0676},{"modelName":"Claude Sonnet 4.5","score":91.6,"functional":87,"quality":80.3,"passed":true,"tokens":2752,"timeMs":50633,"cost":0.0341},{"modelName":"Claude Opus 
4.5","score":86.8,"functional":82.4,"quality":78.7,"passed":true,"tokens":2910,"timeMs":32155,"cost":0.0602},{"modelName":"Claude Haiku 4.5","score":90.6,"functional":86.1,"quality":79.5,"passed":true,"tokens":3967,"timeMs":16393,"cost":0.0146},{"modelName":"DeepSeek v3.2","score":85.3,"functional":81,"quality":78.5,"passed":true,"tokens":2566,"timeMs":84422,"cost":0.0028},{"modelName":"OpenAI GPT-5.2","score":95.4,"functional":90.7,"quality":81,"passed":true,"tokens":2920,"timeMs":21338,"cost":0.0265},{"modelName":"GLM 4.7 Flash","score":83,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":94.8,"functional":90.1,"quality":81.2,"passed":true,"tokens":2782,"timeMs":75132,"cost":0.0013},{"modelName":"Grok 4","score":88.3,"functional":83.9,"quality":79.5,"passed":true,"tokens":2653,"timeMs":87487,"cost":0.0314},{"modelName":"Grok 4.1 Fast","score":93.8,"functional":89.1,"quality":80.2,"passed":true,"tokens":3626,"timeMs":71973,"cost":0.0013},{"modelName":"Qwen3 Max","score":88.7,"functional":85,"quality":80,"passed":true,"tokens":4177,"timeMs":77228,"cost":0.0217}]},{"taskId":"saas-core/security/task-3","category":"saas-core","subcategory":"security","results":[{"modelName":"GLM 4-Plus","score":85.4,"functional":81.1,"quality":78.6,"passed":true,"tokens":4602,"timeMs":108904,"cost":0.0056},{"modelName":"MiniMax M2.1","score":94,"functional":89.3,"quality":81.4,"passed":true,"tokens":15040,"timeMs":150240,"cost":0.0146},{"modelName":"GLM-4.7","score":85.9,"functional":81.6,"quality":79.5,"passed":true,"tokens":3815,"timeMs":73714,"cost":0.004},{"modelName":"Gemini 3 Flash","score":81.2,"functional":77.1,"quality":73.8,"passed":true,"tokens":2081,"timeMs":19675,"cost":0.0052},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4827,"timeMs":42600,"cost":0.0676},{"modelName":"Claude Sonnet 
4.5","score":89.1,"functional":84.7,"quality":79.6,"passed":true,"tokens":3872,"timeMs":50906,"cost":0.0331},{"modelName":"Claude Opus 4.5","score":88.5,"functional":84,"quality":79.2,"passed":true,"tokens":3807,"timeMs":39423,"cost":0.0787},{"modelName":"Claude Haiku 4.5","score":93.1,"functional":88.4,"quality":80.2,"passed":true,"tokens":4333,"timeMs":19669,"cost":0.0152},{"modelName":"DeepSeek v3.2","score":86.4,"functional":82.1,"quality":78.9,"passed":true,"tokens":2603,"timeMs":81689,"cost":0.0025},{"modelName":"OpenAI GPT-5.2","score":94,"functional":89.3,"quality":80.5,"passed":true,"tokens":2245,"timeMs":21748,"cost":0.0265},{"modelName":"GLM 4.7 Flash","score":83,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":95.8,"functional":91,"quality":81.5,"passed":true,"tokens":2699,"timeMs":80530,"cost":0.0014},{"modelName":"Grok 4","score":90.7,"functional":86.2,"quality":80.2,"passed":true,"tokens":2204,"timeMs":88254,"cost":0.0303},{"modelName":"Grok 4.1 Fast","score":93,"functional":88.3,"quality":80,"passed":true,"tokens":3477,"timeMs":100957,"cost":0.0015},{"modelName":"Qwen3 Max","score":88.7,"functional":85,"quality":80,"passed":true,"tokens":4177,"timeMs":77228,"cost":0.0217}]},{"taskId":"saas-core/security/task-4","category":"saas-core","subcategory":"security","results":[{"modelName":"GLM 4-Plus","score":85.3,"functional":81.1,"quality":78.5,"passed":true,"tokens":5013,"timeMs":115541,"cost":0.0057},{"modelName":"MiniMax M2.1","score":94.4,"functional":89.7,"quality":81.5,"passed":true,"tokens":17519,"timeMs":146689,"cost":0.0116},{"modelName":"GLM-4.7","score":83.5,"functional":79.3,"quality":78.8,"passed":true,"tokens":3326,"timeMs":58731,"cost":0.0034},{"modelName":"Gemini 3 Flash","score":80.4,"functional":76.4,"quality":73.6,"passed":true,"tokens":1995,"timeMs":35473,"cost":0.0051},{"modelName":"Gemini 3 Pro 
Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4827,"timeMs":42600,"cost":0.0676},{"modelName":"Claude Sonnet 4.5","score":87,"functional":82.7,"quality":78.9,"passed":true,"tokens":3594,"timeMs":54326,"cost":0.0452},{"modelName":"Claude Opus 4.5","score":90.8,"functional":86.3,"quality":79.9,"passed":true,"tokens":4163,"timeMs":34036,"cost":0.062},{"modelName":"Claude Haiku 4.5","score":95,"functional":90.2,"quality":80.8,"passed":true,"tokens":5068,"timeMs":27315,"cost":0.015},{"modelName":"DeepSeek v3.2","score":88.5,"functional":84.1,"quality":79.5,"passed":true,"tokens":3000,"timeMs":105358,"cost":0.003},{"modelName":"OpenAI GPT-5.2","score":91.8,"functional":87.2,"quality":79.9,"passed":true,"tokens":2223,"timeMs":30187,"cost":0.0248},{"modelName":"GLM 4.7 Flash","score":83,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":95.5,"functional":90.7,"quality":81.4,"passed":true,"tokens":2644,"timeMs":53731,"cost":0.001},{"modelName":"Grok 4","score":93,"functional":88.4,"quality":80.9,"passed":true,"tokens":2756,"timeMs":67049,"cost":0.0346},{"modelName":"Grok 4.1 Fast","score":91.2,"functional":86.6,"quality":79.4,"passed":true,"tokens":3161,"timeMs":71002,"cost":0.0016},{"modelName":"Qwen3 Max","score":88.7,"functional":85,"quality":80,"passed":true,"tokens":4177,"timeMs":77228,"cost":0.0217}]},{"taskId":"saas-core/security/task-5","category":"saas-core","subcategory":"security","results":[{"modelName":"GLM 4-Plus","score":86.4,"functional":82.1,"quality":78.9,"passed":true,"tokens":4318,"timeMs":86306,"cost":0.0051},{"modelName":"MiniMax M2.1","score":93.6,"functional":88.9,"quality":81.3,"passed":true,"tokens":16322,"timeMs":124925,"cost":0.0126},{"modelName":"GLM-4.7","score":81.7,"functional":77.6,"quality":78.3,"passed":true,"tokens":3454,"timeMs":58935,"cost":0.0048},{"modelName":"Gemini 3 
Flash","score":80.9,"functional":76.9,"quality":73.8,"passed":true,"tokens":2266,"timeMs":24674,"cost":0.004},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4827,"timeMs":42600,"cost":0.0676},{"modelName":"Claude Sonnet 4.5","score":85.8,"functional":81.5,"quality":78.6,"passed":true,"tokens":3482,"timeMs":35494,"cost":0.0359},{"modelName":"Claude Opus 4.5","score":93.3,"functional":88.6,"quality":80.6,"passed":true,"tokens":4155,"timeMs":43014,"cost":0.0585},{"modelName":"Claude Haiku 4.5","score":95.9,"functional":91.1,"quality":81,"passed":true,"tokens":4768,"timeMs":17393,"cost":0.0164},{"modelName":"DeepSeek v3.2","score":90.9,"functional":86.4,"quality":80.2,"passed":true,"tokens":2817,"timeMs":71112,"cost":0.0028},{"modelName":"OpenAI GPT-5.2","score":89.3,"functional":84.8,"quality":79.1,"passed":true,"tokens":2939,"timeMs":23071,"cost":0.0245},{"modelName":"GLM 4.7 Flash","score":83,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":94.1,"functional":89.3,"quality":81,"passed":true,"tokens":2730,"timeMs":69055,"cost":0.0014},{"modelName":"Grok 4","score":94.6,"functional":89.8,"quality":81.4,"passed":true,"tokens":3188,"timeMs":78960,"cost":0.0323},{"modelName":"Grok 4.1 Fast","score":88.8,"functional":84.3,"quality":78.7,"passed":true,"tokens":2740,"timeMs":113740,"cost":0.0015},{"modelName":"Qwen3 Max","score":88.7,"functional":85,"quality":80,"passed":true,"tokens":4177,"timeMs":77228,"cost":0.0217}]},{"taskId":"saas-core/advanced/task-1","category":"saas-core","subcategory":"advanced","results":[{"modelName":"GLM 4-Plus","score":88.5,"functional":84.1,"quality":79.5,"passed":true,"tokens":4520,"timeMs":101459,"cost":0.0054},{"modelName":"MiniMax 
M2.1","score":91.8,"functional":87.2,"quality":80.7,"passed":true,"tokens":14690,"timeMs":170421,"cost":0.0117},{"modelName":"GLM-4.7","score":80.9,"functional":76.9,"quality":78.1,"passed":true,"tokens":4077,"timeMs":47113,"cost":0.0037},{"modelName":"Gemini 3 Flash","score":82.5,"functional":78.4,"quality":74.2,"passed":true,"tokens":2042,"timeMs":34881,"cost":0.0052},{"modelName":"Gemini 3 Pro Preview","score":89.5,"functional":85,"quality":80,"passed":true,"tokens":4243,"timeMs":43145,"cost":0.0594},{"modelName":"Claude Sonnet 4.5","score":85.7,"functional":81.4,"quality":78.5,"passed":true,"tokens":3957,"timeMs":32258,"cost":0.037},{"modelName":"Claude Opus 4.5","score":95.2,"functional":90.4,"quality":81.2,"passed":true,"tokens":3359,"timeMs":51258,"cost":0.0712},{"modelName":"Claude Haiku 4.5","score":95.6,"functional":90.9,"quality":81,"passed":true,"tokens":3674,"timeMs":16382,"cost":0.0158},{"modelName":"DeepSeek v3.2","score":93.2,"functional":88.6,"quality":80.9,"passed":true,"tokens":2556,"timeMs":94310,"cost":0.0028},{"modelName":"OpenAI GPT-5.2","score":87.2,"functional":82.8,"quality":78.5,"passed":true,"tokens":2784,"timeMs":20806,"cost":0.0281},{"modelName":"GLM 4.7 Flash","score":86.5,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":91.8,"functional":87.2,"quality":80.3,"passed":true,"tokens":3259,"timeMs":56405,"cost":0.0012},{"modelName":"Grok 4","score":95,"functional":90.2,"quality":81.5,"passed":true,"tokens":2228,"timeMs":79005,"cost":0.0357},{"modelName":"Grok 4.1 Fast","score":86.4,"functional":82,"quality":78,"passed":true,"tokens":3453,"timeMs":95927,"cost":0.0015},{"modelName":"Qwen3 Max","score":88.7,"functional":85,"quality":80,"passed":true,"tokens":6457,"timeMs":150188,"cost":0.038}]},{"taskId":"saas-core/advanced/task-2","category":"saas-core","subcategory":"advanced","results":[{"modelName":"GLM 
4-Plus","score":90.9,"functional":86.4,"quality":80.2,"passed":true,"tokens":4119,"timeMs":74331,"cost":0.006},{"modelName":"MiniMax M2.1","score":89.4,"functional":84.9,"quality":80,"passed":true,"tokens":13892,"timeMs":166214,"cost":0.011},{"modelName":"GLM-4.7","score":81.4,"functional":77.3,"quality":78.2,"passed":true,"tokens":3029,"timeMs":45019,"cost":0.0046},{"modelName":"Gemini 3 Flash","score":84.8,"functional":80.5,"quality":74.9,"passed":true,"tokens":1845,"timeMs":24708,"cost":0.0053},{"modelName":"Gemini 3 Pro Preview","score":89.5,"functional":85,"quality":80,"passed":true,"tokens":4243,"timeMs":43145,"cost":0.0594},{"modelName":"Claude Sonnet 4.5","score":86.8,"functional":82.5,"quality":78.9,"passed":true,"tokens":3592,"timeMs":43886,"cost":0.0368},{"modelName":"Claude Opus 4.5","score":96.1,"functional":91.3,"quality":81.5,"passed":true,"tokens":3462,"timeMs":39937,"cost":0.0589},{"modelName":"Claude Haiku 4.5","score":94.2,"functional":89.5,"quality":80.5,"passed":true,"tokens":4297,"timeMs":22650,"cost":0.0182},{"modelName":"DeepSeek v3.2","score":94.8,"functional":90,"quality":81.4,"passed":true,"tokens":2583,"timeMs":66622,"cost":0.0026},{"modelName":"OpenAI GPT-5.2","score":85.9,"functional":81.6,"quality":78.1,"passed":true,"tokens":2574,"timeMs":27198,"cost":0.0252},{"modelName":"GLM 4.7 Flash","score":86.5,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":89.4,"functional":84.9,"quality":79.6,"passed":true,"tokens":2707,"timeMs":90313,"cost":0.0011},{"modelName":"Grok 4","score":94.2,"functional":89.5,"quality":81.3,"passed":true,"tokens":3177,"timeMs":95624,"cost":0.0298},{"modelName":"Grok 4.1 Fast","score":84.6,"functional":80.3,"quality":77.4,"passed":true,"tokens":3171,"timeMs":99366,"cost":0.0016},{"modelName":"Qwen3 
Max","score":88.7,"functional":85,"quality":80,"passed":true,"tokens":6457,"timeMs":150188,"cost":0.038}]},{"taskId":"saas-core/advanced/task-3","category":"saas-core","subcategory":"advanced","results":[{"modelName":"GLM 4-Plus","score":93.2,"functional":88.6,"quality":80.9,"passed":true,"tokens":5144,"timeMs":68858,"cost":0.0053},{"modelName":"MiniMax M2.1","score":87,"functional":82.6,"quality":79.3,"passed":true,"tokens":16721,"timeMs":151491,"cost":0.0144},{"modelName":"GLM-4.7","score":82.9,"functional":78.8,"quality":78.7,"passed":true,"tokens":3296,"timeMs":61228,"cost":0.0045},{"modelName":"Gemini 3 Flash","score":87.2,"functional":82.9,"quality":75.6,"passed":true,"tokens":1847,"timeMs":35295,"cost":0.0051},{"modelName":"Gemini 3 Pro Preview","score":89.5,"functional":85,"quality":80,"passed":true,"tokens":4243,"timeMs":43145,"cost":0.0594},{"modelName":"Claude Sonnet 4.5","score":88.8,"functional":84.4,"quality":79.5,"passed":true,"tokens":2943,"timeMs":43478,"cost":0.0428},{"modelName":"Claude Opus 4.5","score":95.8,"functional":91,"quality":81.4,"passed":true,"tokens":3496,"timeMs":53051,"cost":0.0731},{"modelName":"Claude Haiku 4.5","score":92,"functional":87.4,"quality":79.9,"passed":true,"tokens":4380,"timeMs":25995,"cost":0.019},{"modelName":"DeepSeek v3.2","score":95.2,"functional":90.4,"quality":81.5,"passed":true,"tokens":3556,"timeMs":76950,"cost":0.0029},{"modelName":"OpenAI GPT-5.2","score":85.9,"functional":81.6,"quality":78.1,"passed":true,"tokens":2968,"timeMs":23278,"cost":0.0254},{"modelName":"GLM 4.7 Flash","score":86.5,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":87.2,"functional":82.9,"quality":78.9,"passed":true,"tokens":2557,"timeMs":70457,"cost":0.0011},{"modelName":"Grok 4","score":92.4,"functional":87.7,"quality":80.7,"passed":true,"tokens":2879,"timeMs":61493,"cost":0.0312},{"modelName":"Grok 4.1 
Fast","score":83.8,"functional":79.6,"quality":77.2,"passed":true,"tokens":2922,"timeMs":77304,"cost":0.0012},{"modelName":"Qwen3 Max","score":88.7,"functional":85,"quality":80,"passed":true,"tokens":6457,"timeMs":150188,"cost":0.038}]},{"taskId":"saas-core/advanced/task-4","category":"saas-core","subcategory":"advanced","results":[{"modelName":"GLM 4-Plus","score":94.8,"functional":90,"quality":81.4,"passed":true,"tokens":5154,"timeMs":72610,"cost":0.0055},{"modelName":"MiniMax M2.1","score":85.2,"functional":80.9,"quality":78.7,"passed":true,"tokens":13411,"timeMs":131424,"cost":0.0126},{"modelName":"GLM-4.7","score":85.2,"functional":81,"quality":79.4,"passed":true,"tokens":4104,"timeMs":66413,"cost":0.0044},{"modelName":"Gemini 3 Flash","score":89.3,"functional":84.8,"quality":76.3,"passed":true,"tokens":1832,"timeMs":35641,"cost":0.005},{"modelName":"Gemini 3 Pro Preview","score":89.5,"functional":85,"quality":80,"passed":true,"tokens":4243,"timeMs":43145,"cost":0.0594},{"modelName":"Claude Sonnet 4.5","score":91.3,"functional":86.7,"quality":80.2,"passed":true,"tokens":3586,"timeMs":47071,"cost":0.0416},{"modelName":"Claude Opus 4.5","score":94.4,"functional":89.7,"quality":81,"passed":true,"tokens":3073,"timeMs":48212,"cost":0.0602},{"modelName":"Claude Haiku 4.5","score":89.5,"functional":85.1,"quality":79.1,"passed":true,"tokens":3704,"timeMs":16274,"cost":0.0152},{"modelName":"DeepSeek v3.2","score":94.4,"functional":89.7,"quality":81.3,"passed":true,"tokens":3010,"timeMs":106594,"cost":0.0025},{"modelName":"OpenAI GPT-5.2","score":87,"functional":82.6,"quality":78.4,"passed":true,"tokens":3182,"timeMs":23113,"cost":0.0323},{"modelName":"GLM 4.7 Flash","score":86.5,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":86,"functional":81.7,"quality":78.6,"passed":true,"tokens":3245,"timeMs":70446,"cost":0.0011},{"modelName":"Grok 
4","score":90,"functional":85.5,"quality":80,"passed":true,"tokens":3186,"timeMs":56978,"cost":0.0333},{"modelName":"Grok 4.1 Fast","score":84.3,"functional":80.1,"quality":77.3,"passed":true,"tokens":2948,"timeMs":66708,"cost":0.0013},{"modelName":"Qwen3 Max","score":88.7,"functional":85,"quality":80,"passed":true,"tokens":6457,"timeMs":150188,"cost":0.038}]},{"taskId":"saas-core/advanced/task-5","category":"saas-core","subcategory":"advanced","results":[{"modelName":"GLM 4-Plus","score":95.2,"functional":90.4,"quality":81.5,"passed":true,"tokens":5098,"timeMs":123818,"cost":0.005},{"modelName":"MiniMax M2.1","score":84.4,"functional":80.2,"quality":78.5,"passed":true,"tokens":17209,"timeMs":203218,"cost":0.0135},{"modelName":"GLM-4.7","score":87.7,"functional":83.3,"quality":80.1,"passed":true,"tokens":3618,"timeMs":66170,"cost":0.0045},{"modelName":"Gemini 3 Flash","score":90.3,"functional":85.8,"quality":76.6,"passed":true,"tokens":2491,"timeMs":27847,"cost":0.0044},{"modelName":"Gemini 3 Pro Preview","score":89.5,"functional":85,"quality":80,"passed":true,"tokens":4243,"timeMs":43145,"cost":0.0594},{"modelName":"Claude Sonnet 4.5","score":93.6,"functional":88.9,"quality":80.9,"passed":true,"tokens":3203,"timeMs":37408,"cost":0.0452},{"modelName":"Claude Opus 4.5","score":92.2,"functional":87.6,"quality":80.3,"passed":true,"tokens":3561,"timeMs":53527,"cost":0.0642},{"modelName":"Claude Haiku 4.5","score":87.4,"functional":83,"quality":78.5,"passed":true,"tokens":4980,"timeMs":20160,"cost":0.0197},{"modelName":"DeepSeek v3.2","score":92.5,"functional":87.9,"quality":80.7,"passed":true,"tokens":2786,"timeMs":66929,"cost":0.0033},{"modelName":"OpenAI GPT-5.2","score":89,"functional":84.6,"quality":79,"passed":true,"tokens":2810,"timeMs":27040,"cost":0.0298},{"modelName":"GLM 4.7 Flash","score":86.5,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 
Fast","score":85.9,"functional":81.6,"quality":78.5,"passed":true,"tokens":2772,"timeMs":84938,"cost":0.0013},{"modelName":"Grok 4","score":87.6,"functional":83.2,"quality":79.3,"passed":true,"tokens":3089,"timeMs":64388,"cost":0.0277},{"modelName":"Grok 4.1 Fast","score":85.8,"functional":81.6,"quality":77.8,"passed":true,"tokens":3283,"timeMs":88712,"cost":0.0011},{"modelName":"Qwen3 Max","score":88.7,"functional":85,"quality":80,"passed":true,"tokens":6457,"timeMs":150188,"cost":0.038}]},{"taskId":"glue-code/data-pipeline/task-1","category":"glue-code","subcategory":"data-pipeline","results":[{"modelName":"GLM 4-Plus","score":91.4,"functional":86.8,"quality":81.3,"passed":true,"tokens":4629,"timeMs":105335,"cost":0.0042},{"modelName":"MiniMax M2.1","score":81.9,"functional":77.8,"quality":78.6,"passed":true,"tokens":13438,"timeMs":160700,"cost":0.0149},{"modelName":"GLM-4.7","score":86.7,"functional":82.4,"quality":80.7,"passed":true,"tokens":3587,"timeMs":67157,"cost":0.0033},{"modelName":"Gemini 3 Flash","score":87.2,"functional":82.9,"quality":76.5,"passed":true,"tokens":1864,"timeMs":23951,"cost":0.005},{"modelName":"Gemini 3 Pro Preview","score":80.5,"functional":76.5,"quality":79.1,"passed":true,"tokens":3334,"timeMs":28544,"cost":0.0333},{"modelName":"Claude Sonnet 4.5","score":92.1,"functional":87.5,"quality":81.4,"passed":true,"tokens":3135,"timeMs":30581,"cost":0.045},{"modelName":"Claude Opus 4.5","score":86.7,"functional":82.4,"quality":79.6,"passed":true,"tokens":3356,"timeMs":46215,"cost":0.0733},{"modelName":"Claude Haiku 4.5","score":83.2,"functional":79,"quality":78.1,"passed":true,"tokens":3698,"timeMs":26526,"cost":0.0178},{"modelName":"DeepSeek v3.2","score":87.1,"functional":82.8,"quality":80,"passed":true,"tokens":3470,"timeMs":97933,"cost":0.0032},{"modelName":"OpenAI GPT-5.2","score":88.5,"functional":84.1,"quality":79.8,"passed":true,"tokens":2842,"timeMs":26972,"cost":0.0261},{"modelName":"GLM 4.7 
Flash","score":53,"functional":50.3,"quality":79,"passed":false,"tokens":1227,"timeMs":5731,"cost":0.0003},{"modelName":"Grok 4 Fast","score":84,"functional":79.8,"quality":78.9,"passed":true,"tokens":2693,"timeMs":63342,"cost":0.0011},{"modelName":"Grok 4","score":82.8,"functional":78.6,"quality":78.7,"passed":true,"tokens":2377,"timeMs":61601,"cost":0.0329},{"modelName":"Grok 4.1 Fast","score":85.1,"functional":80.9,"quality":78.5,"passed":true,"tokens":3473,"timeMs":109081,"cost":0.0015}]},{"taskId":"glue-code/data-pipeline/task-2","category":"glue-code","subcategory":"data-pipeline","results":[{"modelName":"GLM 4-Plus","score":89.6,"functional":85.1,"quality":80.7,"passed":true,"tokens":4692,"timeMs":90638,"cost":0.0052},{"modelName":"MiniMax M2.1","score":83.5,"functional":79.3,"quality":79.1,"passed":true,"tokens":16027,"timeMs":196478,"cost":0.0129},{"modelName":"GLM-4.7","score":87.8,"functional":83.4,"quality":81,"passed":true,"tokens":4141,"timeMs":64025,"cost":0.0048},{"modelName":"Gemini 3 Flash","score":85.9,"functional":81.6,"quality":76.2,"passed":true,"tokens":2080,"timeMs":34220,"cost":0.0039},{"modelName":"Gemini 3 Pro Preview","score":78.1,"functional":74.1,"quality":78.3,"passed":true,"tokens":2958,"timeMs":38106,"cost":0.0356},{"modelName":"Claude Sonnet 4.5","score":92.5,"functional":87.9,"quality":81.5,"passed":true,"tokens":3585,"timeMs":50307,"cost":0.0377},{"modelName":"Claude Opus 4.5","score":84.6,"functional":80.4,"quality":78.9,"passed":true,"tokens":4198,"timeMs":43682,"cost":0.0602},{"modelName":"Claude Haiku 4.5","score":83.1,"functional":78.9,"quality":78.1,"passed":true,"tokens":4518,"timeMs":23463,"cost":0.0171},{"modelName":"DeepSeek v3.2","score":84.8,"functional":80.5,"quality":79.3,"passed":true,"tokens":2454,"timeMs":114702,"cost":0.003},{"modelName":"OpenAI GPT-5.2","score":90.8,"functional":86.2,"quality":80.5,"passed":true,"tokens":2571,"timeMs":21181,"cost":0.0329},{"modelName":"GLM 4.7 
Flash","score":51.6,"functional":49,"quality":78.6,"passed":false,"tokens":1479,"timeMs":8280,"cost":0.0004},{"modelName":"Grok 4 Fast","score":86.1,"functional":81.8,"quality":79.5,"passed":true,"tokens":2761,"timeMs":78872,"cost":0.0011},{"modelName":"Grok 4","score":82,"functional":77.9,"quality":78.5,"passed":true,"tokens":3117,"timeMs":78610,"cost":0.0277},{"modelName":"Grok 4.1 Fast","score":87.6,"functional":83.2,"quality":79.2,"passed":true,"tokens":3376,"timeMs":69316,"cost":0.0016}]},{"taskId":"glue-code/data-pipeline/task-3","category":"glue-code","subcategory":"data-pipeline","results":[{"modelName":"GLM 4-Plus","score":87.2,"functional":82.8,"quality":80,"passed":true,"tokens":4485,"timeMs":70170,"cost":0.0046},{"modelName":"MiniMax M2.1","score":85.8,"functional":81.5,"quality":79.8,"passed":true,"tokens":14662,"timeMs":145482,"cost":0.0133},{"modelName":"GLM-4.7","score":87.7,"functional":83.3,"quality":81,"passed":true,"tokens":3276,"timeMs":49452,"cost":0.0046},{"modelName":"Gemini 3 Flash","score":83.8,"functional":79.6,"quality":75.5,"passed":true,"tokens":2134,"timeMs":23994,"cost":0.0046},{"modelName":"Gemini 3 Pro Preview","score":75.9,"functional":72.1,"quality":77.7,"passed":true,"tokens":3177,"timeMs":27013,"cost":0.0329},{"modelName":"Claude Sonnet 4.5","score":91.7,"functional":87.2,"quality":81.3,"passed":true,"tokens":3456,"timeMs":38627,"cost":0.043},{"modelName":"Claude Opus 4.5","score":83.3,"functional":79.2,"quality":78.6,"passed":true,"tokens":3542,"timeMs":53930,"cost":0.07},{"modelName":"Claude Haiku 4.5","score":84.2,"functional":80,"quality":78.4,"passed":true,"tokens":4004,"timeMs":16609,"cost":0.0201},{"modelName":"DeepSeek v3.2","score":83,"functional":78.8,"quality":78.7,"passed":true,"tokens":3575,"timeMs":79417,"cost":0.0031},{"modelName":"OpenAI GPT-5.2","score":92.3,"functional":87.7,"quality":80.9,"passed":true,"tokens":2630,"timeMs":25946,"cost":0.0308},{"modelName":"GLM 4.7 
Flash","score":51.3,"functional":48.8,"quality":78.5,"passed":false,"tokens":1384,"timeMs":8669,"cost":0.0004},{"modelName":"Grok 4 Fast","score":88.5,"functional":84.1,"quality":80.2,"passed":true,"tokens":3465,"timeMs":78571,"cost":0.0011},{"modelName":"Grok 4","score":82.5,"functional":78.3,"quality":78.6,"passed":true,"tokens":2609,"timeMs":82641,"cost":0.0283},{"modelName":"Grok 4.1 Fast","score":89.6,"functional":85.1,"quality":79.8,"passed":true,"tokens":3172,"timeMs":66512,"cost":0.0013}]},{"taskId":"glue-code/data-pipeline/task-4","category":"glue-code","subcategory":"data-pipeline","results":[{"modelName":"GLM 4-Plus","score":84.8,"functional":80.5,"quality":79.3,"passed":true,"tokens":3911,"timeMs":84630,"cost":0.0045},{"modelName":"MiniMax M2.1","score":88.2,"functional":83.8,"quality":80.5,"passed":true,"tokens":14474,"timeMs":183864,"cost":0.0139},{"modelName":"GLM-4.7","score":86.4,"functional":82.1,"quality":80.6,"passed":true,"tokens":3626,"timeMs":51958,"cost":0.0043},{"modelName":"Gemini 3 Flash","score":81.3,"functional":77.3,"quality":74.8,"passed":true,"tokens":1795,"timeMs":31095,"cost":0.0045},{"modelName":"Gemini 3 Pro Preview","score":74.5,"functional":70.7,"quality":77.3,"passed":true,"tokens":3459,"timeMs":36150,"cost":0.0263},{"modelName":"Claude Sonnet 4.5","score":89.9,"functional":85.4,"quality":80.7,"passed":true,"tokens":3730,"timeMs":46649,"cost":0.0373},{"modelName":"Claude Opus 4.5","score":83.3,"functional":79.1,"quality":78.5,"passed":true,"tokens":3451,"timeMs":46413,"cost":0.0785},{"modelName":"Claude Haiku 4.5","score":86.3,"functional":81.9,"quality":79,"passed":true,"tokens":4576,"timeMs":25611,"cost":0.0198},{"modelName":"DeepSeek v3.2","score":82.2,"functional":78.1,"quality":78.5,"passed":true,"tokens":3055,"timeMs":71887,"cost":0.0025},{"modelName":"OpenAI GPT-5.2","score":92.7,"functional":88.1,"quality":81.1,"passed":true,"tokens":3081,"timeMs":22063,"cost":0.0266},{"modelName":"GLM 4.7 
Flash","score":52.3,"functional":49.7,"quality":78.8,"passed":false,"tokens":1621,"timeMs":6359,"cost":0.0004},{"modelName":"Grok 4 Fast","score":90.8,"functional":86.3,"quality":80.9,"passed":true,"tokens":2399,"timeMs":87990,"cost":0.0011},{"modelName":"Grok 4","score":84,"functional":79.8,"quality":79.1,"passed":true,"tokens":2185,"timeMs":88758,"cost":0.0333},{"modelName":"Grok 4.1 Fast","score":90.7,"functional":86.2,"quality":80.2,"passed":true,"tokens":2621,"timeMs":65522,"cost":0.0016}]},{"taskId":"glue-code/data-pipeline/task-5","category":"glue-code","subcategory":"data-pipeline","results":[{"modelName":"GLM 4-Plus","score":83,"functional":78.8,"quality":78.7,"passed":true,"tokens":3719,"timeMs":122601,"cost":0.0046},{"modelName":"MiniMax M2.1","score":90.2,"functional":85.7,"quality":81.1,"passed":true,"tokens":17824,"timeMs":125475,"cost":0.0132},{"modelName":"GLM-4.7","score":84.3,"functional":80,"quality":80,"passed":true,"tokens":3805,"timeMs":70667,"cost":0.0045},{"modelName":"Gemini 3 Flash","score":79.1,"functional":75.2,"quality":74.1,"passed":true,"tokens":2165,"timeMs":20779,"cost":0.0039},{"modelName":"Gemini 3 Pro Preview","score":74.2,"functional":70.5,"quality":77.2,"passed":true,"tokens":2939,"timeMs":33901,"cost":0.0266},{"modelName":"Claude Sonnet 4.5","score":87.5,"functional":83.1,"quality":80,"passed":true,"tokens":2891,"timeMs":33843,"cost":0.0359},{"modelName":"Claude Opus 4.5","score":84.4,"functional":80.2,"quality":78.9,"passed":true,"tokens":3227,"timeMs":39069,"cost":0.0739},{"modelName":"Claude Haiku 4.5","score":88.7,"functional":84.3,"quality":79.8,"passed":true,"tokens":4612,"timeMs":26480,"cost":0.0188},{"modelName":"DeepSeek v3.2","score":82.7,"functional":78.5,"quality":78.6,"passed":true,"tokens":2879,"timeMs":86806,"cost":0.0026},{"modelName":"OpenAI GPT-5.2","score":91.9,"functional":87.3,"quality":80.8,"passed":true,"tokens":3152,"timeMs":25435,"cost":0.0281},{"modelName":"GLM 4.7 
Flash","score":54.2,"functional":51.5,"quality":79.4,"passed":false,"tokens":1536,"timeMs":9537,"cost":0.0004},{"modelName":"Grok 4 Fast","score":92.4,"functional":87.7,"quality":81.4,"passed":true,"tokens":2779,"timeMs":86421,"cost":0.0014},{"modelName":"Grok 4","score":86.3,"functional":82,"quality":79.8,"passed":true,"tokens":2267,"timeMs":69223,"cost":0.0293},{"modelName":"Grok 4.1 Fast","score":90.6,"functional":86.1,"quality":80.1,"passed":true,"tokens":3225,"timeMs":98471,"cost":0.0013}]},{"taskId":"glue-code/file-processing/task-1","category":"glue-code","subcategory":"file-processing","results":[{"modelName":"GLM 4-Plus","score":82.2,"functional":78.1,"quality":78.5,"passed":true,"tokens":3557,"timeMs":98855,"cost":0.0046},{"modelName":"MiniMax M2.1","score":91.3,"functional":86.8,"quality":81.5,"passed":true,"tokens":15113,"timeMs":119725,"cost":0.0119},{"modelName":"GLM-4.7","score":81.8,"functional":77.7,"quality":79.2,"passed":true,"tokens":3381,"timeMs":72293,"cost":0.0044},{"modelName":"Gemini 3 Flash","score":77.7,"functional":73.8,"quality":73.7,"passed":true,"tokens":1994,"timeMs":35109,"cost":0.0057},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4483,"timeMs":40663,"cost":0.0628},{"modelName":"Claude Sonnet 4.5","score":85.1,"functional":80.9,"quality":79.3,"passed":true,"tokens":2795,"timeMs":40660,"cost":0.035},{"modelName":"Claude Opus 4.5","score":86.4,"functional":82.1,"quality":79.5,"passed":true,"tokens":3765,"timeMs":53961,"cost":0.0563},{"modelName":"Claude Haiku 4.5","score":91,"functional":86.4,"quality":80.5,"passed":true,"tokens":5219,"timeMs":18962,"cost":0.0174},{"modelName":"DeepSeek v3.2","score":84.2,"functional":80,"quality":79.1,"passed":true,"tokens":2827,"timeMs":104142,"cost":0.0023},{"modelName":"OpenAI GPT-5.2","score":90.1,"functional":85.6,"quality":80.3,"passed":true,"tokens":3184,"timeMs":24779,"cost":0.0303},{"modelName":"GLM 4.7 
Flash","score":83.1,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":92.8,"functional":88.1,"quality":81.5,"passed":true,"tokens":2918,"timeMs":52249,"cost":0.0012},{"modelName":"Grok 4","score":88.8,"functional":84.4,"quality":80.5,"passed":true,"tokens":2495,"timeMs":96587,"cost":0.0275},{"modelName":"Grok 4.1 Fast","score":89.3,"functional":84.8,"quality":79.7,"passed":true,"tokens":3209,"timeMs":85414,"cost":0.0011},{"modelName":"Qwen3 Max","score":88.4,"functional":85,"quality":80,"passed":true,"tokens":5027,"timeMs":96560,"cost":0.0283}]},{"taskId":"glue-code/file-processing/task-2","category":"glue-code","subcategory":"file-processing","results":[{"modelName":"GLM 4-Plus","score":82.7,"functional":78.5,"quality":78.6,"passed":true,"tokens":4370,"timeMs":95743,"cost":0.0057},{"modelName":"MiniMax M2.1","score":91.2,"functional":86.6,"quality":81.4,"passed":true,"tokens":12697,"timeMs":166361,"cost":0.0129},{"modelName":"GLM-4.7","score":79.6,"functional":75.6,"quality":78.6,"passed":true,"tokens":3528,"timeMs":65518,"cost":0.0047},{"modelName":"Gemini 3 Flash","score":77.5,"functional":73.6,"quality":73.6,"passed":true,"tokens":2536,"timeMs":32795,"cost":0.004},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4483,"timeMs":40663,"cost":0.0628},{"modelName":"Claude Sonnet 4.5","score":83.3,"functional":79.2,"quality":78.7,"passed":true,"tokens":2854,"timeMs":37810,"cost":0.0379},{"modelName":"Claude Opus 4.5","score":88.9,"functional":84.5,"quality":80.2,"passed":true,"tokens":3855,"timeMs":38180,"cost":0.0766},{"modelName":"Claude Haiku 4.5","score":92.5,"functional":87.9,"quality":80.9,"passed":true,"tokens":3732,"timeMs":20453,"cost":0.0172},{"modelName":"DeepSeek v3.2","score":86.5,"functional":82.2,"quality":79.8,"passed":true,"tokens":2553,"timeMs":99758,"cost":0.0022},{"modelName":"OpenAI 
GPT-5.2","score":87.7,"functional":83.3,"quality":79.5,"passed":true,"tokens":2854,"timeMs":34375,"cost":0.0304},{"modelName":"GLM 4.7 Flash","score":83.1,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":92,"functional":87.4,"quality":81.3,"passed":true,"tokens":2821,"timeMs":82284,"cost":0.0011},{"modelName":"Grok 4","score":90.8,"functional":86.3,"quality":81.1,"passed":true,"tokens":3010,"timeMs":93414,"cost":0.0351},{"modelName":"Grok 4.1 Fast","score":87.2,"functional":82.8,"quality":79.1,"passed":true,"tokens":3228,"timeMs":89938,"cost":0.0013},{"modelName":"Qwen3 Max","score":88.4,"functional":85,"quality":80,"passed":true,"tokens":5027,"timeMs":96560,"cost":0.0283}]},{"taskId":"glue-code/file-processing/task-3","category":"glue-code","subcategory":"file-processing","results":[{"modelName":"GLM 4-Plus","score":84.2,"functional":80,"quality":79.1,"passed":true,"tokens":4150,"timeMs":81372,"cost":0.0059},{"modelName":"MiniMax M2.1","score":89.9,"functional":85.4,"quality":81,"passed":true,"tokens":15869,"timeMs":163544,"cost":0.0156},{"modelName":"GLM-4.7","score":78.2,"functional":74.3,"quality":78.1,"passed":true,"tokens":4133,"timeMs":57804,"cost":0.0037},{"modelName":"Gemini 3 Flash","score":78.5,"functional":74.6,"quality":73.9,"passed":true,"tokens":2185,"timeMs":26162,"cost":0.0057},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4483,"timeMs":40663,"cost":0.0628},{"modelName":"Claude Sonnet 4.5","score":82.6,"functional":78.4,"quality":78.5,"passed":true,"tokens":3149,"timeMs":34649,"cost":0.0326},{"modelName":"Claude Opus 4.5","score":91.2,"functional":86.6,"quality":80.9,"passed":true,"tokens":2928,"timeMs":40112,"cost":0.0714},{"modelName":"Claude Haiku 4.5","score":93,"functional":88.3,"quality":81.1,"passed":true,"tokens":4147,"timeMs":23772,"cost":0.02},{"modelName":"DeepSeek 
v3.2","score":89,"functional":84.5,"quality":80.5,"passed":true,"tokens":3173,"timeMs":103362,"cost":0.0033},{"modelName":"OpenAI GPT-5.2","score":85.3,"functional":81,"quality":78.8,"passed":true,"tokens":3094,"timeMs":22730,"cost":0.0303},{"modelName":"GLM 4.7 Flash","score":83.1,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":90.2,"functional":85.7,"quality":80.7,"passed":true,"tokens":3418,"timeMs":84383,"cost":0.001},{"modelName":"Grok 4","score":91.9,"functional":87.3,"quality":81.5,"passed":true,"tokens":2854,"timeMs":85107,"cost":0.0321},{"modelName":"Grok 4.1 Fast","score":84.7,"functional":80.4,"quality":78.4,"passed":true,"tokens":2619,"timeMs":102561,"cost":0.0012},{"modelName":"Qwen3 Max","score":88.4,"functional":85,"quality":80,"passed":true,"tokens":5027,"timeMs":96560,"cost":0.0283}]},{"taskId":"glue-code/file-processing/task-4","category":"glue-code","subcategory":"file-processing","results":[{"modelName":"GLM 4-Plus","score":86.5,"functional":82.2,"quality":79.8,"passed":true,"tokens":4105,"timeMs":73087,"cost":0.0055},{"modelName":"MiniMax M2.1","score":87.8,"functional":83.4,"quality":80.4,"passed":true,"tokens":16582,"timeMs":197453,"cost":0.0141},{"modelName":"GLM-4.7","score":78,"functional":74.1,"quality":78.1,"passed":true,"tokens":3261,"timeMs":53696,"cost":0.0039},{"modelName":"Gemini 3 Flash","score":80.4,"functional":76.4,"quality":74.5,"passed":true,"tokens":2205,"timeMs":30412,"cost":0.0048},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4483,"timeMs":40663,"cost":0.0628},{"modelName":"Claude Sonnet 4.5","score":83,"functional":78.9,"quality":78.6,"passed":true,"tokens":3953,"timeMs":43843,"cost":0.0386},{"modelName":"Claude Opus 4.5","score":92.7,"functional":88.1,"quality":81.4,"passed":true,"tokens":3614,"timeMs":53062,"cost":0.0609},{"modelName":"Claude Haiku 
4.5","score":92.2,"functional":87.5,"quality":80.8,"passed":true,"tokens":4033,"timeMs":20033,"cost":0.0197},{"modelName":"DeepSeek v3.2","score":91,"functional":86.5,"quality":81.1,"passed":true,"tokens":3588,"timeMs":63335,"cost":0.0024},{"modelName":"OpenAI GPT-5.2","score":83.5,"functional":79.3,"quality":78.3,"passed":true,"tokens":2317,"timeMs":32357,"cost":0.0296},{"modelName":"GLM 4.7 Flash","score":83.1,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":87.8,"functional":83.4,"quality":80,"passed":true,"tokens":2395,"timeMs":80170,"cost":0.0012},{"modelName":"Grok 4","score":91.8,"functional":87.2,"quality":81.4,"passed":true,"tokens":3123,"timeMs":84466,"cost":0.0281},{"modelName":"Grok 4.1 Fast","score":82.5,"functional":78.4,"quality":77.7,"passed":true,"tokens":2977,"timeMs":79197,"cost":0.0014},{"modelName":"Qwen3 Max","score":88.4,"functional":85,"quality":80,"passed":true,"tokens":5027,"timeMs":96560,"cost":0.0283}]},{"taskId":"glue-code/file-processing/task-5","category":"glue-code","subcategory":"file-processing","results":[{"modelName":"GLM 4-Plus","score":89,"functional":84.5,"quality":80.5,"passed":true,"tokens":4913,"timeMs":90609,"cost":0.0051},{"modelName":"MiniMax M2.1","score":85.3,"functional":81,"quality":79.7,"passed":true,"tokens":13730,"timeMs":160424,"cost":0.0152},{"modelName":"GLM-4.7","score":78.9,"functional":75,"quality":78.4,"passed":true,"tokens":3535,"timeMs":40155,"cost":0.0048},{"modelName":"Gemini 3 Flash","score":82.9,"functional":78.7,"quality":75.2,"passed":true,"tokens":2199,"timeMs":35769,"cost":0.005},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4483,"timeMs":40663,"cost":0.0628},{"modelName":"Claude Sonnet 4.5","score":84.6,"functional":80.4,"quality":79.1,"passed":true,"tokens":3865,"timeMs":44277,"cost":0.0333},{"modelName":"Claude Opus 
4.5","score":93.1,"functional":88.5,"quality":81.5,"passed":true,"tokens":3954,"timeMs":48899,"cost":0.0648},{"modelName":"Claude Haiku 4.5","score":90.3,"functional":85.8,"quality":80.3,"passed":true,"tokens":4661,"timeMs":18582,"cost":0.015},{"modelName":"DeepSeek v3.2","score":92.1,"functional":87.5,"quality":81.5,"passed":true,"tokens":3248,"timeMs":104099,"cost":0.0031},{"modelName":"OpenAI GPT-5.2","score":82.8,"functional":78.6,"quality":78.1,"passed":true,"tokens":2373,"timeMs":23543,"cost":0.0283},{"modelName":"GLM 4.7 Flash","score":83.1,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":85.4,"functional":81.1,"quality":79.3,"passed":true,"tokens":2524,"timeMs":87575,"cost":0.0013},{"modelName":"Grok 4","score":90.5,"functional":86,"quality":81,"passed":true,"tokens":2393,"timeMs":63676,"cost":0.028},{"modelName":"Grok 4.1 Fast","score":81.1,"functional":77,"quality":77.3,"passed":true,"tokens":2869,"timeMs":71481,"cost":0.0015},{"modelName":"Qwen3 Max","score":88.4,"functional":85,"quality":80,"passed":true,"tokens":5027,"timeMs":96560,"cost":0.0283}]},{"taskId":"glue-code/message-queue/task-1","category":"glue-code","subcategory":"message-queue","results":[{"modelName":"GLM 4-Plus","score":91,"functional":86.5,"quality":81.1,"passed":true,"tokens":3593,"timeMs":91794,"cost":0.0051},{"modelName":"MiniMax M2.1","score":83.1,"functional":78.9,"quality":79,"passed":true,"tokens":14826,"timeMs":125430,"cost":0.014},{"modelName":"GLM-4.7","score":80.9,"functional":76.8,"quality":79,"passed":true,"tokens":3818,"timeMs":49173,"cost":0.0036},{"modelName":"Gemini 3 Flash","score":85.2,"functional":80.9,"quality":75.9,"passed":true,"tokens":2438,"timeMs":26834,"cost":0.0049},{"modelName":"Gemini 3 Pro Preview","score":84.2,"functional":80,"quality":80.2,"passed":true,"tokens":3756,"timeMs":28091,"cost":0.0288},{"modelName":"Claude Sonnet 
4.5","score":86.9,"functional":82.6,"quality":79.8,"passed":true,"tokens":2836,"timeMs":36809,"cost":0.0359},{"modelName":"Claude Opus 4.5","score":92.3,"functional":87.7,"quality":81.3,"passed":true,"tokens":3063,"timeMs":42305,"cost":0.0625},{"modelName":"Claude Haiku 4.5","score":87.9,"functional":83.5,"quality":79.5,"passed":true,"tokens":3803,"timeMs":25193,"cost":0.0176},{"modelName":"DeepSeek v3.2","score":92,"functional":87.4,"quality":81.4,"passed":true,"tokens":3035,"timeMs":101935,"cost":0.0028},{"modelName":"OpenAI GPT-5.2","score":83.2,"functional":79.1,"quality":78.2,"passed":true,"tokens":3226,"timeMs":36086,"cost":0.026},{"modelName":"GLM 4.7 Flash","score":58.9,"functional":56,"quality":80.8,"passed":false,"tokens":1384,"timeMs":5800,"cost":0.0004},{"modelName":"Grok 4 Fast","score":83.6,"functional":79.4,"quality":78.7,"passed":true,"tokens":2335,"timeMs":85819,"cost":0.0009},{"modelName":"Grok 4","score":88.4,"functional":83.9,"quality":80.4,"passed":true,"tokens":2967,"timeMs":90033,"cost":0.0334},{"modelName":"Grok 4.1 Fast","score":80.9,"functional":76.8,"quality":77.2,"passed":true,"tokens":2706,"timeMs":96781,"cost":0.0011}]},{"taskId":"glue-code/message-queue/task-2","category":"glue-code","subcategory":"message-queue","results":[{"modelName":"GLM 4-Plus","score":92.1,"functional":87.5,"quality":81.5,"passed":true,"tokens":4093,"timeMs":75939,"cost":0.0054},{"modelName":"MiniMax M2.1","score":81.7,"functional":77.6,"quality":78.6,"passed":true,"tokens":12661,"timeMs":143945,"cost":0.0159},{"modelName":"GLM-4.7","score":83.3,"functional":79.2,"quality":79.7,"passed":true,"tokens":3739,"timeMs":57404,"cost":0.0033},{"modelName":"Gemini 3 Flash","score":86.9,"functional":82.5,"quality":76.4,"passed":true,"tokens":2528,"timeMs":21889,"cost":0.0049},{"modelName":"Gemini 3 Pro Preview","score":83.5,"functional":79.3,"quality":80,"passed":true,"tokens":3003,"timeMs":26296,"cost":0.0364},{"modelName":"Claude Sonnet 
4.5","score":89.4,"functional":84.9,"quality":80.5,"passed":true,"tokens":3225,"timeMs":43930,"cost":0.0464},{"modelName":"Claude Opus 4.5","score":90.5,"functional":86,"quality":80.7,"passed":true,"tokens":3866,"timeMs":34612,"cost":0.0801},{"modelName":"Claude Haiku 4.5","score":85.5,"functional":81.3,"quality":78.8,"passed":true,"tokens":3583,"timeMs":23588,"cost":0.0166},{"modelName":"DeepSeek v3.2","score":90.7,"functional":86.2,"quality":81,"passed":true,"tokens":3460,"timeMs":87002,"cost":0.0029},{"modelName":"OpenAI GPT-5.2","score":84.8,"functional":80.6,"quality":78.7,"passed":true,"tokens":2340,"timeMs":30296,"cost":0.0235},{"modelName":"GLM 4.7 Flash","score":56.6,"functional":53.7,"quality":80.1,"passed":false,"tokens":1205,"timeMs":10386,"cost":0.0004},{"modelName":"Grok 4 Fast","score":82.8,"functional":78.7,"quality":78.5,"passed":true,"tokens":2834,"timeMs":50505,"cost":0.001},{"modelName":"Grok 4","score":85.9,"functional":81.6,"quality":79.7,"passed":true,"tokens":2699,"timeMs":61037,"cost":0.0265},{"modelName":"Grok 4.1 Fast","score":81.8,"functional":77.7,"quality":77.5,"passed":true,"tokens":2782,"timeMs":108887,"cost":0.0012}]},{"taskId":"glue-code/message-queue/task-3","category":"glue-code","subcategory":"message-queue","results":[{"modelName":"GLM 4-Plus","score":92,"functional":87.4,"quality":81.4,"passed":true,"tokens":4593,"timeMs":67750,"cost":0.0045},{"modelName":"MiniMax M2.1","score":81.5,"functional":77.4,"quality":78.5,"passed":true,"tokens":15229,"timeMs":119384,"cost":0.0128},{"modelName":"GLM-4.7","score":85.7,"functional":81.4,"quality":80.4,"passed":true,"tokens":3740,"timeMs":55921,"cost":0.0048},{"modelName":"Gemini 3 Flash","score":87.4,"functional":83.1,"quality":76.6,"passed":true,"tokens":1739,"timeMs":24047,"cost":0.0054},{"modelName":"Gemini 3 Pro Preview","score":81.8,"functional":77.7,"quality":79.5,"passed":true,"tokens":2912,"timeMs":33024,"cost":0.0348},{"modelName":"Claude Sonnet 
4.5","score":91.4,"functional":86.8,"quality":81.1,"passed":true,"tokens":3010,"timeMs":49984,"cost":0.0442},{"modelName":"Claude Opus 4.5","score":88.1,"functional":83.7,"quality":80,"passed":true,"tokens":4257,"timeMs":40010,"cost":0.0614},{"modelName":"Claude Haiku 4.5","score":83.7,"functional":79.6,"quality":78.3,"passed":true,"tokens":4882,"timeMs":19776,"cost":0.0171},{"modelName":"DeepSeek v3.2","score":88.5,"functional":84.1,"quality":80.4,"passed":true,"tokens":2591,"timeMs":109210,"cost":0.0024},{"modelName":"OpenAI GPT-5.2","score":87.1,"functional":82.7,"quality":79.4,"passed":true,"tokens":2931,"timeMs":28641,"cost":0.0331},{"modelName":"GLM 4.7 Flash","score":54.1,"functional":51.4,"quality":79.4,"passed":false,"tokens":1196,"timeMs":8116,"cost":0.0004},{"modelName":"Grok 4 Fast","score":83.3,"functional":79.1,"quality":78.6,"passed":true,"tokens":3133,"timeMs":75727,"cost":0.001},{"modelName":"Grok 4","score":83.7,"functional":79.5,"quality":79,"passed":true,"tokens":2425,"timeMs":89648,"cost":0.0281},{"modelName":"Grok 4.1 Fast","score":83.8,"functional":79.6,"quality":78.1,"passed":true,"tokens":3188,"timeMs":83514,"cost":0.0012}]},{"taskId":"glue-code/message-queue/task-4","category":"glue-code","subcategory":"message-queue","results":[{"modelName":"GLM 4-Plus","score":90.7,"functional":86.2,"quality":81,"passed":true,"tokens":3744,"timeMs":78228,"cost":0.0047},{"modelName":"MiniMax M2.1","score":82.5,"functional":78.3,"quality":78.8,"passed":true,"tokens":16545,"timeMs":117898,"cost":0.0155},{"modelName":"GLM-4.7","score":87.3,"functional":83,"quality":80.9,"passed":true,"tokens":2795,"timeMs":49436,"cost":0.004},{"modelName":"Gemini 3 Flash","score":86.8,"functional":82.5,"quality":76.4,"passed":true,"tokens":1970,"timeMs":32711,"cost":0.0039},{"modelName":"Gemini 3 Pro Preview","score":79.5,"functional":75.5,"quality":78.8,"passed":true,"tokens":3090,"timeMs":34417,"cost":0.0251},{"modelName":"Claude Sonnet 
4.5","score":92.5,"functional":87.8,"quality":81.5,"passed":true,"tokens":2735,"timeMs":29429,"cost":0.0369},{"modelName":"Claude Opus 4.5","score":85.7,"functional":81.4,"quality":79.3,"passed":true,"tokens":3401,"timeMs":48612,"cost":0.0736},{"modelName":"Claude Haiku 4.5","score":83,"functional":78.8,"quality":78.1,"passed":true,"tokens":3690,"timeMs":18023,"cost":0.0196},{"modelName":"DeepSeek v3.2","score":86.1,"functional":81.8,"quality":79.7,"passed":true,"tokens":3284,"timeMs":80258,"cost":0.0026},{"modelName":"OpenAI GPT-5.2","score":89.5,"functional":85.1,"quality":80.1,"passed":true,"tokens":2338,"timeMs":31178,"cost":0.0227},{"modelName":"GLM 4.7 Flash","score":52.2,"functional":49.6,"quality":78.8,"passed":false,"tokens":1438,"timeMs":5849,"cost":0.0004},{"modelName":"Grok 4 Fast","score":84.8,"functional":80.6,"quality":79.1,"passed":true,"tokens":3369,"timeMs":87955,"cost":0.0014},{"modelName":"Grok 4","score":82.3,"functional":78.2,"quality":78.6,"passed":true,"tokens":3179,"timeMs":63269,"cost":0.0259},{"modelName":"Grok 4.1 Fast","score":86.2,"functional":81.9,"quality":78.8,"passed":true,"tokens":3509,"timeMs":112429,"cost":0.0011}]},{"taskId":"glue-code/message-queue/task-5","category":"glue-code","subcategory":"message-queue","results":[{"modelName":"GLM 4-Plus","score":88.6,"functional":84.1,"quality":80.4,"passed":true,"tokens":4067,"timeMs":118308,"cost":0.005},{"modelName":"MiniMax M2.1","score":84.4,"functional":80.2,"quality":79.4,"passed":true,"tokens":14820,"timeMs":176836,"cost":0.012},{"modelName":"GLM-4.7","score":87.9,"functional":83.5,"quality":81.1,"passed":true,"tokens":4055,"timeMs":64741,"cost":0.0034},{"modelName":"Gemini 3 Flash","score":85.1,"functional":80.8,"quality":75.9,"passed":true,"tokens":1789,"timeMs":32717,"cost":0.0048},{"modelName":"Gemini 3 Pro Preview","score":77,"functional":73.2,"quality":78,"passed":true,"tokens":3643,"timeMs":37563,"cost":0.0246},{"modelName":"Claude Sonnet 
4.5","score":92.3,"functional":87.7,"quality":81.4,"passed":true,"tokens":2840,"timeMs":41248,"cost":0.0402},{"modelName":"Claude Opus 4.5","score":83.9,"functional":79.7,"quality":78.7,"passed":true,"tokens":2921,"timeMs":39138,"cost":0.0771},{"modelName":"Claude Haiku 4.5","score":83.4,"functional":79.3,"quality":78.2,"passed":true,"tokens":3871,"timeMs":20062,"cost":0.0168},{"modelName":"DeepSeek v3.2","score":83.9,"functional":79.7,"quality":79,"passed":true,"tokens":2614,"timeMs":70983,"cost":0.0023},{"modelName":"OpenAI GPT-5.2","score":91.6,"functional":87,"quality":80.7,"passed":true,"tokens":2543,"timeMs":30690,"cost":0.03},{"modelName":"GLM 4.7 Flash","score":51.3,"functional":48.7,"quality":78.5,"passed":false,"tokens":1340,"timeMs":5841,"cost":0.0003},{"modelName":"Grok 4 Fast","score":87.1,"functional":82.8,"quality":79.8,"passed":true,"tokens":2402,"timeMs":53335,"cost":0.0013},{"modelName":"Grok 4","score":82.1,"functional":78,"quality":78.5,"passed":true,"tokens":2572,"timeMs":54703,"cost":0.0244},{"modelName":"Grok 4.1 Fast","score":88.6,"functional":84.1,"quality":79.5,"passed":true,"tokens":3616,"timeMs":94622,"cost":0.0011}]},{"taskId":"glue-code/scheduler/task-1","category":"glue-code","subcategory":"scheduler","results":[{"modelName":"GLM 4-Plus","score":86.1,"functional":81.8,"quality":79.7,"passed":true,"tokens":3825,"timeMs":124385,"cost":0.0048},{"modelName":"MiniMax M2.1","score":86.8,"functional":82.5,"quality":80.1,"passed":true,"tokens":15009,"timeMs":194576,"cost":0.0157},{"modelName":"GLM-4.7","score":87.3,"functional":82.9,"quality":80.9,"passed":true,"tokens":3874,"timeMs":54738,"cost":0.0033},{"modelName":"Gemini 3 Flash","score":82.7,"functional":78.6,"quality":75.2,"passed":true,"tokens":1819,"timeMs":33085,"cost":0.005},{"modelName":"Gemini 3 Pro Preview","score":0,"functional":0,"quality":0,"passed":false,"tokens":0,"timeMs":0,"cost":0},{"modelName":"Claude Sonnet 
4.5","score":91.1,"functional":86.5,"quality":81,"passed":true,"tokens":2949,"timeMs":40794,"cost":0.0451},{"modelName":"Claude Opus 4.5","score":83.2,"functional":79,"quality":78.5,"passed":true,"tokens":3503,"timeMs":36040,"cost":0.0718},{"modelName":"Claude Haiku 4.5","score":85,"functional":80.8,"quality":78.7,"passed":true,"tokens":5228,"timeMs":20685,"cost":0.0138},{"modelName":"DeepSeek v3.2","score":82.5,"functional":78.4,"quality":78.6,"passed":true,"tokens":2581,"timeMs":72417,"cost":0.0029},{"modelName":"OpenAI GPT-5.2","score":92.6,"functional":88,"quality":81,"passed":true,"tokens":3161,"timeMs":33720,"cost":0.0263},{"modelName":"GLM 4.7 Flash","score":54.1,"functional":0,"quality":80,"passed":false,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":89.6,"functional":85.1,"quality":80.5,"passed":true,"tokens":2949,"timeMs":81270,"cost":0.0013},{"modelName":"Grok 4","score":83,"functional":78.9,"quality":78.8,"passed":true,"tokens":2393,"timeMs":89210,"cost":0.0304},{"modelName":"Grok 4.1 Fast","score":90.2,"functional":85.7,"quality":80,"passed":true,"tokens":2601,"timeMs":113798,"cost":0.0011},{"modelName":"Qwen3 Max","score":88.8,"functional":85,"quality":80,"passed":true,"tokens":3809,"timeMs":70200,"cost":0.0209}]},{"taskId":"glue-code/scheduler/task-2","category":"glue-code","subcategory":"scheduler","results":[{"modelName":"GLM 4-Plus","score":83.9,"functional":79.7,"quality":79,"passed":true,"tokens":4035,"timeMs":70340,"cost":0.0051},{"modelName":"MiniMax M2.1","score":89.2,"functional":84.7,"quality":80.8,"passed":true,"tokens":14147,"timeMs":192579,"cost":0.0139},{"modelName":"GLM-4.7","score":85.5,"functional":81.3,"quality":80.4,"passed":true,"tokens":4118,"timeMs":63740,"cost":0.0044},{"modelName":"Gemini 3 Flash","score":80.3,"functional":76.3,"quality":74.5,"passed":true,"tokens":2027,"timeMs":30006,"cost":0.0045},{"modelName":"Gemini 3 Pro 
Preview","score":0,"functional":0,"quality":0,"passed":false,"tokens":0,"timeMs":0,"cost":0},{"modelName":"Claude Sonnet 4.5","score":88.9,"functional":84.5,"quality":80.4,"passed":true,"tokens":2886,"timeMs":38439,"cost":0.0355},{"modelName":"Claude Opus 4.5","score":83.6,"functional":79.4,"quality":78.6,"passed":true,"tokens":3838,"timeMs":35329,"cost":0.0699},{"modelName":"Claude Haiku 4.5","score":87.3,"functional":82.9,"quality":79.4,"passed":true,"tokens":4410,"timeMs":26825,"cost":0.0147},{"modelName":"DeepSeek v3.2","score":82.2,"functional":78.1,"quality":78.5,"passed":true,"tokens":2587,"timeMs":71335,"cost":0.0031},{"modelName":"OpenAI GPT-5.2","score":92.5,"functional":87.9,"quality":81,"passed":true,"tokens":2782,"timeMs":29762,"cost":0.0233},{"modelName":"GLM 4.7 Flash","score":54.1,"functional":0,"quality":80,"passed":false,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":91.6,"functional":87,"quality":81.1,"passed":true,"tokens":2651,"timeMs":55609,"cost":0.0011},{"modelName":"Grok 4","score":85,"functional":80.7,"quality":79.4,"passed":true,"tokens":3032,"timeMs":63567,"cost":0.0294},{"modelName":"Grok 4.1 Fast","score":90.8,"functional":86.3,"quality":80.2,"passed":true,"tokens":2745,"timeMs":81865,"cost":0.0012},{"modelName":"Qwen3 Max","score":88.8,"functional":85,"quality":80,"passed":true,"tokens":3809,"timeMs":70200,"cost":0.0209}]},{"taskId":"glue-code/scheduler/task-3","category":"glue-code","subcategory":"scheduler","results":[{"modelName":"GLM 4-Plus","score":82.5,"functional":78.4,"quality":78.6,"passed":true,"tokens":4651,"timeMs":109821,"cost":0.0053},{"modelName":"MiniMax M2.1","score":90.8,"functional":86.3,"quality":81.3,"passed":true,"tokens":17866,"timeMs":201459,"cost":0.0148},{"modelName":"GLM-4.7","score":83.2,"functional":79,"quality":79.6,"passed":true,"tokens":2779,"timeMs":60032,"cost":0.0035},{"modelName":"Gemini 3 
Flash","score":78.4,"functional":74.5,"quality":73.9,"passed":true,"tokens":1907,"timeMs":20152,"cost":0.0039},{"modelName":"Gemini 3 Pro Preview","score":0,"functional":0,"quality":0,"passed":false,"tokens":0,"timeMs":0,"cost":0},{"modelName":"Claude Sonnet 4.5","score":86.4,"functional":82.1,"quality":79.7,"passed":true,"tokens":3223,"timeMs":29604,"cost":0.0445},{"modelName":"Claude Opus 4.5","score":85.2,"functional":80.9,"quality":79.1,"passed":true,"tokens":4030,"timeMs":33705,"cost":0.0817},{"modelName":"Claude Haiku 4.5","score":89.8,"functional":85.3,"quality":80.1,"passed":true,"tokens":3551,"timeMs":22498,"cost":0.0151},{"modelName":"DeepSeek v3.2","score":83.2,"functional":79.1,"quality":78.8,"passed":true,"tokens":3404,"timeMs":99917,"cost":0.0024},{"modelName":"OpenAI GPT-5.2","score":91.2,"functional":86.7,"quality":80.6,"passed":true,"tokens":2580,"timeMs":31824,"cost":0.0329},{"modelName":"GLM 4.7 Flash","score":54.1,"functional":0,"quality":80,"passed":false,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":92.7,"functional":88.1,"quality":81.5,"passed":true,"tokens":2397,"timeMs":50516,"cost":0.001},{"modelName":"Grok 4","score":87.4,"functional":83,"quality":80.1,"passed":true,"tokens":2944,"timeMs":79620,"cost":0.0304},{"modelName":"Grok 4.1 Fast","score":90.2,"functional":85.6,"quality":80,"passed":true,"tokens":2618,"timeMs":91387,"cost":0.0013},{"modelName":"Qwen3 Max","score":88.8,"functional":85,"quality":80,"passed":true,"tokens":3809,"timeMs":70200,"cost":0.0209}]},{"taskId":"glue-code/scheduler/task-4","category":"glue-code","subcategory":"scheduler","results":[{"modelName":"GLM 4-Plus","score":82.3,"functional":78.1,"quality":78.5,"passed":true,"tokens":5252,"timeMs":115564,"cost":0.0057},{"modelName":"MiniMax 
M2.1","score":91.4,"functional":86.8,"quality":81.5,"passed":true,"tokens":13372,"timeMs":143595,"cost":0.0133},{"modelName":"GLM-4.7","score":80.8,"functional":76.7,"quality":78.9,"passed":true,"tokens":4068,"timeMs":40800,"cost":0.0043},{"modelName":"Gemini 3 Flash","score":77.5,"functional":73.6,"quality":73.6,"passed":true,"tokens":1864,"timeMs":35343,"cost":0.0045},{"modelName":"Gemini 3 Pro Preview","score":0,"functional":0,"quality":0,"passed":false,"tokens":0,"timeMs":0,"cost":0},{"modelName":"Claude Sonnet 4.5","score":84.2,"functional":80,"quality":79,"passed":true,"tokens":2905,"timeMs":52188,"cost":0.0438},{"modelName":"Claude Opus 4.5","score":87.5,"functional":83.1,"quality":79.8,"passed":true,"tokens":3986,"timeMs":47364,"cost":0.0671},{"modelName":"Claude Haiku 4.5","score":91.8,"functional":87.2,"quality":80.7,"passed":true,"tokens":4004,"timeMs":15880,"cost":0.0199},{"modelName":"DeepSeek v3.2","score":85.2,"functional":80.9,"quality":79.4,"passed":true,"tokens":2871,"timeMs":91151,"cost":0.003},{"modelName":"OpenAI GPT-5.2","score":89.1,"functional":84.6,"quality":80,"passed":true,"tokens":2572,"timeMs":23055,"cost":0.0314},{"modelName":"GLM 4.7 Flash","score":54.1,"functional":0,"quality":80,"passed":false,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":92.6,"functional":88,"quality":81.4,"passed":true,"tokens":2594,"timeMs":58429,"cost":0.001},{"modelName":"Grok 4","score":89.8,"functional":85.3,"quality":80.8,"passed":true,"tokens":3055,"timeMs":97489,"cost":0.0321},{"modelName":"Grok 4.1 Fast","score":88.4,"functional":84,"quality":79.5,"passed":true,"tokens":2605,"timeMs":99836,"cost":0.0012},{"modelName":"Qwen3 Max","score":88.8,"functional":85,"quality":80,"passed":true,"tokens":3809,"timeMs":70200,"cost":0.0209}]},{"taskId":"glue-code/scheduler/task-5","category":"glue-code","subcategory":"scheduler","results":[{"modelName":"GLM 
4-Plus","score":83.2,"functional":79.1,"quality":78.8,"passed":true,"tokens":4009,"timeMs":102279,"cost":0.0054},{"modelName":"MiniMax M2.1","score":90.8,"functional":86.2,"quality":81.3,"passed":true,"tokens":14688,"timeMs":194701,"cost":0.0124},{"modelName":"GLM-4.7","score":78.9,"functional":74.9,"quality":78.3,"passed":true,"tokens":3872,"timeMs":60224,"cost":0.0034},{"modelName":"Gemini 3 Flash","score":77.8,"functional":73.9,"quality":73.7,"passed":true,"tokens":2110,"timeMs":26018,"cost":0.0044},{"modelName":"Gemini 3 Pro Preview","score":0,"functional":0,"quality":0,"passed":false,"tokens":0,"timeMs":0,"cost":0},{"modelName":"Claude Sonnet 4.5","score":82.9,"functional":78.7,"quality":78.6,"passed":true,"tokens":3710,"timeMs":53138,"cost":0.0417},{"modelName":"Claude Opus 4.5","score":89.9,"functional":85.4,"quality":80.5,"passed":true,"tokens":3620,"timeMs":35956,"cost":0.0603},{"modelName":"Claude Haiku 4.5","score":92.9,"functional":88.2,"quality":81,"passed":true,"tokens":4626,"timeMs":16921,"cost":0.0163},{"modelName":"DeepSeek v3.2","score":87.6,"functional":83.2,"quality":80.1,"passed":true,"tokens":3531,"timeMs":98875,"cost":0.0029},{"modelName":"OpenAI GPT-5.2","score":86.6,"functional":82.3,"quality":79.2,"passed":true,"tokens":3194,"timeMs":31128,"cost":0.0275},{"modelName":"GLM 4.7 Flash","score":54.1,"functional":0,"quality":80,"passed":false,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":91.3,"functional":86.7,"quality":81,"passed":true,"tokens":2958,"timeMs":81629,"cost":0.001},{"modelName":"Grok 4","score":91.4,"functional":86.8,"quality":81.3,"passed":true,"tokens":3101,"timeMs":73635,"cost":0.0322},{"modelName":"Grok 4.1 Fast","score":86.1,"functional":81.8,"quality":78.8,"passed":true,"tokens":3825,"timeMs":77108,"cost":0.0015},{"modelName":"Qwen3 
Max","score":88.8,"functional":85,"quality":80,"passed":true,"tokens":3809,"timeMs":70200,"cost":0.0209}]},{"taskId":"glue-code/webhook/task-1","category":"glue-code","subcategory":"webhook","results":[{"modelName":"GLM 4-Plus","score":85.2,"functional":80.9,"quality":79.4,"passed":true,"tokens":4354,"timeMs":70271,"cost":0.0045},{"modelName":"MiniMax M2.1","score":89.1,"functional":84.6,"quality":80.8,"passed":true,"tokens":16451,"timeMs":180691,"cost":0.0114},{"modelName":"GLM-4.7","score":77.9,"functional":74,"quality":78.1,"passed":true,"tokens":3256,"timeMs":70041,"cost":0.0044},{"modelName":"Gemini 3 Flash","score":79.2,"functional":75.3,"quality":74.1,"passed":true,"tokens":2526,"timeMs":27504,"cost":0.0039},{"modelName":"Gemini 3 Pro Preview","score":80.7,"functional":76.6,"quality":79.1,"passed":true,"tokens":3162,"timeMs":32580,"cost":0.0332},{"modelName":"Claude Sonnet 4.5","score":82.6,"functional":78.5,"quality":78.5,"passed":true,"tokens":2996,"timeMs":44385,"cost":0.0367},{"modelName":"Claude Opus 4.5","score":92,"functional":87.4,"quality":81.1,"passed":true,"tokens":3397,"timeMs":35407,"cost":0.0814},{"modelName":"Claude Haiku 4.5","score":92.8,"functional":88.1,"quality":81,"passed":true,"tokens":3570,"timeMs":16955,"cost":0.0186},{"modelName":"DeepSeek v3.2","score":89.9,"functional":85.4,"quality":80.8,"passed":true,"tokens":2624,"timeMs":105755,"cost":0.0031},{"modelName":"OpenAI GPT-5.2","score":84.4,"functional":80.2,"quality":78.6,"passed":true,"tokens":3106,"timeMs":30043,"cost":0.0232},{"modelName":"GLM 4.7 Flash","score":61.1,"functional":58,"quality":81.4,"passed":true,"tokens":1477,"timeMs":10192,"cost":0.0003},{"modelName":"Grok 4 Fast","score":89.2,"functional":84.7,"quality":80.4,"passed":true,"tokens":2466,"timeMs":88796,"cost":0.0014},{"modelName":"Grok 4","score":92,"functional":87.4,"quality":81.5,"passed":true,"tokens":3128,"timeMs":61441,"cost":0.0356},{"modelName":"Grok 4.1 
Fast","score":83.7,"functional":79.5,"quality":78.1,"passed":true,"tokens":3599,"timeMs":108515,"cost":0.0012}]},{"taskId":"glue-code/webhook/task-2","category":"glue-code","subcategory":"webhook","results":[{"modelName":"GLM 4-Plus","score":87.6,"functional":83.2,"quality":80.1,"passed":true,"tokens":3667,"timeMs":121786,"cost":0.0042},{"modelName":"MiniMax M2.1","score":86.7,"functional":82.4,"quality":80.1,"passed":true,"tokens":13072,"timeMs":137843,"cost":0.0119},{"modelName":"GLM-4.7","score":78.2,"functional":74.3,"quality":78.2,"passed":true,"tokens":3067,"timeMs":50771,"cost":0.0046},{"modelName":"Gemini 3 Flash","score":81.5,"functional":77.4,"quality":74.8,"passed":true,"tokens":2069,"timeMs":20205,"cost":0.0053},{"modelName":"Gemini 3 Pro Preview","score":82.8,"functional":78.6,"quality":79.7,"passed":true,"tokens":2774,"timeMs":30423,"cost":0.0355},{"modelName":"Claude Sonnet 4.5","score":83.6,"functional":79.4,"quality":78.8,"passed":true,"tokens":3153,"timeMs":33086,"cost":0.0351},{"modelName":"Claude Opus 4.5","score":93,"functional":88.4,"quality":81.5,"passed":true,"tokens":3105,"timeMs":37692,"cost":0.0718},{"modelName":"Claude Haiku 4.5","score":91.5,"functional":86.9,"quality":80.6,"passed":true,"tokens":5211,"timeMs":20344,"cost":0.0156},{"modelName":"DeepSeek v3.2","score":91.6,"functional":87,"quality":81.3,"passed":true,"tokens":3580,"timeMs":102473,"cost":0.0024},{"modelName":"OpenAI GPT-5.2","score":83,"functional":78.9,"quality":78.1,"passed":true,"tokens":2273,"timeMs":34841,"cost":0.0245},{"modelName":"GLM 4.7 Flash","score":61.1,"functional":58.1,"quality":81.5,"passed":true,"tokens":1432,"timeMs":9912,"cost":0.0003},{"modelName":"Grok 4 Fast","score":86.7,"functional":82.3,"quality":79.7,"passed":true,"tokens":3223,"timeMs":58133,"cost":0.0013},{"modelName":"Grok 4","score":91.4,"functional":86.8,"quality":81.3,"passed":true,"tokens":2845,"timeMs":54287,"cost":0.0329},{"modelName":"Grok 4.1 
Fast","score":81.8,"functional":77.7,"quality":77.5,"passed":true,"tokens":3390,"timeMs":107279,"cost":0.0015}]},{"taskId":"glue-code/webhook/task-3","category":"glue-code","subcategory":"webhook","results":[{"modelName":"GLM 4-Plus","score":90,"functional":85.5,"quality":80.8,"passed":true,"tokens":4918,"timeMs":102954,"cost":0.0044},{"modelName":"MiniMax M2.1","score":84.3,"functional":80.1,"quality":79.4,"passed":true,"tokens":15549,"timeMs":153474,"cost":0.0145},{"modelName":"GLM-4.7","score":79.7,"functional":75.7,"quality":78.6,"passed":true,"tokens":3947,"timeMs":47672,"cost":0.0038},{"modelName":"Gemini 3 Flash","score":83.9,"functional":79.7,"quality":75.6,"passed":true,"tokens":1971,"timeMs":21383,"cost":0.0056},{"modelName":"Gemini 3 Pro Preview","score":84,"functional":79.8,"quality":80.1,"passed":true,"tokens":3357,"timeMs":40637,"cost":0.0323},{"modelName":"Claude Sonnet 4.5","score":85.5,"functional":81.3,"quality":79.4,"passed":true,"tokens":3015,"timeMs":41956,"cost":0.0405},{"modelName":"Claude Opus 4.5","score":92.9,"functional":88.3,"quality":81.4,"passed":true,"tokens":3945,"timeMs":41710,"cost":0.0653},{"modelName":"Claude Haiku 4.5","score":89.3,"functional":84.9,"quality":80,"passed":true,"tokens":5272,"timeMs":24366,"cost":0.0188},{"modelName":"DeepSeek v3.2","score":92.2,"functional":87.6,"quality":81.5,"passed":true,"tokens":3538,"timeMs":86134,"cost":0.0027},{"modelName":"OpenAI GPT-5.2","score":82.8,"functional":78.7,"quality":78.1,"passed":true,"tokens":2273,"timeMs":36218,"cost":0.0303},{"modelName":"GLM 4.7 Flash","score":60,"functional":57,"quality":81.1,"passed":false,"tokens":1208,"timeMs":10216,"cost":0.0003},{"modelName":"Grok 4 Fast","score":84.5,"functional":80.3,"quality":79,"passed":true,"tokens":2326,"timeMs":59665,"cost":0.001},{"modelName":"Grok 4","score":89.6,"functional":85.2,"quality":80.8,"passed":true,"tokens":2973,"timeMs":79812,"cost":0.0344},{"modelName":"Grok 4.1 
Fast","score":80.8,"functional":76.8,"quality":77.2,"passed":true,"tokens":3482,"timeMs":71389,"cost":0.0013}]},{"taskId":"glue-code/webhook/task-4","category":"glue-code","subcategory":"webhook","results":[{"modelName":"GLM 4-Plus","score":91.6,"functional":87,"quality":81.3,"passed":true,"tokens":4723,"timeMs":103484,"cost":0.0055},{"modelName":"MiniMax M2.1","score":82.4,"functional":78.3,"quality":78.8,"passed":true,"tokens":13403,"timeMs":195922,"cost":0.0127},{"modelName":"GLM-4.7","score":81.9,"functional":77.8,"quality":79.3,"passed":true,"tokens":3248,"timeMs":52393,"cost":0.0034},{"modelName":"Gemini 3 Flash","score":86,"functional":81.7,"quality":76.2,"passed":true,"tokens":2241,"timeMs":31822,"cost":0.0044},{"modelName":"Gemini 3 Pro Preview","score":84,"functional":79.8,"quality":80.1,"passed":true,"tokens":3039,"timeMs":27778,"cost":0.0357},{"modelName":"Claude Sonnet 4.5","score":88,"functional":83.6,"quality":80.1,"passed":true,"tokens":3263,"timeMs":44521,"cost":0.0346},{"modelName":"Claude Opus 4.5","score":91.6,"functional":87.1,"quality":81,"passed":true,"tokens":3231,"timeMs":40783,"cost":0.0686},{"modelName":"Claude Haiku 4.5","score":86.9,"functional":82.5,"quality":79.2,"passed":true,"tokens":3805,"timeMs":26955,"cost":0.0165},{"modelName":"DeepSeek v3.2","score":91.5,"functional":87,"quality":81.3,"passed":true,"tokens":2692,"timeMs":65647,"cost":0.0027},{"modelName":"OpenAI GPT-5.2","score":83.8,"functional":79.6,"quality":78.4,"passed":true,"tokens":2883,"timeMs":20290,"cost":0.0277},{"modelName":"GLM 4.7 Flash","score":57.9,"functional":55,"quality":80.5,"passed":false,"tokens":1372,"timeMs":6278,"cost":0.0004},{"modelName":"Grok 4 Fast","score":83.1,"functional":78.9,"quality":78.6,"passed":true,"tokens":2618,"timeMs":68627,"cost":0.0012},{"modelName":"Grok 4","score":87.3,"functional":82.9,"quality":80.1,"passed":true,"tokens":2854,"timeMs":87906,"cost":0.0354},{"modelName":"Grok 4.1 
Fast","score":81.1,"functional":77.1,"quality":77.3,"passed":true,"tokens":2854,"timeMs":93481,"cost":0.0016}]},{"taskId":"glue-code/webhook/task-5","category":"glue-code","subcategory":"webhook","results":[{"modelName":"GLM 4-Plus","score":92.2,"functional":87.6,"quality":81.5,"passed":true,"tokens":4266,"timeMs":114383,"cost":0.0042},{"modelName":"MiniMax M2.1","score":81.5,"functional":77.4,"quality":78.5,"passed":true,"tokens":12531,"timeMs":156346,"cost":0.0144},{"modelName":"GLM-4.7","score":84.4,"functional":80.2,"quality":80,"passed":true,"tokens":3061,"timeMs":57112,"cost":0.0046},{"modelName":"Gemini 3 Flash","score":87.3,"functional":82.9,"quality":76.6,"passed":true,"tokens":2287,"timeMs":24074,"cost":0.005},{"modelName":"Gemini 3 Pro Preview","score":82.9,"functional":78.8,"quality":79.8,"passed":true,"tokens":3001,"timeMs":30291,"cost":0.0256},{"modelName":"Claude Sonnet 4.5","score":90.3,"functional":85.8,"quality":80.8,"passed":true,"tokens":3471,"timeMs":46643,"cost":0.0325},{"modelName":"Claude Opus 4.5","score":89.5,"functional":85,"quality":80.4,"passed":true,"tokens":3064,"timeMs":36111,"cost":0.0635},{"modelName":"Claude Haiku 4.5","score":84.7,"functional":80.4,"quality":78.6,"passed":true,"tokens":4641,"timeMs":20933,"cost":0.0181},{"modelName":"DeepSeek v3.2","score":89.8,"functional":85.3,"quality":80.8,"passed":true,"tokens":3476,"timeMs":64601,"cost":0.0024},{"modelName":"OpenAI GPT-5.2","score":85.7,"functional":81.4,"quality":79,"passed":true,"tokens":2879,"timeMs":23791,"cost":0.0329},{"modelName":"GLM 4.7 Flash","score":55.5,"functional":52.7,"quality":79.8,"passed":false,"tokens":1219,"timeMs":9452,"cost":0.0004},{"modelName":"Grok 4 Fast","score":82.9,"functional":78.7,"quality":78.5,"passed":true,"tokens":2594,"timeMs":62032,"cost":0.0011},{"modelName":"Grok 4","score":84.9,"functional":80.6,"quality":79.4,"passed":true,"tokens":2784,"timeMs":58206,"cost":0.0303},{"modelName":"Grok 4.1 
Fast","score":82.6,"functional":78.5,"quality":77.7,"passed":true,"tokens":2635,"timeMs":85602,"cost":0.0015}]},{"taskId":"glue-code/advanced/task-1","category":"glue-code","subcategory":"advanced","results":[{"modelName":"GLM 4-Plus","score":91.6,"functional":87,"quality":81.3,"passed":true,"tokens":4891,"timeMs":75731,"cost":0.0052},{"modelName":"MiniMax M2.1","score":81.8,"functional":77.7,"quality":78.6,"passed":true,"tokens":17871,"timeMs":174036,"cost":0.0154},{"modelName":"GLM-4.7","score":86.5,"functional":82.2,"quality":80.6,"passed":true,"tokens":3114,"timeMs":43865,"cost":0.0043},{"modelName":"Gemini 3 Flash","score":87.3,"functional":82.9,"quality":76.6,"passed":true,"tokens":1735,"timeMs":20655,"cost":0.0045},{"modelName":"Gemini 3 Pro Preview","score":89.6,"functional":85,"quality":80,"passed":true,"tokens":4236,"timeMs":38360,"cost":0.0593},{"modelName":"Claude Sonnet 4.5","score":92,"functional":87.4,"quality":81.3,"passed":true,"tokens":4074,"timeMs":43291,"cost":0.0457},{"modelName":"Claude Opus 4.5","score":87,"functional":82.7,"quality":79.7,"passed":true,"tokens":4180,"timeMs":54210,"cost":0.0704},{"modelName":"Claude Haiku 4.5","score":83.3,"functional":79.1,"quality":78.1,"passed":true,"tokens":4782,"timeMs":18711,"cost":0.0186},{"modelName":"DeepSeek v3.2","score":87.5,"functional":83.1,"quality":80.1,"passed":true,"tokens":2447,"timeMs":103942,"cost":0.0027},{"modelName":"OpenAI GPT-5.2","score":88.2,"functional":83.8,"quality":79.7,"passed":true,"tokens":2879,"timeMs":25303,"cost":0.0294},{"modelName":"GLM 4.7 Flash","score":89.1,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":83.8,"functional":79.6,"quality":78.8,"passed":true,"tokens":2913,"timeMs":77620,"cost":0.0011},{"modelName":"Grok 4","score":83,"functional":78.8,"quality":78.8,"passed":true,"tokens":2858,"timeMs":55079,"cost":0.0358},{"modelName":"Grok 4.1 
Fast","score":84.8,"functional":80.6,"quality":78.4,"passed":true,"tokens":2638,"timeMs":93225,"cost":0.0011},{"modelName":"Qwen3 Max","score":88.6,"functional":85,"quality":80,"passed":true,"tokens":8080,"timeMs":170875,"cost":0.0477}]},{"taskId":"glue-code/advanced/task-2","category":"glue-code","subcategory":"advanced","results":[{"modelName":"GLM 4-Plus","score":89.8,"functional":85.4,"quality":80.8,"passed":true,"tokens":5061,"timeMs":68065,"cost":0.0045},{"modelName":"MiniMax M2.1","score":83.2,"functional":79,"quality":79,"passed":true,"tokens":17951,"timeMs":121935,"cost":0.0114},{"modelName":"GLM-4.7","score":87.7,"functional":83.3,"quality":81,"passed":true,"tokens":2823,"timeMs":46133,"cost":0.0033},{"modelName":"Gemini 3 Flash","score":86.2,"functional":81.9,"quality":76.2,"passed":true,"tokens":1708,"timeMs":24394,"cost":0.0042},{"modelName":"Gemini 3 Pro Preview","score":89.6,"functional":85,"quality":80,"passed":true,"tokens":4236,"timeMs":38360,"cost":0.0593},{"modelName":"Claude Sonnet 4.5","score":92.6,"functional":87.9,"quality":81.5,"passed":true,"tokens":3639,"timeMs":42212,"cost":0.0433},{"modelName":"Claude Opus 4.5","score":84.8,"functional":80.6,"quality":79,"passed":true,"tokens":3346,"timeMs":41853,"cost":0.0803},{"modelName":"Claude Haiku 4.5","score":83,"functional":78.9,"quality":78.1,"passed":true,"tokens":4976,"timeMs":27853,"cost":0.0136},{"modelName":"DeepSeek v3.2","score":85,"functional":80.8,"quality":79.4,"passed":true,"tokens":3364,"timeMs":76588,"cost":0.0022},{"modelName":"OpenAI GPT-5.2","score":90.5,"functional":86,"quality":80.4,"passed":true,"tokens":2375,"timeMs":20954,"cost":0.0315},{"modelName":"GLM 4.7 Flash","score":89.1,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":85.8,"functional":81.5,"quality":79.4,"passed":true,"tokens":2333,"timeMs":61662,"cost":0.0011},{"modelName":"Grok 
4","score":82,"functional":77.9,"quality":78.5,"passed":true,"tokens":2540,"timeMs":77150,"cost":0.0308},{"modelName":"Grok 4.1 Fast","score":87.3,"functional":82.9,"quality":79.1,"passed":true,"tokens":2803,"timeMs":74252,"cost":0.0014},{"modelName":"Qwen3 Max","score":88.6,"functional":85,"quality":80,"passed":true,"tokens":8080,"timeMs":170875,"cost":0.0477}]},{"taskId":"glue-code/advanced/task-3","category":"glue-code","subcategory":"advanced","results":[{"modelName":"GLM 4-Plus","score":87.5,"functional":83.1,"quality":80.1,"passed":true,"tokens":5105,"timeMs":81818,"cost":0.0051},{"modelName":"MiniMax M2.1","score":85.4,"functional":81.2,"quality":79.7,"passed":true,"tokens":13948,"timeMs":130934,"cost":0.0111},{"modelName":"GLM-4.7","score":87.8,"functional":83.4,"quality":81,"passed":true,"tokens":3322,"timeMs":41517,"cost":0.0045},{"modelName":"Gemini 3 Flash","score":84.1,"functional":79.9,"quality":75.6,"passed":true,"tokens":2118,"timeMs":19892,"cost":0.0047},{"modelName":"Gemini 3 Pro Preview","score":89.6,"functional":85,"quality":80,"passed":true,"tokens":4236,"timeMs":38360,"cost":0.0593},{"modelName":"Claude Sonnet 4.5","score":91.9,"functional":87.3,"quality":81.3,"passed":true,"tokens":3348,"timeMs":43970,"cost":0.0372},{"modelName":"Claude Opus 4.5","score":83.4,"functional":79.3,"quality":78.6,"passed":true,"tokens":3188,"timeMs":45717,"cost":0.0701},{"modelName":"Claude Haiku 4.5","score":84,"functional":79.8,"quality":78.4,"passed":true,"tokens":4278,"timeMs":15674,"cost":0.0187},{"modelName":"DeepSeek v3.2","score":83.1,"functional":79,"quality":78.8,"passed":true,"tokens":2794,"timeMs":88713,"cost":0.0032},{"modelName":"OpenAI GPT-5.2","score":92.2,"functional":87.6,"quality":80.9,"passed":true,"tokens":3189,"timeMs":33687,"cost":0.0315},{"modelName":"GLM 4.7 Flash","score":89.1,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 
Fast","score":88.2,"functional":83.8,"quality":80.1,"passed":true,"tokens":2964,"timeMs":62233,"cost":0.0012},{"modelName":"Grok 4","score":82.3,"functional":78.2,"quality":78.6,"passed":true,"tokens":2381,"timeMs":76092,"cost":0.0243},{"modelName":"Grok 4.1 Fast","score":89.4,"functional":84.9,"quality":79.8,"passed":true,"tokens":3624,"timeMs":66805,"cost":0.0015},{"modelName":"Qwen3 Max","score":88.6,"functional":85,"quality":80,"passed":true,"tokens":8080,"timeMs":170875,"cost":0.0477}]},{"taskId":"glue-code/advanced/task-4","category":"glue-code","subcategory":"advanced","results":[{"modelName":"GLM 4-Plus","score":85.1,"functional":80.8,"quality":79.4,"passed":true,"tokens":4725,"timeMs":97680,"cost":0.0058},{"modelName":"MiniMax M2.1","score":87.9,"functional":83.5,"quality":80.4,"passed":true,"tokens":12598,"timeMs":189477,"cost":0.0124},{"modelName":"GLM-4.7","score":86.6,"functional":82.3,"quality":80.7,"passed":true,"tokens":3936,"timeMs":67616,"cost":0.0043},{"modelName":"Gemini 3 Flash","score":81.6,"functional":77.6,"quality":74.9,"passed":true,"tokens":1910,"timeMs":21821,"cost":0.0054},{"modelName":"Gemini 3 Pro Preview","score":89.6,"functional":85,"quality":80,"passed":true,"tokens":4236,"timeMs":38360,"cost":0.0593},{"modelName":"Claude Sonnet 4.5","score":90.2,"functional":85.7,"quality":80.8,"passed":true,"tokens":3438,"timeMs":49040,"cost":0.0377},{"modelName":"Claude Opus 4.5","score":83.2,"functional":79,"quality":78.5,"passed":true,"tokens":3458,"timeMs":43242,"cost":0.0805},{"modelName":"Claude Haiku 4.5","score":85.9,"functional":81.7,"quality":79,"passed":true,"tokens":5198,"timeMs":20002,"cost":0.0168},{"modelName":"DeepSeek v3.2","score":82.2,"functional":78.1,"quality":78.5,"passed":true,"tokens":3263,"timeMs":81634,"cost":0.0025},{"modelName":"OpenAI GPT-5.2","score":92.7,"functional":88.1,"quality":81.1,"passed":true,"tokens":2615,"timeMs":19620,"cost":0.0235},{"modelName":"GLM 4.7 
Flash","score":89.1,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":90.6,"functional":86,"quality":80.8,"passed":true,"tokens":3144,"timeMs":75558,"cost":0.0012},{"modelName":"Grok 4","score":83.8,"functional":79.6,"quality":79,"passed":true,"tokens":2819,"timeMs":77322,"cost":0.0327},{"modelName":"Grok 4.1 Fast","score":90.6,"functional":86.1,"quality":80.1,"passed":true,"tokens":3607,"timeMs":65423,"cost":0.0014},{"modelName":"Qwen3 Max","score":88.6,"functional":85,"quality":80,"passed":true,"tokens":8080,"timeMs":170875,"cost":0.0477}]},{"taskId":"glue-code/advanced/task-5","category":"glue-code","subcategory":"advanced","results":[{"modelName":"GLM 4-Plus","score":83.2,"functional":79,"quality":78.8,"passed":true,"tokens":4213,"timeMs":75703,"cost":0.0054},{"modelName":"MiniMax M2.1","score":90,"functional":85.5,"quality":81.1,"passed":true,"tokens":14655,"timeMs":192439,"cost":0.0121},{"modelName":"GLM-4.7","score":84.6,"functional":80.3,"quality":80.1,"passed":true,"tokens":2971,"timeMs":59907,"cost":0.0039},{"modelName":"Gemini 3 Flash","score":79.4,"functional":75.4,"quality":74.2,"passed":true,"tokens":2155,"timeMs":23612,"cost":0.0056},{"modelName":"Gemini 3 Pro Preview","score":89.6,"functional":85,"quality":80,"passed":true,"tokens":4236,"timeMs":38360,"cost":0.0593},{"modelName":"Claude Sonnet 4.5","score":87.8,"functional":83.5,"quality":80.1,"passed":true,"tokens":3696,"timeMs":33822,"cost":0.0365},{"modelName":"Claude Opus 4.5","score":84.2,"functional":80,"quality":78.8,"passed":true,"tokens":3193,"timeMs":52455,"cost":0.0786},{"modelName":"Claude Haiku 4.5","score":88.4,"functional":84,"quality":79.7,"passed":true,"tokens":3928,"timeMs":18219,"cost":0.0147},{"modelName":"DeepSeek v3.2","score":82.5,"functional":78.4,"quality":78.6,"passed":true,"tokens":2845,"timeMs":66083,"cost":0.0031},{"modelName":"OpenAI 
GPT-5.2","score":92.1,"functional":87.5,"quality":80.9,"passed":true,"tokens":2996,"timeMs":25114,"cost":0.0262},{"modelName":"GLM 4.7 Flash","score":89.1,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":92.2,"functional":87.6,"quality":81.3,"passed":true,"tokens":2929,"timeMs":60439,"cost":0.0012},{"modelName":"Grok 4","score":86,"functional":81.7,"quality":79.7,"passed":true,"tokens":2245,"timeMs":72507,"cost":0.0301},{"modelName":"Grok 4.1 Fast","score":90.7,"functional":86.1,"quality":80.2,"passed":true,"tokens":2814,"timeMs":76063,"cost":0.0013},{"modelName":"Qwen3 Max","score":88.6,"functional":85,"quality":80,"passed":true,"tokens":8080,"timeMs":170875,"cost":0.0477}]},{"taskId":"ai-integration/embeddings/task-1","category":"ai-integration","subcategory":"embeddings","results":[{"modelName":"GLM 4-Plus","score":81.2,"functional":77.2,"quality":78.5,"passed":true,"tokens":4840,"timeMs":88213,"cost":0.0043},{"modelName":"MiniMax M2.1","score":90.2,"functional":85.7,"quality":81.4,"passed":true,"tokens":17871,"timeMs":131948,"cost":0.0115},{"modelName":"GLM-4.7","score":81.1,"functional":77.1,"quality":79.3,"passed":true,"tokens":3170,"timeMs":65187,"cost":0.004},{"modelName":"Gemini 3 Flash","score":76.9,"functional":73,"quality":73.7,"passed":true,"tokens":2223,"timeMs":22858,"cost":0.0051},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4467,"timeMs":39961,"cost":0.0626},{"modelName":"Claude Sonnet 4.5","score":84.4,"functional":80.2,"quality":79.4,"passed":true,"tokens":3292,"timeMs":52563,"cost":0.0385},{"modelName":"Claude Opus 4.5","score":85.1,"functional":80.9,"quality":79.4,"passed":true,"tokens":3765,"timeMs":32164,"cost":0.0555},{"modelName":"Claude Haiku 4.5","score":89.7,"functional":85.2,"quality":80.4,"passed":true,"tokens":4885,"timeMs":25429,"cost":0.015},{"modelName":"DeepSeek 
v3.2","score":83,"functional":78.8,"quality":79,"passed":true,"tokens":3138,"timeMs":72812,"cost":0.0033},{"modelName":"OpenAI GPT-5.2","score":89.4,"functional":84.9,"quality":80.4,"passed":true,"tokens":2859,"timeMs":34900,"cost":0.0303},{"modelName":"GLM 4.7 Flash","score":86,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":91.8,"functional":87.2,"quality":81.5,"passed":true,"tokens":3422,"timeMs":81983,"cost":0.0012},{"modelName":"Grok 4","score":87.5,"functional":83.1,"quality":80.4,"passed":true,"tokens":2630,"timeMs":53254,"cost":0.0326},{"modelName":"Grok 4.1 Fast","score":88.5,"functional":84.1,"quality":79.8,"passed":true,"tokens":3337,"timeMs":74808,"cost":0.0014},{"modelName":"Qwen3 Max","score":88.2,"functional":85,"quality":80,"passed":true,"tokens":4995,"timeMs":106014,"cost":0.0282}]},{"taskId":"ai-integration/embeddings/task-2","category":"ai-integration","subcategory":"embeddings","results":[{"modelName":"GLM 4-Plus","score":81.5,"functional":77.5,"quality":78.6,"passed":true,"tokens":4659,"timeMs":112352,"cost":0.0048},{"modelName":"MiniMax M2.1","score":90.3,"functional":85.8,"quality":81.5,"passed":true,"tokens":16554,"timeMs":134094,"cost":0.0124},{"modelName":"GLM-4.7","score":78.8,"functional":74.9,"quality":78.6,"passed":true,"tokens":4061,"timeMs":64174,"cost":0.0046},{"modelName":"Gemini 3 Flash","score":76.5,"functional":72.6,"quality":73.6,"passed":true,"tokens":2547,"timeMs":20561,"cost":0.0048},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4467,"timeMs":39961,"cost":0.0626},{"modelName":"Claude Sonnet 4.5","score":82.5,"functional":78.4,"quality":78.8,"passed":true,"tokens":3299,"timeMs":51477,"cost":0.0429},{"modelName":"Claude Opus 4.5","score":87.6,"functional":83.2,"quality":80.1,"passed":true,"tokens":3280,"timeMs":38650,"cost":0.0708},{"modelName":"Claude Haiku 
4.5","score":91.4,"functional":86.8,"quality":80.9,"passed":true,"tokens":5272,"timeMs":18706,"cost":0.0174},{"modelName":"DeepSeek v3.2","score":85.2,"functional":80.9,"quality":79.7,"passed":true,"tokens":3571,"timeMs":112950,"cost":0.0031},{"modelName":"OpenAI GPT-5.2","score":87,"functional":82.7,"quality":79.6,"passed":true,"tokens":3204,"timeMs":34545,"cost":0.0301},{"modelName":"GLM 4.7 Flash","score":86,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":91.2,"functional":86.6,"quality":81.3,"passed":true,"tokens":3381,"timeMs":79779,"cost":0.0013},{"modelName":"Grok 4","score":89.6,"functional":85.1,"quality":81.1,"passed":true,"tokens":2646,"timeMs":96242,"cost":0.0353},{"modelName":"Grok 4.1 Fast","score":86.5,"functional":82.1,"quality":79.2,"passed":true,"tokens":2582,"timeMs":114545,"cost":0.0012},{"modelName":"Qwen3 Max","score":88.2,"functional":85,"quality":80,"passed":true,"tokens":4995,"timeMs":106014,"cost":0.0282}]},{"taskId":"ai-integration/embeddings/task-3","category":"ai-integration","subcategory":"embeddings","results":[{"modelName":"GLM 4-Plus","score":83,"functional":78.8,"quality":79,"passed":true,"tokens":4877,"timeMs":120542,"cost":0.0046},{"modelName":"MiniMax M2.1","score":89.1,"functional":84.7,"quality":81.1,"passed":true,"tokens":14282,"timeMs":172355,"cost":0.0107},{"modelName":"GLM-4.7","score":77.3,"functional":73.5,"quality":78.2,"passed":true,"tokens":2983,"timeMs":72970,"cost":0.004},{"modelName":"Gemini 3 Flash","score":77.3,"functional":73.4,"quality":73.9,"passed":true,"tokens":2091,"timeMs":25754,"cost":0.004},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4467,"timeMs":39961,"cost":0.0626},{"modelName":"Claude Sonnet 4.5","score":81.6,"functional":77.5,"quality":78.5,"passed":true,"tokens":3766,"timeMs":54339,"cost":0.0461},{"modelName":"Claude Opus 
4.5","score":89.9,"functional":85.4,"quality":80.8,"passed":true,"tokens":4256,"timeMs":42037,"cost":0.0812},{"modelName":"Claude Haiku 4.5","score":92,"functional":87.4,"quality":81.1,"passed":true,"tokens":3983,"timeMs":15959,"cost":0.0172},{"modelName":"DeepSeek v3.2","score":87.7,"functional":83.3,"quality":80.4,"passed":true,"tokens":2773,"timeMs":81048,"cost":0.0032},{"modelName":"OpenAI GPT-5.2","score":84.6,"functional":80.4,"quality":78.9,"passed":true,"tokens":2417,"timeMs":34206,"cost":0.0251},{"modelName":"GLM 4.7 Flash","score":86,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":89.4,"functional":85,"quality":80.8,"passed":true,"tokens":2418,"timeMs":74223,"cost":0.0014},{"modelName":"Grok 4","score":90.8,"functional":86.3,"quality":81.4,"passed":true,"tokens":2151,"timeMs":96799,"cost":0.0271},{"modelName":"Grok 4.1 Fast","score":84,"functional":79.8,"quality":78.5,"passed":true,"tokens":2928,"timeMs":86490,"cost":0.0011},{"modelName":"Qwen3 Max","score":88.2,"functional":85,"quality":80,"passed":true,"tokens":4995,"timeMs":106014,"cost":0.0282}]},{"taskId":"ai-integration/embeddings/task-4","category":"ai-integration","subcategory":"embeddings","results":[{"modelName":"GLM 4-Plus","score":85.2,"functional":81,"quality":79.7,"passed":true,"tokens":5168,"timeMs":94273,"cost":0.0044},{"modelName":"MiniMax M2.1","score":87.1,"functional":82.7,"quality":80.5,"passed":true,"tokens":14367,"timeMs":151160,"cost":0.0128},{"modelName":"GLM-4.7","score":76.9,"functional":73.1,"quality":78.1,"passed":true,"tokens":2908,"timeMs":53930,"cost":0.0044},{"modelName":"Gemini 3 Flash","score":79.1,"functional":75.2,"quality":74.4,"passed":true,"tokens":2106,"timeMs":26784,"cost":0.0039},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4467,"timeMs":39961,"cost":0.0626},{"modelName":"Claude Sonnet 
4.5","score":81.9,"functional":77.8,"quality":78.6,"passed":true,"tokens":2903,"timeMs":33496,"cost":0.0463},{"modelName":"Claude Opus 4.5","score":91.6,"functional":87,"quality":81.3,"passed":true,"tokens":3969,"timeMs":35026,"cost":0.059},{"modelName":"Claude Haiku 4.5","score":91.3,"functional":86.8,"quality":80.9,"passed":true,"tokens":5009,"timeMs":26257,"cost":0.0135},{"modelName":"DeepSeek v3.2","score":89.8,"functional":85.3,"quality":81.1,"passed":true,"tokens":2973,"timeMs":83751,"cost":0.0032},{"modelName":"OpenAI GPT-5.2","score":82.7,"functional":78.6,"quality":78.3,"passed":true,"tokens":2739,"timeMs":22052,"cost":0.0244},{"modelName":"GLM 4.7 Flash","score":86,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":87.1,"functional":82.7,"quality":80.1,"passed":true,"tokens":2767,"timeMs":67336,"cost":0.0011},{"modelName":"Grok 4","score":90.9,"functional":86.3,"quality":81.5,"passed":true,"tokens":2911,"timeMs":57095,"cost":0.0361},{"modelName":"Grok 4.1 Fast","score":81.7,"functional":77.7,"quality":77.8,"passed":true,"tokens":3728,"timeMs":64879,"cost":0.0013},{"modelName":"Qwen3 Max","score":88.2,"functional":85,"quality":80,"passed":true,"tokens":4995,"timeMs":106014,"cost":0.0282}]},{"taskId":"ai-integration/function-calling/task-1","category":"ai-integration","subcategory":"function-calling","results":[{"modelName":"GLM 4-Plus","score":87.7,"functional":83.3,"quality":80.4,"passed":true,"tokens":4922,"timeMs":106010,"cost":0.0046},{"modelName":"MiniMax M2.1","score":84.6,"functional":80.4,"quality":79.8,"passed":true,"tokens":14588,"timeMs":175297,"cost":0.0147},{"modelName":"GLM-4.7","score":77.7,"functional":73.9,"quality":78.3,"passed":true,"tokens":3451,"timeMs":50862,"cost":0.0048},{"modelName":"Gemini 3 Flash","score":81.5,"functional":77.5,"quality":75.1,"passed":true,"tokens":2491,"timeMs":29179,"cost":0.0055},{"modelName":"Gemini 3 Pro 
Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4550,"timeMs":41533,"cost":0.0637},{"modelName":"Claude Sonnet 4.5","score":83.3,"functional":79.2,"quality":79,"passed":true,"tokens":2805,"timeMs":42769,"cost":0.0398},{"modelName":"Claude Opus 4.5","score":92.1,"functional":87.5,"quality":81.5,"passed":true,"tokens":3044,"timeMs":40884,"cost":0.0604},{"modelName":"Claude Haiku 4.5","score":89.6,"functional":85.1,"quality":80.4,"passed":true,"tokens":3783,"timeMs":18277,"cost":0.0179},{"modelName":"DeepSeek v3.2","score":91,"functional":86.5,"quality":81.4,"passed":true,"tokens":2425,"timeMs":101832,"cost":0.0027},{"modelName":"OpenAI GPT-5.2","score":81.8,"functional":77.7,"quality":78.1,"passed":true,"tokens":2564,"timeMs":24478,"cost":0.0251},{"modelName":"GLM 4.7 Flash","score":86,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":84.7,"functional":80.4,"quality":79.4,"passed":true,"tokens":3218,"timeMs":74220,"cost":0.0013},{"modelName":"Grok 4","score":89.7,"functional":85.2,"quality":81.1,"passed":true,"tokens":2646,"timeMs":89837,"cost":0.0279},{"modelName":"Grok 4.1 Fast","score":80.2,"functional":76.2,"quality":77.3,"passed":true,"tokens":3567,"timeMs":106295,"cost":0.0012},{"modelName":"Qwen3 Max","score":88.8,"functional":85,"quality":80,"passed":true,"tokens":3954,"timeMs":70171,"cost":0.0216}]},{"taskId":"ai-integration/function-calling/task-2","category":"ai-integration","subcategory":"function-calling","results":[{"modelName":"GLM 4-Plus","score":89.8,"functional":85.3,"quality":81.1,"passed":true,"tokens":4220,"timeMs":90653,"cost":0.0054},{"modelName":"MiniMax M2.1","score":82.4,"functional":78.2,"quality":79.1,"passed":true,"tokens":13283,"timeMs":156290,"cost":0.0159},{"modelName":"GLM-4.7","score":79.6,"functional":75.6,"quality":78.9,"passed":true,"tokens":4113,"timeMs":48211,"cost":0.0035},{"modelName":"Gemini 3 
Flash","score":83.9,"functional":79.7,"quality":75.9,"passed":true,"tokens":1782,"timeMs":27653,"cost":0.0041},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4550,"timeMs":41533,"cost":0.0637},{"modelName":"Claude Sonnet 4.5","score":85.6,"functional":81.3,"quality":79.7,"passed":true,"tokens":3058,"timeMs":49021,"cost":0.0432},{"modelName":"Claude Opus 4.5","score":91.5,"functional":86.9,"quality":81.3,"passed":true,"tokens":3257,"timeMs":36010,"cost":0.0721},{"modelName":"Claude Haiku 4.5","score":87.3,"functional":82.9,"quality":79.6,"passed":true,"tokens":3690,"timeMs":20447,"cost":0.0158},{"modelName":"DeepSeek v3.2","score":91.1,"functional":86.5,"quality":81.5,"passed":true,"tokens":2883,"timeMs":93054,"cost":0.0026},{"modelName":"OpenAI GPT-5.2","score":82.1,"functional":78,"quality":78.2,"passed":true,"tokens":2439,"timeMs":21635,"cost":0.0311},{"modelName":"GLM 4.7 Flash","score":86,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":82.8,"functional":78.6,"quality":78.8,"passed":true,"tokens":3026,"timeMs":74358,"cost":0.001},{"modelName":"Grok 4","score":87.7,"functional":83.3,"quality":80.5,"passed":true,"tokens":2934,"timeMs":73465,"cost":0.0326},{"modelName":"Grok 4.1 Fast","score":79.8,"functional":75.8,"quality":77.2,"passed":true,"tokens":2901,"timeMs":70497,"cost":0.0016},{"modelName":"Qwen3 Max","score":88.8,"functional":85,"quality":80,"passed":true,"tokens":3954,"timeMs":70171,"cost":0.0216}]},{"taskId":"ai-integration/function-calling/task-3","category":"ai-integration","subcategory":"function-calling","results":[{"modelName":"GLM 4-Plus","score":91,"functional":86.5,"quality":81.4,"passed":true,"tokens":5244,"timeMs":117423,"cost":0.0042},{"modelName":"MiniMax 
M2.1","score":80.8,"functional":76.8,"quality":78.6,"passed":true,"tokens":13413,"timeMs":164974,"cost":0.0145},{"modelName":"GLM-4.7","score":82,"functional":77.9,"quality":79.6,"passed":true,"tokens":3799,"timeMs":68504,"cost":0.0044},{"modelName":"Gemini 3 Flash","score":85.7,"functional":81.4,"quality":76.4,"passed":true,"tokens":1837,"timeMs":35643,"cost":0.004},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4550,"timeMs":41533,"cost":0.0637},{"modelName":"Claude Sonnet 4.5","score":88,"functional":83.6,"quality":80.4,"passed":true,"tokens":3824,"timeMs":54244,"cost":0.0364},{"modelName":"Claude Opus 4.5","score":89.8,"functional":85.3,"quality":80.8,"passed":true,"tokens":4032,"timeMs":32344,"cost":0.0819},{"modelName":"Claude Haiku 4.5","score":84.8,"functional":80.6,"quality":78.9,"passed":true,"tokens":4001,"timeMs":21236,"cost":0.0149},{"modelName":"DeepSeek v3.2","score":89.9,"functional":85.4,"quality":81.1,"passed":true,"tokens":3316,"timeMs":68151,"cost":0.0024},{"modelName":"OpenAI GPT-5.2","score":83.5,"functional":79.4,"quality":78.6,"passed":true,"tokens":2583,"timeMs":27820,"cost":0.0263},{"modelName":"GLM 4.7 Flash","score":86,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":81.8,"functional":77.7,"quality":78.5,"passed":true,"tokens":3107,"timeMs":85433,"cost":0.0013},{"modelName":"Grok 4","score":85.2,"functional":80.9,"quality":79.8,"passed":true,"tokens":2346,"timeMs":79261,"cost":0.0364},{"modelName":"Grok 4.1 Fast","score":80.6,"functional":76.6,"quality":77.5,"passed":true,"tokens":3329,"timeMs":70546,"cost":0.0012},{"modelName":"Qwen3 Max","score":88.8,"functional":85,"quality":80,"passed":true,"tokens":3954,"timeMs":70171,"cost":0.0216}]},{"taskId":"ai-integration/function-calling/task-4","category":"ai-integration","subcategory":"function-calling","results":[{"modelName":"GLM 
4-Plus","score":91.1,"functional":86.5,"quality":81.5,"passed":true,"tokens":4283,"timeMs":113680,"cost":0.0049},{"modelName":"MiniMax M2.1","score":80.4,"functional":76.4,"quality":78.5,"passed":true,"tokens":17475,"timeMs":147452,"cost":0.0121},{"modelName":"GLM-4.7","score":84.4,"functional":80.2,"quality":80.3,"passed":true,"tokens":3699,"timeMs":68826,"cost":0.0046},{"modelName":"Gemini 3 Flash","score":86.4,"functional":82.1,"quality":76.6,"passed":true,"tokens":2081,"timeMs":35664,"cost":0.0049},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4550,"timeMs":41533,"cost":0.0637},{"modelName":"Claude Sonnet 4.5","score":90.1,"functional":85.6,"quality":81.1,"passed":true,"tokens":3348,"timeMs":35400,"cost":0.0448},{"modelName":"Claude Opus 4.5","score":87.4,"functional":83.1,"quality":80.1,"passed":true,"tokens":3272,"timeMs":48854,"cost":0.0678},{"modelName":"Claude Haiku 4.5","score":82.9,"functional":78.8,"quality":78.3,"passed":true,"tokens":4391,"timeMs":15817,"cost":0.0178},{"modelName":"DeepSeek v3.2","score":87.9,"functional":83.5,"quality":80.5,"passed":true,"tokens":2983,"timeMs":80647,"cost":0.0023},{"modelName":"OpenAI GPT-5.2","score":85.8,"functional":81.5,"quality":79.3,"passed":true,"tokens":2744,"timeMs":32183,"cost":0.0231},{"modelName":"GLM 4.7 Flash","score":86,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":82.1,"functional":78,"quality":78.6,"passed":true,"tokens":2791,"timeMs":73825,"cost":0.001},{"modelName":"Grok 4","score":82.9,"functional":78.8,"quality":79.1,"passed":true,"tokens":2496,"timeMs":57854,"cost":0.0256},{"modelName":"Grok 4.1 Fast","score":82.5,"functional":78.4,"quality":78,"passed":true,"tokens":2700,"timeMs":87594,"cost":0.0016},{"modelName":"Qwen3 
Max","score":88.8,"functional":85,"quality":80,"passed":true,"tokens":3954,"timeMs":70171,"cost":0.0216}]},{"taskId":"ai-integration/multimodal/task-1","category":"ai-integration","subcategory":"multimodal","results":[{"modelName":"GLM 4-Plus","score":89.9,"functional":85.4,"quality":81.1,"passed":true,"tokens":3737,"timeMs":86381,"cost":0.006},{"modelName":"MiniMax M2.1","score":81.3,"functional":77.2,"quality":78.8,"passed":true,"tokens":13235,"timeMs":174912,"cost":0.0119},{"modelName":"GLM-4.7","score":86.2,"functional":81.8,"quality":80.8,"passed":true,"tokens":3451,"timeMs":42158,"cost":0.0037},{"modelName":"Gemini 3 Flash","score":85.9,"functional":81.7,"quality":76.5,"passed":true,"tokens":2144,"timeMs":26818,"cost":0.0042},{"modelName":"Gemini 3 Pro Preview","score":0,"functional":0,"quality":0,"passed":false,"tokens":0,"timeMs":0,"cost":0},{"modelName":"Claude Sonnet 4.5","score":91.4,"functional":86.8,"quality":81.4,"passed":true,"tokens":3364,"timeMs":33610,"cost":0.0337},{"modelName":"Claude Opus 4.5","score":85,"functional":80.8,"quality":79.4,"passed":true,"tokens":2886,"timeMs":48012,"cost":0.0701},{"modelName":"Claude Haiku 4.5","score":82,"functional":77.9,"quality":78.1,"passed":true,"tokens":5080,"timeMs":20080,"cost":0.0156},{"modelName":"DeepSeek v3.2","score":85.4,"functional":81.1,"quality":79.8,"passed":true,"tokens":2644,"timeMs":107920,"cost":0.0025},{"modelName":"OpenAI GPT-5.2","score":88.2,"functional":83.8,"quality":80,"passed":true,"tokens":3127,"timeMs":26087,"cost":0.0271},{"modelName":"GLM 4.7 Flash","score":84.6,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":83.6,"functional":79.4,"quality":79,"passed":true,"tokens":3460,"timeMs":81565,"cost":0.0013},{"modelName":"Grok 4","score":81.4,"functional":77.3,"quality":78.6,"passed":true,"tokens":2785,"timeMs":63272,"cost":0.0261},{"modelName":"Grok 4.1 
Fast","score":84.9,"functional":80.6,"quality":78.7,"passed":true,"tokens":2667,"timeMs":76340,"cost":0.0015},{"modelName":"Qwen3 Max","score":88.7,"functional":85,"quality":80,"passed":true,"tokens":4601,"timeMs":79889,"cost":0.0259}]},{"taskId":"ai-integration/multimodal/task-2","category":"ai-integration","subcategory":"multimodal","results":[{"modelName":"GLM 4-Plus","score":87.9,"functional":83.5,"quality":80.5,"passed":true,"tokens":4928,"timeMs":92013,"cost":0.0057},{"modelName":"MiniMax M2.1","score":83.1,"functional":78.9,"quality":79.3,"passed":true,"tokens":13583,"timeMs":201221,"cost":0.0154},{"modelName":"GLM-4.7","score":86.9,"functional":82.5,"quality":81.1,"passed":true,"tokens":3477,"timeMs":68849,"cost":0.0037},{"modelName":"Gemini 3 Flash","score":84.4,"functional":80.1,"quality":76,"passed":true,"tokens":2518,"timeMs":32742,"cost":0.0043},{"modelName":"Gemini 3 Pro Preview","score":0,"functional":0,"quality":0,"passed":false,"tokens":0,"timeMs":0,"cost":0},{"modelName":"Claude Sonnet 4.5","score":91.4,"functional":86.9,"quality":81.5,"passed":true,"tokens":3528,"timeMs":42520,"cost":0.0374},{"modelName":"Claude Opus 4.5","score":83.1,"functional":78.9,"quality":78.8,"passed":true,"tokens":3736,"timeMs":53808,"cost":0.077},{"modelName":"Claude Haiku 4.5","score":82.3,"functional":78.2,"quality":78.2,"passed":true,"tokens":4507,"timeMs":17576,"cost":0.0196},{"modelName":"DeepSeek v3.2","score":83.1,"functional":79,"quality":79.1,"passed":true,"tokens":3339,"timeMs":110677,"cost":0.0023},{"modelName":"OpenAI GPT-5.2","score":90.3,"functional":85.8,"quality":80.6,"passed":true,"tokens":2194,"timeMs":34011,"cost":0.0229},{"modelName":"GLM 4.7 Flash","score":84.6,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":85.8,"functional":81.5,"quality":79.7,"passed":true,"tokens":2818,"timeMs":59089,"cost":0.0011},{"modelName":"Grok 
4","score":81,"functional":77,"quality":78.5,"passed":true,"tokens":2760,"timeMs":59713,"cost":0.0289},{"modelName":"Grok 4.1 Fast","score":87.3,"functional":82.9,"quality":79.4,"passed":true,"tokens":2598,"timeMs":103498,"cost":0.0014},{"modelName":"Qwen3 Max","score":88.7,"functional":85,"quality":80,"passed":true,"tokens":4601,"timeMs":79889,"cost":0.0259}]},{"taskId":"ai-integration/multimodal/task-3","category":"ai-integration","subcategory":"multimodal","results":[{"modelName":"GLM 4-Plus","score":85.4,"functional":81.1,"quality":79.8,"passed":true,"tokens":4017,"timeMs":71259,"cost":0.0048},{"modelName":"MiniMax M2.1","score":85.5,"functional":81.2,"quality":80,"passed":true,"tokens":15312,"timeMs":139775,"cost":0.0127},{"modelName":"GLM-4.7","score":86.4,"functional":82.1,"quality":80.9,"passed":true,"tokens":2907,"timeMs":60194,"cost":0.004},{"modelName":"Gemini 3 Flash","score":82.1,"functional":78,"quality":75.3,"passed":true,"tokens":2322,"timeMs":24932,"cost":0.0056},{"modelName":"Gemini 3 Pro Preview","score":0,"functional":0,"quality":0,"passed":false,"tokens":0,"timeMs":0,"cost":0},{"modelName":"Claude Sonnet 4.5","score":90.3,"functional":85.8,"quality":81.1,"passed":true,"tokens":3012,"timeMs":54395,"cost":0.0393},{"modelName":"Claude Opus 4.5","score":82.2,"functional":78.1,"quality":78.5,"passed":true,"tokens":3255,"timeMs":39438,"cost":0.0617},{"modelName":"Claude Haiku 4.5","score":83.8,"functional":79.6,"quality":78.6,"passed":true,"tokens":3706,"timeMs":24317,"cost":0.0182},{"modelName":"DeepSeek v3.2","score":81.6,"functional":77.5,"quality":78.6,"passed":true,"tokens":3497,"timeMs":98923,"cost":0.0033},{"modelName":"OpenAI GPT-5.2","score":91.6,"functional":87,"quality":81,"passed":true,"tokens":3010,"timeMs":35641,"cost":0.0304},{"modelName":"GLM 4.7 Flash","score":84.6,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 
Fast","score":88.3,"functional":83.9,"quality":80.4,"passed":true,"tokens":2919,"timeMs":70287,"cost":0.0012},{"modelName":"Grok 4","score":81.8,"functional":77.7,"quality":78.8,"passed":true,"tokens":2374,"timeMs":64051,"cost":0.0323},{"modelName":"Grok 4.1 Fast","score":89.1,"functional":84.6,"quality":80,"passed":true,"tokens":3401,"timeMs":69138,"cost":0.0011},{"modelName":"Qwen3 Max","score":88.7,"functional":85,"quality":80,"passed":true,"tokens":4601,"timeMs":79889,"cost":0.0259}]},{"taskId":"ai-integration/multimodal/task-4","category":"ai-integration","subcategory":"multimodal","results":[{"modelName":"GLM 4-Plus","score":83.1,"functional":79,"quality":79.1,"passed":true,"tokens":4939,"timeMs":74063,"cost":0.0047},{"modelName":"MiniMax M2.1","score":87.9,"functional":83.5,"quality":80.7,"passed":true,"tokens":14783,"timeMs":168266,"cost":0.0156},{"modelName":"GLM-4.7","score":84.8,"functional":80.6,"quality":80.4,"passed":true,"tokens":3314,"timeMs":58246,"cost":0.0047},{"modelName":"Gemini 3 Flash","score":79.6,"functional":75.6,"quality":74.6,"passed":true,"tokens":2165,"timeMs":20700,"cost":0.005},{"modelName":"Gemini 3 Pro Preview","score":0,"functional":0,"quality":0,"passed":false,"tokens":0,"timeMs":0,"cost":0},{"modelName":"Claude Sonnet 4.5","score":88.2,"functional":83.8,"quality":80.5,"passed":true,"tokens":3101,"timeMs":40355,"cost":0.0426},{"modelName":"Claude Opus 4.5","score":82.5,"functional":78.4,"quality":78.6,"passed":true,"tokens":4233,"timeMs":49337,"cost":0.06},{"modelName":"Claude Haiku 4.5","score":86,"functional":81.7,"quality":79.3,"passed":true,"tokens":3643,"timeMs":15720,"cost":0.0163},{"modelName":"DeepSeek v3.2","score":81.2,"functional":77.1,"quality":78.5,"passed":true,"tokens":3083,"timeMs":78126,"cost":0.0028},{"modelName":"OpenAI GPT-5.2","score":91.6,"functional":87,"quality":81,"passed":true,"tokens":2868,"timeMs":26453,"cost":0.0298},{"modelName":"GLM 4.7 
Flash","score":84.6,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":90.4,"functional":85.9,"quality":81.1,"passed":true,"tokens":2554,"timeMs":57025,"cost":0.0012},{"modelName":"Grok 4","score":83.7,"functional":79.5,"quality":79.3,"passed":true,"tokens":2786,"timeMs":55740,"cost":0.034},{"modelName":"Grok 4.1 Fast","score":89.8,"functional":85.3,"quality":80.2,"passed":true,"tokens":3789,"timeMs":113084,"cost":0.0015},{"modelName":"Qwen3 Max","score":88.7,"functional":85,"quality":80,"passed":true,"tokens":4601,"timeMs":79889,"cost":0.0259}]},{"taskId":"ai-integration/rag-chatbot/task-1","category":"ai-integration","subcategory":"rag-chatbot","results":[{"modelName":"GLM 4-Plus","score":81.6,"functional":77.5,"quality":78.6,"passed":true,"tokens":4970,"timeMs":114639,"cost":0.0054},{"modelName":"MiniMax M2.1","score":89.7,"functional":85.2,"quality":81.3,"passed":true,"tokens":16369,"timeMs":183284,"cost":0.0135},{"modelName":"GLM-4.7","score":82.5,"functional":78.4,"quality":79.7,"passed":true,"tokens":4024,"timeMs":68816,"cost":0.0044},{"modelName":"Gemini 3 Flash","score":77.6,"functional":73.7,"quality":74,"passed":true,"tokens":2001,"timeMs":34915,"cost":0.0054},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4504,"timeMs":41468,"cost":0.0631},{"modelName":"Claude Sonnet 4.5","score":85.8,"functional":81.5,"quality":79.8,"passed":true,"tokens":2900,"timeMs":53369,"cost":0.0381},{"modelName":"Claude Opus 4.5","score":83.9,"functional":79.7,"quality":79,"passed":true,"tokens":4115,"timeMs":36347,"cost":0.0669},{"modelName":"Claude Haiku 4.5","score":88.5,"functional":84,"quality":80,"passed":true,"tokens":4298,"timeMs":24316,"cost":0.019},{"modelName":"DeepSeek v3.2","score":82,"functional":77.9,"quality":78.8,"passed":true,"tokens":2660,"timeMs":69431,"cost":0.003},{"modelName":"OpenAI 
GPT-5.2","score":90.5,"functional":86,"quality":80.7,"passed":true,"tokens":2728,"timeMs":26827,"cost":0.0291},{"modelName":"GLM 4.7 Flash","score":84.4,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":91.6,"functional":87,"quality":81.4,"passed":true,"tokens":3020,"timeMs":50149,"cost":0.0012},{"modelName":"Grok 4","score":86.1,"functional":81.8,"quality":80,"passed":true,"tokens":2337,"timeMs":66136,"cost":0.0247},{"modelName":"Grok 4.1 Fast","score":89.3,"functional":84.8,"quality":80.1,"passed":true,"tokens":3566,"timeMs":66641,"cost":0.0014},{"modelName":"Qwen3 Max","score":87.6,"functional":85,"quality":80,"passed":true,"tokens":6977,"timeMs":145412,"cost":0.0399}]},{"taskId":"ai-integration/rag-chatbot/task-2","category":"ai-integration","subcategory":"rag-chatbot","results":[{"modelName":"GLM 4-Plus","score":81.2,"functional":77.2,"quality":78.5,"passed":true,"tokens":4695,"timeMs":111297,"cost":0.006},{"modelName":"MiniMax M2.1","score":90.4,"functional":85.9,"quality":81.5,"passed":true,"tokens":16504,"timeMs":138120,"cost":0.013},{"modelName":"GLM-4.7","score":80.1,"functional":76.1,"quality":79,"passed":true,"tokens":3993,"timeMs":53292,"cost":0.0046},{"modelName":"Gemini 3 Flash","score":76.5,"functional":72.7,"quality":73.6,"passed":true,"tokens":2270,"timeMs":27653,"cost":0.0041},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4504,"timeMs":41468,"cost":0.0631},{"modelName":"Claude Sonnet 4.5","score":83.5,"functional":79.3,"quality":79.1,"passed":true,"tokens":2997,"timeMs":35297,"cost":0.0363},{"modelName":"Claude Opus 4.5","score":86.2,"functional":81.9,"quality":79.7,"passed":true,"tokens":3361,"timeMs":38644,"cost":0.0706},{"modelName":"Claude Haiku 4.5","score":90.6,"functional":86,"quality":80.6,"passed":true,"tokens":4072,"timeMs":18303,"cost":0.0141},{"modelName":"DeepSeek 
v3.2","score":83.9,"functional":79.7,"quality":79.3,"passed":true,"tokens":3367,"timeMs":94781,"cost":0.0027},{"modelName":"OpenAI GPT-5.2","score":88.4,"functional":84,"quality":80.1,"passed":true,"tokens":2214,"timeMs":29534,"cost":0.0234},{"modelName":"GLM 4.7 Flash","score":84.4,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":91.7,"functional":87.1,"quality":81.5,"passed":true,"tokens":3001,"timeMs":90283,"cost":0.001},{"modelName":"Grok 4","score":88.5,"functional":84.1,"quality":80.7,"passed":true,"tokens":2513,"timeMs":85742,"cost":0.0344},{"modelName":"Grok 4.1 Fast","score":87.7,"functional":83.3,"quality":79.6,"passed":true,"tokens":3607,"timeMs":108096,"cost":0.0015},{"modelName":"Qwen3 Max","score":87.6,"functional":85,"quality":80,"passed":true,"tokens":6977,"timeMs":145412,"cost":0.0399}]},{"taskId":"ai-integration/rag-chatbot/task-3","category":"ai-integration","subcategory":"rag-chatbot","results":[{"modelName":"GLM 4-Plus","score":82,"functional":77.9,"quality":78.8,"passed":true,"tokens":3959,"timeMs":83774,"cost":0.0061},{"modelName":"MiniMax M2.1","score":89.9,"functional":85.4,"quality":81.4,"passed":true,"tokens":15645,"timeMs":126556,"cost":0.0151},{"modelName":"GLM-4.7","score":78.1,"functional":74.2,"quality":78.4,"passed":true,"tokens":3112,"timeMs":68749,"cost":0.0033},{"modelName":"Gemini 3 Flash","score":76.7,"functional":72.8,"quality":73.7,"passed":true,"tokens":1923,"timeMs":27332,"cost":0.0054},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4504,"timeMs":41468,"cost":0.0631},{"modelName":"Claude Sonnet 4.5","score":82,"functional":77.9,"quality":78.6,"passed":true,"tokens":2910,"timeMs":41301,"cost":0.0435},{"modelName":"Claude Opus 4.5","score":88.6,"functional":84.2,"quality":80.4,"passed":true,"tokens":3700,"timeMs":40016,"cost":0.0655},{"modelName":"Claude Haiku 
4.5","score":91.8,"functional":87.2,"quality":81,"passed":true,"tokens":4394,"timeMs":23943,"cost":0.0191},{"modelName":"DeepSeek v3.2","score":86.3,"functional":82,"quality":80,"passed":true,"tokens":2833,"timeMs":69576,"cost":0.003},{"modelName":"OpenAI GPT-5.2","score":86,"functional":81.7,"quality":79.3,"passed":true,"tokens":2937,"timeMs":20182,"cost":0.0304},{"modelName":"GLM 4.7 Flash","score":84.4,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":90.5,"functional":86,"quality":81.1,"passed":true,"tokens":2350,"timeMs":62862,"cost":0.001},{"modelName":"Grok 4","score":90.3,"functional":85.7,"quality":81.3,"passed":true,"tokens":2759,"timeMs":94649,"cost":0.0278},{"modelName":"Grok 4.1 Fast","score":85.4,"functional":81.1,"quality":78.9,"passed":true,"tokens":3810,"timeMs":98134,"cost":0.0015},{"modelName":"Qwen3 Max","score":87.6,"functional":85,"quality":80,"passed":true,"tokens":6977,"timeMs":145412,"cost":0.0399}]},{"taskId":"ai-integration/rag-chatbot/task-4","category":"ai-integration","subcategory":"rag-chatbot","results":[{"modelName":"GLM 4-Plus","score":83.9,"functional":79.7,"quality":79.3,"passed":true,"tokens":3824,"timeMs":72111,"cost":0.0052},{"modelName":"MiniMax M2.1","score":88.3,"functional":83.9,"quality":80.9,"passed":true,"tokens":14162,"timeMs":147281,"cost":0.0143},{"modelName":"GLM-4.7","score":77,"functional":73.1,"quality":78.1,"passed":true,"tokens":3182,"timeMs":50552,"cost":0.0039},{"modelName":"Gemini 3 Flash","score":78,"functional":74.1,"quality":74.1,"passed":true,"tokens":2554,"timeMs":30772,"cost":0.0043},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4504,"timeMs":41468,"cost":0.0631},{"modelName":"Claude Sonnet 4.5","score":81.6,"functional":77.5,"quality":78.5,"passed":true,"tokens":3595,"timeMs":30536,"cost":0.0367},{"modelName":"Claude Opus 
4.5","score":90.7,"functional":86.2,"quality":81.1,"passed":true,"tokens":3059,"timeMs":45195,"cost":0.0686},{"modelName":"Claude Haiku 4.5","score":91.8,"functional":87.2,"quality":81,"passed":true,"tokens":3573,"timeMs":25174,"cost":0.0194},{"modelName":"DeepSeek v3.2","score":88.7,"functional":84.2,"quality":80.7,"passed":true,"tokens":3529,"timeMs":80187,"cost":0.0026},{"modelName":"OpenAI GPT-5.2","score":83.7,"functional":79.5,"quality":78.6,"passed":true,"tokens":2203,"timeMs":34489,"cost":0.0308},{"modelName":"GLM 4.7 Flash","score":84.4,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":88.5,"functional":84,"quality":80.5,"passed":true,"tokens":2424,"timeMs":54077,"cost":0.0011},{"modelName":"Grok 4","score":91,"functional":86.4,"quality":81.5,"passed":true,"tokens":2189,"timeMs":93610,"cost":0.0297},{"modelName":"Grok 4.1 Fast","score":83,"functional":78.8,"quality":78.1,"passed":true,"tokens":2606,"timeMs":75174,"cost":0.0012},{"modelName":"Qwen3 Max","score":87.6,"functional":85,"quality":80,"passed":true,"tokens":6977,"timeMs":145412,"cost":0.0399}]},{"taskId":"ai-integration/structured-output/task-1","category":"ai-integration","subcategory":"structured-output","results":[{"modelName":"GLM 4-Plus","score":86.3,"functional":82,"quality":80,"passed":true,"tokens":5288,"timeMs":71019,"cost":0.0047},{"modelName":"MiniMax M2.1","score":86,"functional":81.7,"quality":80.2,"passed":true,"tokens":15741,"timeMs":210617,"cost":0.015},{"modelName":"GLM-4.7","score":77.1,"functional":73.3,"quality":78.1,"passed":true,"tokens":3181,"timeMs":67509,"cost":0.0038},{"modelName":"Gemini 3 Flash","score":80.1,"functional":76.1,"quality":74.7,"passed":true,"tokens":2201,"timeMs":21919,"cost":0.0057},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4578,"timeMs":42130,"cost":0.0641},{"modelName":"Claude Sonnet 
4.5","score":82.4,"functional":78.3,"quality":78.8,"passed":true,"tokens":3176,"timeMs":50148,"cost":0.0354},{"modelName":"Claude Opus 4.5","score":92,"functional":87.4,"quality":81.4,"passed":true,"tokens":3421,"timeMs":42895,"cost":0.0812},{"modelName":"Claude Haiku 4.5","score":90.7,"functional":86.2,"quality":80.7,"passed":true,"tokens":4913,"timeMs":27456,"cost":0.0162},{"modelName":"DeepSeek v3.2","score":90.4,"functional":85.9,"quality":81.3,"passed":true,"tokens":3140,"timeMs":106320,"cost":0.0033},{"modelName":"OpenAI GPT-5.2","score":82.2,"functional":78.1,"quality":78.2,"passed":true,"tokens":2837,"timeMs":28962,"cost":0.0241},{"modelName":"GLM 4.7 Flash","score":89.7,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":86,"functional":81.7,"quality":79.8,"passed":true,"tokens":2397,"timeMs":70047,"cost":0.0014},{"modelName":"Grok 4","score":90.5,"functional":86,"quality":81.4,"passed":true,"tokens":3097,"timeMs":94380,"cost":0.0245},{"modelName":"Grok 4.1 Fast","score":81,"functional":76.9,"quality":77.5,"passed":true,"tokens":3002,"timeMs":110700,"cost":0.0013},{"modelName":"Qwen3 Max","score":88.6,"functional":85,"quality":80,"passed":true,"tokens":4246,"timeMs":81593,"cost":0.0233}]},{"taskId":"ai-integration/structured-output/task-2","category":"ai-integration","subcategory":"structured-output","results":[{"modelName":"GLM 4-Plus","score":88.7,"functional":84.2,"quality":80.7,"passed":true,"tokens":4959,"timeMs":113736,"cost":0.0058},{"modelName":"MiniMax M2.1","score":83.6,"functional":79.4,"quality":79.4,"passed":true,"tokens":14193,"timeMs":177338,"cost":0.0153},{"modelName":"GLM-4.7","score":78.4,"functional":74.5,"quality":78.5,"passed":true,"tokens":2948,"timeMs":46226,"cost":0.0041},{"modelName":"Gemini 3 Flash","score":82.6,"functional":78.5,"quality":75.5,"passed":true,"tokens":2099,"timeMs":31107,"cost":0.0054},{"modelName":"Gemini 3 Pro 
Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4578,"timeMs":42130,"cost":0.0641},{"modelName":"Claude Sonnet 4.5","score":84.2,"functional":80,"quality":79.3,"passed":true,"tokens":3046,"timeMs":34744,"cost":0.0421},{"modelName":"Claude Opus 4.5","score":92,"functional":87.4,"quality":81.5,"passed":true,"tokens":3198,"timeMs":38641,"cost":0.0567},{"modelName":"Claude Haiku 4.5","score":88.6,"functional":84.2,"quality":80.1,"passed":true,"tokens":4119,"timeMs":19574,"cost":0.0176},{"modelName":"DeepSeek v3.2","score":91.2,"functional":86.6,"quality":81.5,"passed":true,"tokens":3022,"timeMs":103001,"cost":0.0028},{"modelName":"OpenAI GPT-5.2","score":81.8,"functional":77.7,"quality":78.1,"passed":true,"tokens":2284,"timeMs":28664,"cost":0.0231},{"modelName":"GLM 4.7 Flash","score":89.7,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":83.7,"functional":79.6,"quality":79.1,"passed":true,"tokens":2424,"timeMs":89920,"cost":0.0014},{"modelName":"Grok 4","score":88.9,"functional":84.5,"quality":80.9,"passed":true,"tokens":2996,"timeMs":55505,"cost":0.0352},{"modelName":"Grok 4.1 Fast","score":79.9,"functional":75.9,"quality":77.2,"passed":true,"tokens":3537,"timeMs":75790,"cost":0.0011},{"modelName":"Qwen3 Max","score":88.6,"functional":85,"quality":80,"passed":true,"tokens":4246,"timeMs":81593,"cost":0.0233}]},{"taskId":"ai-integration/structured-output/task-3","category":"ai-integration","subcategory":"structured-output","results":[{"modelName":"GLM 4-Plus","score":90.5,"functional":85.9,"quality":81.3,"passed":true,"tokens":4023,"timeMs":105821,"cost":0.0044},{"modelName":"MiniMax M2.1","score":81.6,"functional":77.5,"quality":78.8,"passed":true,"tokens":12597,"timeMs":117367,"cost":0.0149},{"modelName":"GLM-4.7","score":80.6,"functional":76.6,"quality":79.2,"passed":true,"tokens":3270,"timeMs":65230,"cost":0.0048},{"modelName":"Gemini 3 
Flash","score":84.8,"functional":80.6,"quality":76.1,"passed":true,"tokens":2025,"timeMs":24289,"cost":0.0038},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4578,"timeMs":42130,"cost":0.0641},{"modelName":"Claude Sonnet 4.5","score":86.6,"functional":82.3,"quality":80,"passed":true,"tokens":3416,"timeMs":35079,"cost":0.039},{"modelName":"Claude Opus 4.5","score":90.9,"functional":86.3,"quality":81.1,"passed":true,"tokens":3919,"timeMs":37718,"cost":0.0684},{"modelName":"Claude Haiku 4.5","score":86.2,"functional":81.9,"quality":79.3,"passed":true,"tokens":4989,"timeMs":27847,"cost":0.0177},{"modelName":"DeepSeek v3.2","score":90.7,"functional":86.2,"quality":81.4,"passed":true,"tokens":2839,"timeMs":66561,"cost":0.0028},{"modelName":"OpenAI GPT-5.2","score":82.6,"functional":78.5,"quality":78.3,"passed":true,"tokens":2354,"timeMs":28401,"cost":0.0293},{"modelName":"GLM 4.7 Flash","score":89.7,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":82.2,"functional":78.1,"quality":78.6,"passed":true,"tokens":2778,"timeMs":67726,"cost":0.0014},{"modelName":"Grok 4","score":86.6,"functional":82.3,"quality":80.2,"passed":true,"tokens":2666,"timeMs":54311,"cost":0.0334},{"modelName":"Grok 4.1 Fast","score":80,"functional":76,"quality":77.3,"passed":true,"tokens":2684,"timeMs":102184,"cost":0.0016},{"modelName":"Qwen3 Max","score":88.6,"functional":85,"quality":80,"passed":true,"tokens":4246,"timeMs":81593,"cost":0.0233}]},{"taskId":"ai-integration/structured-output/task-4","category":"ai-integration","subcategory":"structured-output","results":[{"modelName":"GLM 4-Plus","score":91.2,"functional":86.6,"quality":81.5,"passed":true,"tokens":4904,"timeMs":115571,"cost":0.0054},{"modelName":"MiniMax 
M2.1","score":80.5,"functional":76.5,"quality":78.5,"passed":true,"tokens":13109,"timeMs":148250,"cost":0.0107},{"modelName":"GLM-4.7","score":83.1,"functional":78.9,"quality":79.9,"passed":true,"tokens":3077,"timeMs":47794,"cost":0.0048},{"modelName":"Gemini 3 Flash","score":86.2,"functional":81.9,"quality":76.5,"passed":true,"tokens":1952,"timeMs":36076,"cost":0.0046},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4578,"timeMs":42130,"cost":0.0641},{"modelName":"Claude Sonnet 4.5","score":89,"functional":84.6,"quality":80.7,"passed":true,"tokens":4002,"timeMs":34072,"cost":0.0389},{"modelName":"Claude Opus 4.5","score":88.8,"functional":84.4,"quality":80.5,"passed":true,"tokens":3207,"timeMs":54581,"cost":0.0565},{"modelName":"Claude Haiku 4.5","score":83.9,"functional":79.7,"quality":78.6,"passed":true,"tokens":4235,"timeMs":24905,"cost":0.0146},{"modelName":"DeepSeek v3.2","score":89.1,"functional":84.7,"quality":80.9,"passed":true,"tokens":3063,"timeMs":72746,"cost":0.0031},{"modelName":"OpenAI GPT-5.2","score":84.4,"functional":80.2,"quality":78.9,"passed":true,"tokens":2764,"timeMs":27661,"cost":0.0254},{"modelName":"GLM 4.7 Flash","score":89.7,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":81.8,"functional":77.7,"quality":78.5,"passed":true,"tokens":2992,"timeMs":69258,"cost":0.0011},{"modelName":"Grok 4","score":84.2,"functional":80,"quality":79.4,"passed":true,"tokens":2821,"timeMs":93691,"cost":0.0289},{"modelName":"Grok 4.1 Fast","score":81.3,"functional":77.3,"quality":77.7,"passed":true,"tokens":3593,"timeMs":101300,"cost":0.0014},{"modelName":"Qwen3 Max","score":88.6,"functional":85,"quality":80,"passed":true,"tokens":4246,"timeMs":81593,"cost":0.0233}]},{"taskId":"ai-integration/advanced/task-1","category":"ai-integration","subcategory":"advanced","results":[{"modelName":"GLM 
4-Plus","score":90.7,"functional":86.2,"quality":81.4,"passed":true,"tokens":4344,"timeMs":71100,"cost":0.0057},{"modelName":"MiniMax M2.1","score":80.7,"functional":76.6,"quality":78.6,"passed":true,"tokens":13937,"timeMs":168179,"cost":0.0133},{"modelName":"GLM-4.7","score":85.3,"functional":81,"quality":80.6,"passed":true,"tokens":3293,"timeMs":44164,"cost":0.0041},{"modelName":"Gemini 3 Flash","score":86.4,"functional":82.1,"quality":76.6,"passed":true,"tokens":2436,"timeMs":33216,"cost":0.0056},{"modelName":"Gemini 3 Pro Preview","score":89.6,"functional":85,"quality":80,"passed":true,"tokens":4232,"timeMs":41637,"cost":0.0593},{"modelName":"Claude Sonnet 4.5","score":90.8,"functional":86.3,"quality":81.3,"passed":true,"tokens":3966,"timeMs":45370,"cost":0.0392},{"modelName":"Claude Opus 4.5","score":86.4,"functional":82,"quality":79.8,"passed":true,"tokens":3201,"timeMs":32844,"cost":0.0663},{"modelName":"Claude Haiku 4.5","score":82.4,"functional":78.3,"quality":78.2,"passed":true,"tokens":5281,"timeMs":17324,"cost":0.0193},{"modelName":"DeepSeek v3.2","score":86.8,"functional":82.5,"quality":80.2,"passed":true,"tokens":2752,"timeMs":87535,"cost":0.0031},{"modelName":"OpenAI GPT-5.2","score":86.8,"functional":82.5,"quality":79.6,"passed":true,"tokens":2537,"timeMs":31253,"cost":0.0267},{"modelName":"GLM 4.7 Flash","score":89.8,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":82.6,"functional":78.5,"quality":78.8,"passed":true,"tokens":2489,"timeMs":60651,"cost":0.0014},{"modelName":"Grok 4","score":82.2,"functional":78.1,"quality":78.8,"passed":true,"tokens":2724,"timeMs":81539,"cost":0.0348},{"modelName":"Grok 4.1 Fast","score":83.5,"functional":79.3,"quality":78.3,"passed":true,"tokens":2774,"timeMs":106418,"cost":0.0014},{"modelName":"Qwen3 
Max","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":5372,"timeMs":111442,"cost":0.0315}]},{"taskId":"ai-integration/advanced/task-2","category":"ai-integration","subcategory":"advanced","results":[{"modelName":"GLM 4-Plus","score":89.1,"functional":84.7,"quality":80.9,"passed":true,"tokens":4216,"timeMs":107228,"cost":0.0062},{"modelName":"MiniMax M2.1","score":82,"functional":77.9,"quality":79,"passed":true,"tokens":17915,"timeMs":147434,"cost":0.0126},{"modelName":"GLM-4.7","score":86.6,"functional":82.3,"quality":81,"passed":true,"tokens":3007,"timeMs":46402,"cost":0.0039},{"modelName":"Gemini 3 Flash","score":85.4,"functional":81.1,"quality":76.3,"passed":true,"tokens":1772,"timeMs":21284,"cost":0.0046},{"modelName":"Gemini 3 Pro Preview","score":89.6,"functional":85,"quality":80,"passed":true,"tokens":4232,"timeMs":41637,"cost":0.0593},{"modelName":"Claude Sonnet 4.5","score":91.6,"functional":87,"quality":81.5,"passed":true,"tokens":3475,"timeMs":45225,"cost":0.0334},{"modelName":"Claude Opus 4.5","score":84.1,"functional":79.9,"quality":79.1,"passed":true,"tokens":2931,"timeMs":50561,"cost":0.056},{"modelName":"Claude Haiku 4.5","score":82,"functional":77.9,"quality":78.1,"passed":true,"tokens":3562,"timeMs":23962,"cost":0.014},{"modelName":"DeepSeek v3.2","score":84.4,"functional":80.1,"quality":79.4,"passed":true,"tokens":3256,"timeMs":85759,"cost":0.0026},{"modelName":"OpenAI GPT-5.2","score":89.2,"functional":84.8,"quality":80.3,"passed":true,"tokens":2462,"timeMs":31805,"cost":0.0322},{"modelName":"GLM 4.7 Flash","score":89.8,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":84.5,"functional":80.3,"quality":79.3,"passed":true,"tokens":2623,"timeMs":49311,"cost":0.0011},{"modelName":"Grok 4","score":81.1,"functional":77,"quality":78.5,"passed":true,"tokens":2296,"timeMs":68511,"cost":0.0354},{"modelName":"Grok 4.1 
Fast","score":86,"functional":81.7,"quality":79,"passed":true,"tokens":3120,"timeMs":76200,"cost":0.0015},{"modelName":"Qwen3 Max","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":5372,"timeMs":111442,"cost":0.0315}]},{"taskId":"ai-integration/advanced/task-3","category":"ai-integration","subcategory":"advanced","results":[{"modelName":"GLM 4-Plus","score":86.8,"functional":82.5,"quality":80.2,"passed":true,"tokens":4933,"timeMs":68880,"cost":0.0043},{"modelName":"MiniMax M2.1","score":84.1,"functional":79.9,"quality":79.6,"passed":true,"tokens":13902,"timeMs":185416,"cost":0.012},{"modelName":"GLM-4.7","score":86.8,"functional":82.5,"quality":81,"passed":true,"tokens":3737,"timeMs":41404,"cost":0.0047},{"modelName":"Gemini 3 Flash","score":83.4,"functional":79.2,"quality":75.7,"passed":true,"tokens":2326,"timeMs":24107,"cost":0.0038},{"modelName":"Gemini 3 Pro Preview","score":89.6,"functional":85,"quality":80,"passed":true,"tokens":4232,"timeMs":41637,"cost":0.0593},{"modelName":"Claude Sonnet 4.5","score":91.1,"functional":86.5,"quality":81.4,"passed":true,"tokens":3959,"timeMs":32112,"cost":0.0457},{"modelName":"Claude Opus 4.5","score":82.6,"functional":78.4,"quality":78.6,"passed":true,"tokens":3750,"timeMs":44975,"cost":0.0753},{"modelName":"Claude Haiku 4.5","score":82.8,"functional":78.7,"quality":78.3,"passed":true,"tokens":4096,"timeMs":24216,"cost":0.0146},{"modelName":"DeepSeek v3.2","score":82.3,"functional":78.2,"quality":78.8,"passed":true,"tokens":2826,"timeMs":87513,"cost":0.0031},{"modelName":"OpenAI GPT-5.2","score":91,"functional":86.5,"quality":80.8,"passed":true,"tokens":2499,"timeMs":26480,"cost":0.031},{"modelName":"GLM 4.7 Flash","score":89.8,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":86.9,"functional":82.5,"quality":80,"passed":true,"tokens":2747,"timeMs":62776,"cost":0.0012},{"modelName":"Grok 
4","score":81.2,"functional":77.2,"quality":78.6,"passed":true,"tokens":2376,"timeMs":72690,"cost":0.0286},{"modelName":"Grok 4.1 Fast","score":88.2,"functional":83.7,"quality":79.7,"passed":true,"tokens":3055,"timeMs":103197,"cost":0.0015},{"modelName":"Qwen3 Max","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":5372,"timeMs":111442,"cost":0.0315}]},{"taskId":"ai-integration/advanced/task-4","category":"ai-integration","subcategory":"advanced","results":[{"modelName":"GLM 4-Plus","score":84.4,"functional":80.1,"quality":79.4,"passed":true,"tokens":4624,"timeMs":111635,"cost":0.0048},{"modelName":"MiniMax M2.1","score":86.6,"functional":82.3,"quality":80.3,"passed":true,"tokens":13690,"timeMs":172891,"cost":0.0118},{"modelName":"GLM-4.7","score":85.8,"functional":81.5,"quality":80.7,"passed":true,"tokens":4075,"timeMs":59668,"cost":0.0043},{"modelName":"Gemini 3 Flash","score":81,"functional":76.9,"quality":75,"passed":true,"tokens":2457,"timeMs":27278,"cost":0.0044},{"modelName":"Gemini 3 Pro Preview","score":89.6,"functional":85,"quality":80,"passed":true,"tokens":4232,"timeMs":41637,"cost":0.0593},{"modelName":"Claude Sonnet 4.5","score":89.5,"functional":85,"quality":80.9,"passed":true,"tokens":3807,"timeMs":31822,"cost":0.0445},{"modelName":"Claude Opus 4.5","score":82.2,"functional":78.1,"quality":78.5,"passed":true,"tokens":4101,"timeMs":37942,"cost":0.0587},{"modelName":"Claude Haiku 4.5","score":84.7,"functional":80.4,"quality":78.9,"passed":true,"tokens":5242,"timeMs":16268,"cost":0.0194},{"modelName":"DeepSeek v3.2","score":81.3,"functional":77.2,"quality":78.5,"passed":true,"tokens":3081,"timeMs":112241,"cost":0.0028},{"modelName":"OpenAI GPT-5.2","score":91.7,"functional":87.2,"quality":81.1,"passed":true,"tokens":2164,"timeMs":31275,"cost":0.032},{"modelName":"GLM 4.7 Flash","score":89.8,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 
Fast","score":89.3,"functional":84.8,"quality":80.7,"passed":true,"tokens":2441,"timeMs":51979,"cost":0.001},{"modelName":"Grok 4","score":82.5,"functional":78.4,"quality":79,"passed":true,"tokens":2263,"timeMs":76845,"cost":0.0244},{"modelName":"Grok 4.1 Fast","score":89.5,"functional":85,"quality":80.1,"passed":true,"tokens":3639,"timeMs":71640,"cost":0.0016},{"modelName":"Qwen3 Max","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":5372,"timeMs":111442,"cost":0.0315}]},{"taskId":"ai-integration/agents/task-1","category":"ai-integration","subcategory":"agents","results":[{"modelName":"GLM 4-Plus","score":82.4,"functional":78.2,"quality":78.8,"passed":true,"tokens":5274,"timeMs":94964,"cost":0.0044},{"modelName":"MiniMax M2.1","score":88.8,"functional":84.3,"quality":81,"passed":true,"tokens":18185,"timeMs":145975,"cost":0.0114},{"modelName":"GLM-4.7","score":83.9,"functional":79.7,"quality":80.2,"passed":true,"tokens":3926,"timeMs":64377,"cost":0.0035},{"modelName":"Gemini 3 Flash","score":78.6,"functional":74.7,"quality":74.3,"passed":true,"tokens":2238,"timeMs":29426,"cost":0.0043},{"modelName":"Gemini 3 Pro Preview","score":89.5,"functional":85,"quality":80,"passed":true,"tokens":4544,"timeMs":39938,"cost":0.0636},{"modelName":"Claude Sonnet 4.5","score":87.2,"functional":82.8,"quality":80.2,"passed":true,"tokens":3356,"timeMs":34852,"cost":0.0379},{"modelName":"Claude Opus 4.5","score":83,"functional":78.8,"quality":78.8,"passed":true,"tokens":3287,"timeMs":49563,"cost":0.062},{"modelName":"Claude Haiku 4.5","score":87.1,"functional":82.7,"quality":79.6,"passed":true,"tokens":4181,"timeMs":18490,"cost":0.0145},{"modelName":"DeepSeek v3.2","score":81.4,"functional":77.4,"quality":78.6,"passed":true,"tokens":3300,"timeMs":86982,"cost":0.0028},{"modelName":"OpenAI GPT-5.2","score":91.3,"functional":86.7,"quality":80.9,"passed":true,"tokens":2765,"timeMs":24153,"cost":0.0228},{"modelName":"GLM 4.7 
Flash","score":86.4,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":91.1,"functional":86.5,"quality":81.3,"passed":true,"tokens":2785,"timeMs":51130,"cost":0.0014},{"modelName":"Grok 4","score":84.7,"functional":80.5,"quality":79.6,"passed":true,"tokens":2175,"timeMs":60662,"cost":0.0256},{"modelName":"Grok 4.1 Fast","score":89.7,"functional":85.2,"quality":80.2,"passed":true,"tokens":3456,"timeMs":69984,"cost":0.0013},{"modelName":"Qwen3 Max","score":88.9,"functional":85,"quality":80,"passed":true,"tokens":5135,"timeMs":103353,"cost":0.0287}]},{"taskId":"ai-integration/agents/task-2","category":"ai-integration","subcategory":"agents","results":[{"modelName":"GLM 4-Plus","score":81.3,"functional":77.2,"quality":78.5,"passed":true,"tokens":4237,"timeMs":99850,"cost":0.0044},{"modelName":"MiniMax M2.1","score":90.1,"functional":85.6,"quality":81.4,"passed":true,"tokens":12951,"timeMs":183698,"cost":0.0108},{"modelName":"GLM-4.7","score":81.4,"functional":77.4,"quality":79.4,"passed":true,"tokens":3290,"timeMs":61400,"cost":0.0046},{"modelName":"Gemini 3 Flash","score":77,"functional":73.2,"quality":73.8,"passed":true,"tokens":2017,"timeMs":22724,"cost":0.0047},{"modelName":"Gemini 3 Pro Preview","score":89.5,"functional":85,"quality":80,"passed":true,"tokens":4544,"timeMs":39938,"cost":0.0636},{"modelName":"Claude Sonnet 4.5","score":84.7,"functional":80.5,"quality":79.4,"passed":true,"tokens":3303,"timeMs":53828,"cost":0.0342},{"modelName":"Claude Opus 4.5","score":84.8,"functional":80.6,"quality":79.3,"passed":true,"tokens":3049,"timeMs":46349,"cost":0.0747},{"modelName":"Claude Haiku 4.5","score":89.4,"functional":85,"quality":80.3,"passed":true,"tokens":5071,"timeMs":18000,"cost":0.0193},{"modelName":"DeepSeek v3.2","score":82.7,"functional":78.6,"quality":79,"passed":true,"tokens":3039,"timeMs":82362,"cost":0.0029},{"modelName":"OpenAI 
GPT-5.2","score":89.7,"functional":85.2,"quality":80.4,"passed":true,"tokens":2382,"timeMs":31253,"cost":0.0271},{"modelName":"GLM 4.7 Flash","score":86.4,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":91.8,"functional":87.2,"quality":81.5,"passed":true,"tokens":2700,"timeMs":90705,"cost":0.0014},{"modelName":"Grok 4","score":87.2,"functional":82.8,"quality":80.3,"passed":true,"tokens":2694,"timeMs":95836,"cost":0.0334},{"modelName":"Grok 4.1 Fast","score":88.7,"functional":84.3,"quality":79.9,"passed":true,"tokens":3350,"timeMs":74809,"cost":0.0014},{"modelName":"Qwen3 Max","score":88.9,"functional":85,"quality":80,"passed":true,"tokens":5135,"timeMs":103353,"cost":0.0287}]},{"taskId":"ai-integration/agents/task-3","category":"ai-integration","subcategory":"agents","results":[{"modelName":"GLM 4-Plus","score":81.4,"functional":77.4,"quality":78.6,"passed":true,"tokens":5279,"timeMs":80270,"cost":0.0056},{"modelName":"MiniMax M2.1","score":90.4,"functional":85.8,"quality":81.5,"passed":true,"tokens":15071,"timeMs":205017,"cost":0.0141},{"modelName":"GLM-4.7","score":79.1,"functional":75.2,"quality":78.7,"passed":true,"tokens":3892,"timeMs":62529,"cost":0.0039},{"modelName":"Gemini 3 Flash","score":76.4,"functional":72.6,"quality":73.6,"passed":true,"tokens":2529,"timeMs":20908,"cost":0.0041},{"modelName":"Gemini 3 Pro Preview","score":89.5,"functional":85,"quality":80,"passed":true,"tokens":4544,"timeMs":39938,"cost":0.0636},{"modelName":"Claude Sonnet 4.5","score":82.7,"functional":78.6,"quality":78.8,"passed":true,"tokens":3190,"timeMs":47719,"cost":0.0462},{"modelName":"Claude Opus 4.5","score":87.2,"functional":82.9,"quality":80,"passed":true,"tokens":3332,"timeMs":56788,"cost":0.0665},{"modelName":"Claude Haiku 4.5","score":91.2,"functional":86.7,"quality":80.8,"passed":true,"tokens":4758,"timeMs":16555,"cost":0.0186},{"modelName":"DeepSeek 
v3.2","score":84.9,"functional":80.6,"quality":79.6,"passed":true,"tokens":2640,"timeMs":69939,"cost":0.0028},{"modelName":"OpenAI GPT-5.2","score":87.4,"functional":83,"quality":79.7,"passed":true,"tokens":2310,"timeMs":28442,"cost":0.0283},{"modelName":"GLM 4.7 Flash","score":86.4,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":91.3,"functional":86.7,"quality":81.4,"passed":true,"tokens":2327,"timeMs":86295,"cost":0.001},{"modelName":"Grok 4","score":89.4,"functional":84.9,"quality":81,"passed":true,"tokens":2828,"timeMs":55839,"cost":0.0303},{"modelName":"Grok 4.1 Fast","score":86.8,"functional":82.4,"quality":79.3,"passed":true,"tokens":3809,"timeMs":70031,"cost":0.0015},{"modelName":"Qwen3 Max","score":88.9,"functional":85,"quality":80,"passed":true,"tokens":5135,"timeMs":103353,"cost":0.0287}]},{"taskId":"ai-integration/agents/task-4","category":"ai-integration","subcategory":"agents","results":[{"modelName":"GLM 4-Plus","score":82.7,"functional":78.6,"quality":79,"passed":true,"tokens":4006,"timeMs":117440,"cost":0.0059},{"modelName":"MiniMax M2.1","score":89.4,"functional":84.9,"quality":81.2,"passed":true,"tokens":14597,"timeMs":143996,"cost":0.0153},{"modelName":"GLM-4.7","score":77.5,"functional":73.6,"quality":78.2,"passed":true,"tokens":2953,"timeMs":45061,"cost":0.0038},{"modelName":"Gemini 3 Flash","score":77.1,"functional":73.3,"quality":73.8,"passed":true,"tokens":1853,"timeMs":33030,"cost":0.0049},{"modelName":"Gemini 3 Pro Preview","score":89.5,"functional":85,"quality":80,"passed":true,"tokens":4544,"timeMs":39938,"cost":0.0636},{"modelName":"Claude Sonnet 4.5","score":81.7,"functional":77.6,"quality":78.5,"passed":true,"tokens":3697,"timeMs":40110,"cost":0.0424},{"modelName":"Claude Opus 4.5","score":89.6,"functional":85.1,"quality":80.7,"passed":true,"tokens":4253,"timeMs":45881,"cost":0.0673},{"modelName":"Claude Haiku 
4.5","score":92,"functional":87.4,"quality":81.1,"passed":true,"tokens":3748,"timeMs":21418,"cost":0.0163},{"modelName":"DeepSeek v3.2","score":87.4,"functional":83,"quality":80.3,"passed":true,"tokens":3527,"timeMs":99074,"cost":0.0032},{"modelName":"OpenAI GPT-5.2","score":84.9,"functional":80.7,"quality":79,"passed":true,"tokens":3135,"timeMs":27523,"cost":0.0291},{"modelName":"GLM 4.7 Flash","score":86.4,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":89.7,"functional":85.2,"quality":80.9,"passed":true,"tokens":2595,"timeMs":73839,"cost":0.0012},{"modelName":"Grok 4","score":90.7,"functional":86.2,"quality":81.4,"passed":true,"tokens":2357,"timeMs":57920,"cost":0.0319},{"modelName":"Grok 4.1 Fast","score":84.3,"functional":80.1,"quality":78.6,"passed":true,"tokens":3130,"timeMs":95975,"cost":0.0016},{"modelName":"Qwen3 Max","score":88.9,"functional":85,"quality":80,"passed":true,"tokens":5135,"timeMs":103353,"cost":0.0287}]},{"taskId":"ai-integration/fine-tuning/task-1","category":"ai-integration","subcategory":"fine-tuning","results":[{"modelName":"GLM 4-Plus","score":84.9,"functional":80.6,"quality":79.6,"passed":true,"tokens":5273,"timeMs":103810,"cost":0.0058},{"modelName":"MiniMax M2.1","score":87.4,"functional":83,"quality":80.6,"passed":true,"tokens":18297,"timeMs":155979,"cost":0.013},{"modelName":"GLM-4.7","score":76.9,"functional":73.1,"quality":78.1,"passed":true,"tokens":2940,"timeMs":47883,"cost":0.0037},{"modelName":"Gemini 3 Flash","score":78.8,"functional":74.9,"quality":74.3,"passed":true,"tokens":2300,"timeMs":26166,"cost":0.0046},{"modelName":"Gemini 3 Pro Preview","score":89.4,"functional":85,"quality":80,"passed":true,"tokens":4474,"timeMs":40600,"cost":0.0627},{"modelName":"Claude Sonnet 4.5","score":81.8,"functional":77.7,"quality":78.6,"passed":true,"tokens":3191,"timeMs":41448,"cost":0.0411},{"modelName":"Claude Opus 
4.5","score":91.4,"functional":86.8,"quality":81.3,"passed":true,"tokens":2883,"timeMs":55776,"cost":0.0818},{"modelName":"Claude Haiku 4.5","score":91.5,"functional":86.9,"quality":80.9,"passed":true,"tokens":3662,"timeMs":15323,"cost":0.0136},{"modelName":"DeepSeek v3.2","score":89.5,"functional":85.1,"quality":81,"passed":true,"tokens":3063,"timeMs":107950,"cost":0.0025},{"modelName":"OpenAI GPT-5.2","score":82.9,"functional":78.8,"quality":78.4,"passed":true,"tokens":2435,"timeMs":32545,"cost":0.0312},{"modelName":"GLM 4.7 Flash","score":89.9,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":87.4,"functional":83,"quality":80.2,"passed":true,"tokens":3136,"timeMs":74791,"cost":0.0012},{"modelName":"Grok 4","score":90.9,"functional":86.4,"quality":81.5,"passed":true,"tokens":2795,"timeMs":73215,"cost":0.0286},{"modelName":"Grok 4.1 Fast","score":82,"functional":77.9,"quality":77.9,"passed":true,"tokens":2609,"timeMs":97802,"cost":0.0012},{"modelName":"Qwen3 Max","score":88.9,"functional":85,"quality":80,"passed":true,"tokens":5449,"timeMs":98475,"cost":0.0309}]},{"taskId":"ai-integration/fine-tuning/task-2","category":"ai-integration","subcategory":"fine-tuning","results":[{"modelName":"GLM 4-Plus","score":87.4,"functional":83,"quality":80.3,"passed":true,"tokens":5251,"timeMs":81421,"cost":0.0043},{"modelName":"MiniMax M2.1","score":85,"functional":80.7,"quality":79.9,"passed":true,"tokens":13682,"timeMs":203353,"cost":0.0123},{"modelName":"GLM-4.7","score":77.6,"functional":73.7,"quality":78.3,"passed":true,"tokens":3548,"timeMs":72465,"cost":0.0045},{"modelName":"Gemini 3 Flash","score":81.2,"functional":77.1,"quality":75,"passed":true,"tokens":2215,"timeMs":23244,"cost":0.0042},{"modelName":"Gemini 3 Pro Preview","score":89.4,"functional":85,"quality":80,"passed":true,"tokens":4474,"timeMs":40600,"cost":0.0627},{"modelName":"Claude Sonnet 
4.5","score":83.1,"functional":78.9,"quality":79,"passed":true,"tokens":3640,"timeMs":52231,"cost":0.046},{"modelName":"Claude Opus 4.5","score":92.1,"functional":87.5,"quality":81.5,"passed":true,"tokens":3650,"timeMs":42629,"cost":0.0655},{"modelName":"Claude Haiku 4.5","score":89.9,"functional":85.4,"quality":80.4,"passed":true,"tokens":5123,"timeMs":25874,"cost":0.0143},{"modelName":"DeepSeek v3.2","score":90.9,"functional":86.4,"quality":81.4,"passed":true,"tokens":2456,"timeMs":85377,"cost":0.0031},{"modelName":"OpenAI GPT-5.2","score":81.8,"functional":77.8,"quality":78.1,"passed":true,"tokens":2730,"timeMs":26278,"cost":0.032},{"modelName":"GLM 4.7 Flash","score":89.9,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":85,"functional":80.7,"quality":79.4,"passed":true,"tokens":3320,"timeMs":49573,"cost":0.001},{"modelName":"Grok 4","score":89.9,"functional":85.4,"quality":81.2,"passed":true,"tokens":2881,"timeMs":95827,"cost":0.0326},{"modelName":"Grok 4.1 Fast","score":80.4,"functional":76.3,"quality":77.4,"passed":true,"tokens":3553,"timeMs":88033,"cost":0.0012},{"modelName":"Qwen3 Max","score":88.9,"functional":85,"quality":80,"passed":true,"tokens":5449,"timeMs":98475,"cost":0.0309}]},{"taskId":"ai-integration/fine-tuning/task-3","category":"ai-integration","subcategory":"fine-tuning","results":[{"modelName":"GLM 4-Plus","score":89.6,"functional":85.1,"quality":81,"passed":true,"tokens":4422,"timeMs":81433,"cost":0.0045},{"modelName":"MiniMax M2.1","score":82.6,"functional":78.5,"quality":79.2,"passed":true,"tokens":13497,"timeMs":146788,"cost":0.0113},{"modelName":"GLM-4.7","score":79.3,"functional":75.3,"quality":78.8,"passed":true,"tokens":3368,"timeMs":58233,"cost":0.0047},{"modelName":"Gemini 3 Flash","score":83.6,"functional":79.4,"quality":75.8,"passed":true,"tokens":2231,"timeMs":35371,"cost":0.0042},{"modelName":"Gemini 3 Pro 
Preview","score":89.4,"functional":85,"quality":80,"passed":true,"tokens":4474,"timeMs":40600,"cost":0.0627},{"modelName":"Claude Sonnet 4.5","score":85.2,"functional":81,"quality":79.6,"passed":true,"tokens":3193,"timeMs":49230,"cost":0.0437},{"modelName":"Claude Opus 4.5","score":91.7,"functional":87.1,"quality":81.4,"passed":true,"tokens":3840,"timeMs":54789,"cost":0.0703},{"modelName":"Claude Haiku 4.5","score":87.6,"functional":83.2,"quality":79.7,"passed":true,"tokens":5098,"timeMs":19054,"cost":0.0153},{"modelName":"DeepSeek v3.2","score":91.1,"functional":86.6,"quality":81.5,"passed":true,"tokens":2892,"timeMs":93539,"cost":0.0031},{"modelName":"OpenAI GPT-5.2","score":82,"functional":77.9,"quality":78.1,"passed":true,"tokens":2777,"timeMs":32950,"cost":0.0254},{"modelName":"GLM 4.7 Flash","score":89.9,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":83,"functional":78.8,"quality":78.8,"passed":true,"tokens":2921,"timeMs":63756,"cost":0.0014},{"modelName":"Grok 4","score":88,"functional":83.6,"quality":80.6,"passed":true,"tokens":2530,"timeMs":59961,"cost":0.0254},{"modelName":"Grok 4.1 Fast","score":79.8,"functional":75.8,"quality":77.2,"passed":true,"tokens":2813,"timeMs":89045,"cost":0.0015},{"modelName":"Qwen3 Max","score":88.9,"functional":85,"quality":80,"passed":true,"tokens":5449,"timeMs":98475,"cost":0.0309}]},{"taskId":"ai-integration/fine-tuning/task-4","category":"ai-integration","subcategory":"fine-tuning","results":[{"modelName":"GLM 4-Plus","score":90.9,"functional":86.4,"quality":81.4,"passed":true,"tokens":4725,"timeMs":68132,"cost":0.0052},{"modelName":"MiniMax M2.1","score":81,"functional":76.9,"quality":78.7,"passed":true,"tokens":15780,"timeMs":196762,"cost":0.0131},{"modelName":"GLM-4.7","score":81.7,"functional":77.6,"quality":79.5,"passed":true,"tokens":3764,"timeMs":70022,"cost":0.0035},{"modelName":"Gemini 3 
Flash","score":85.5,"functional":81.2,"quality":76.3,"passed":true,"tokens":2507,"timeMs":24820,"cost":0.0048},{"modelName":"Gemini 3 Pro Preview","score":89.4,"functional":85,"quality":80,"passed":true,"tokens":4474,"timeMs":40600,"cost":0.0627},{"modelName":"Claude Sonnet 4.5","score":87.7,"functional":83.3,"quality":80.3,"passed":true,"tokens":2842,"timeMs":33068,"cost":0.0441},{"modelName":"Claude Opus 4.5","score":90.1,"functional":85.6,"quality":80.9,"passed":true,"tokens":3547,"timeMs":45789,"cost":0.0711},{"modelName":"Claude Haiku 4.5","score":85.1,"functional":80.9,"quality":79,"passed":true,"tokens":4169,"timeMs":25432,"cost":0.0154},{"modelName":"DeepSeek v3.2","score":90.1,"functional":85.6,"quality":81.2,"passed":true,"tokens":3614,"timeMs":107318,"cost":0.0031},{"modelName":"OpenAI GPT-5.2","score":83.3,"functional":79.1,"quality":78.5,"passed":true,"tokens":3040,"timeMs":22034,"cost":0.0305},{"modelName":"GLM 4.7 Flash","score":89.9,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":81.9,"functional":77.8,"quality":78.5,"passed":true,"tokens":2782,"timeMs":77085,"cost":0.001},{"modelName":"Grok 4","score":85.5,"functional":81.3,"quality":79.9,"passed":true,"tokens":2622,"timeMs":54064,"cost":0.0306},{"modelName":"Grok 4.1 Fast","score":80.5,"functional":76.4,"quality":77.4,"passed":true,"tokens":3366,"timeMs":68292,"cost":0.0012},{"modelName":"Qwen3 Max","score":88.9,"functional":85,"quality":80,"passed":true,"tokens":5449,"timeMs":98475,"cost":0.0309}]},{"taskId":"frontend/components/task-1","category":"frontend","subcategory":"components","results":[{"modelName":"GLM 4-Plus","score":96.1,"functional":91.3,"quality":81.5,"passed":true,"tokens":4868,"timeMs":72345,"cost":0.0053},{"modelName":"MiniMax 
M2.1","score":85.4,"functional":81.2,"quality":78.5,"passed":true,"tokens":13417,"timeMs":163897,"cost":0.0128},{"modelName":"GLM-4.7","score":89.1,"functional":84.6,"quality":80.2,"passed":true,"tokens":3969,"timeMs":71395,"cost":0.0034},{"modelName":"Gemini 3 Flash","score":91.4,"functional":86.8,"quality":76.6,"passed":true,"tokens":2537,"timeMs":30992,"cost":0.004},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4484,"timeMs":38866,"cost":0.0628},{"modelName":"Claude Sonnet 4.5","score":94.9,"functional":90.2,"quality":81,"passed":true,"tokens":4037,"timeMs":41621,"cost":0.0384},{"modelName":"Claude Opus 4.5","score":92.8,"functional":88.1,"quality":80.2,"passed":true,"tokens":4311,"timeMs":55884,"cost":0.0564},{"modelName":"Claude Haiku 4.5","score":88.1,"functional":83.7,"quality":78.4,"passed":true,"tokens":4947,"timeMs":22414,"cost":0.014},{"modelName":"DeepSeek v3.2","score":93.2,"functional":88.5,"quality":80.6,"passed":true,"tokens":3490,"timeMs":107814,"cost":0.0033},{"modelName":"OpenAI GPT-5.2","score":90.4,"functional":85.9,"quality":79.2,"passed":true,"tokens":2452,"timeMs":34785,"cost":0.0245},{"modelName":"GLM 4.7 Flash","score":89,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":87,"functional":82.7,"quality":78.6,"passed":true,"tokens":3066,"timeMs":84478,"cost":0.001},{"modelName":"Grok 4","score":88.2,"functional":83.8,"quality":79.2,"passed":true,"tokens":2355,"timeMs":95429,"cost":0.0256},{"modelName":"Grok 4.1 Fast","score":87.2,"functional":82.8,"quality":77.9,"passed":true,"tokens":3729,"timeMs":62414,"cost":0.0012},{"modelName":"Qwen3 Max","score":88.4,"functional":85,"quality":80,"passed":true,"tokens":5901,"timeMs":97033,"cost":0.0335}]},{"taskId":"frontend/components/task-2","category":"frontend","subcategory":"components","results":[{"modelName":"GLM 
4-Plus","score":95.1,"functional":90.4,"quality":81.2,"passed":true,"tokens":4104,"timeMs":104378,"cost":0.0043},{"modelName":"MiniMax M2.1","score":86.1,"functional":81.8,"quality":78.7,"passed":true,"tokens":17903,"timeMs":146982,"cost":0.0131},{"modelName":"GLM-4.7","score":91,"functional":86.4,"quality":80.8,"passed":true,"tokens":2955,"timeMs":67079,"cost":0.0047},{"modelName":"Gemini 3 Flash","score":91.1,"functional":86.5,"quality":76.5,"passed":true,"tokens":1948,"timeMs":26531,"cost":0.0043},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4484,"timeMs":38866,"cost":0.0628},{"modelName":"Claude Sonnet 4.5","score":96.3,"functional":91.5,"quality":81.4,"passed":true,"tokens":3993,"timeMs":42131,"cost":0.0321},{"modelName":"Claude Opus 4.5","score":90.3,"functional":85.8,"quality":79.4,"passed":true,"tokens":4056,"timeMs":34051,"cost":0.0696},{"modelName":"Claude Haiku 4.5","score":87.1,"functional":82.7,"quality":78.1,"passed":true,"tokens":4449,"timeMs":15942,"cost":0.017},{"modelName":"DeepSeek v3.2","score":90.7,"functional":86.2,"quality":79.9,"passed":true,"tokens":3250,"timeMs":67377,"cost":0.0032},{"modelName":"OpenAI GPT-5.2","score":92.9,"functional":88.3,"quality":79.9,"passed":true,"tokens":2264,"timeMs":31422,"cost":0.0287},{"modelName":"GLM 4.7 Flash","score":89,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":88.3,"functional":83.9,"quality":79,"passed":true,"tokens":3237,"timeMs":58066,"cost":0.0013},{"modelName":"Grok 4","score":86.6,"functional":82.2,"quality":78.7,"passed":true,"tokens":3192,"timeMs":70263,"cost":0.0333},{"modelName":"Grok 4.1 Fast","score":89.6,"functional":85.1,"quality":78.6,"passed":true,"tokens":3026,"timeMs":70362,"cost":0.0014},{"modelName":"Qwen3 
Max","score":88.4,"functional":85,"quality":80,"passed":true,"tokens":5901,"timeMs":97033,"cost":0.0335}]},{"taskId":"frontend/components/task-3","category":"frontend","subcategory":"components","results":[{"modelName":"GLM 4-Plus","score":93.2,"functional":88.5,"quality":80.6,"passed":true,"tokens":3859,"timeMs":89815,"cost":0.0059},{"modelName":"MiniMax M2.1","score":87.8,"functional":83.4,"quality":79.2,"passed":true,"tokens":16347,"timeMs":168426,"cost":0.0156},{"modelName":"GLM-4.7","score":91.9,"functional":87.3,"quality":81,"passed":true,"tokens":3676,"timeMs":64551,"cost":0.0043},{"modelName":"Gemini 3 Flash","score":89.6,"functional":85.1,"quality":76.1,"passed":true,"tokens":2173,"timeMs":36156,"cost":0.0049},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4484,"timeMs":38866,"cost":0.0628},{"modelName":"Claude Sonnet 4.5","score":96.5,"functional":91.7,"quality":81.5,"passed":true,"tokens":3465,"timeMs":53798,"cost":0.0337},{"modelName":"Claude Opus 4.5","score":88.3,"functional":83.9,"quality":78.8,"passed":true,"tokens":3494,"timeMs":34207,"cost":0.0791},{"modelName":"Claude Haiku 4.5","score":87.2,"functional":82.8,"quality":78.1,"passed":true,"tokens":4101,"timeMs":16510,"cost":0.0174},{"modelName":"DeepSeek v3.2","score":88.4,"functional":84,"quality":79.2,"passed":true,"tokens":3538,"timeMs":114610,"cost":0.0027},{"modelName":"OpenAI GPT-5.2","score":95.1,"functional":90.3,"quality":80.6,"passed":true,"tokens":2647,"timeMs":20676,"cost":0.0333},{"modelName":"GLM 4.7 Flash","score":89,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":90.5,"functional":86,"quality":79.6,"passed":true,"tokens":2357,"timeMs":64555,"cost":0.0009},{"modelName":"Grok 4","score":86,"functional":81.7,"quality":78.5,"passed":true,"tokens":3017,"timeMs":90399,"cost":0.0284},{"modelName":"Grok 4.1 
Fast","score":92,"functional":87.4,"quality":79.4,"passed":true,"tokens":3482,"timeMs":79090,"cost":0.0014},{"modelName":"Qwen3 Max","score":88.4,"functional":85,"quality":80,"passed":true,"tokens":5901,"timeMs":97033,"cost":0.0335}]},{"taskId":"frontend/components/task-4","category":"frontend","subcategory":"components","results":[{"modelName":"GLM 4-Plus","score":90.7,"functional":86.2,"quality":79.9,"passed":true,"tokens":3825,"timeMs":106656,"cost":0.0055},{"modelName":"MiniMax M2.1","score":90.2,"functional":85.7,"quality":79.9,"passed":true,"tokens":16485,"timeMs":209035,"cost":0.0155},{"modelName":"GLM-4.7","score":91.5,"functional":87,"quality":81,"passed":true,"tokens":4071,"timeMs":51980,"cost":0.0046},{"modelName":"Gemini 3 Flash","score":87.4,"functional":83,"quality":75.4,"passed":true,"tokens":2333,"timeMs":26386,"cost":0.0053},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4484,"timeMs":38866,"cost":0.0628},{"modelName":"Claude Sonnet 4.5","score":95.5,"functional":90.7,"quality":81.2,"passed":true,"tokens":3709,"timeMs":45349,"cost":0.0425},{"modelName":"Claude Opus 4.5","score":87.2,"functional":82.9,"quality":78.5,"passed":true,"tokens":3975,"timeMs":36904,"cost":0.0716},{"modelName":"Claude Haiku 4.5","score":88.5,"functional":84.1,"quality":78.5,"passed":true,"tokens":5293,"timeMs":25352,"cost":0.018},{"modelName":"DeepSeek v3.2","score":86.8,"functional":82.4,"quality":78.7,"passed":true,"tokens":3453,"timeMs":101969,"cost":0.003},{"modelName":"OpenAI GPT-5.2","score":96.5,"functional":91.6,"quality":81,"passed":true,"tokens":3120,"timeMs":29010,"cost":0.0236},{"modelName":"GLM 4.7 Flash","score":89,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":93,"functional":88.3,"quality":80.3,"passed":true,"tokens":3380,"timeMs":52030,"cost":0.0014},{"modelName":"Grok 
4","score":86.7,"functional":82.3,"quality":78.7,"passed":true,"tokens":2926,"timeMs":53166,"cost":0.0282},{"modelName":"Grok 4.1 Fast","score":93.9,"functional":89.2,"quality":79.9,"passed":true,"tokens":3865,"timeMs":67239,"cost":0.0014},{"modelName":"Qwen3 Max","score":88.4,"functional":85,"quality":80,"passed":true,"tokens":5901,"timeMs":97033,"cost":0.0335}]},{"taskId":"frontend/components/task-5","category":"frontend","subcategory":"components","results":[{"modelName":"GLM 4-Plus","score":88.4,"functional":84,"quality":79.2,"passed":true,"tokens":5081,"timeMs":81615,"cost":0.005},{"modelName":"MiniMax M2.1","score":92.6,"functional":88,"quality":80.7,"passed":true,"tokens":15808,"timeMs":137681,"cost":0.0124},{"modelName":"GLM-4.7","score":90.1,"functional":85.6,"quality":80.5,"passed":true,"tokens":3246,"timeMs":57839,"cost":0.0034},{"modelName":"Gemini 3 Flash","score":84.9,"functional":80.7,"quality":74.7,"passed":true,"tokens":2055,"timeMs":33314,"cost":0.0039},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4484,"timeMs":38866,"cost":0.0628},{"modelName":"Claude Sonnet 4.5","score":93.5,"functional":88.9,"quality":80.6,"passed":true,"tokens":3036,"timeMs":37896,"cost":0.037},{"modelName":"Claude Opus 4.5","score":87.4,"functional":83,"quality":78.6,"passed":true,"tokens":4210,"timeMs":53533,"cost":0.0718},{"modelName":"Claude Haiku 4.5","score":90.7,"functional":86.1,"quality":79.2,"passed":true,"tokens":3556,"timeMs":17645,"cost":0.0175},{"modelName":"DeepSeek v3.2","score":86.2,"functional":81.9,"quality":78.5,"passed":true,"tokens":3156,"timeMs":65026,"cost":0.0028},{"modelName":"OpenAI GPT-5.2","score":96.7,"functional":91.8,"quality":81,"passed":true,"tokens":2302,"timeMs":30042,"cost":0.029},{"modelName":"GLM 4.7 Flash","score":89,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 
Fast","score":95.2,"functional":90.4,"quality":81,"passed":true,"tokens":2344,"timeMs":70692,"cost":0.0014},{"modelName":"Grok 4","score":88.4,"functional":84,"quality":79.2,"passed":true,"tokens":3049,"timeMs":68344,"cost":0.0351},{"modelName":"Grok 4.1 Fast","score":94.8,"functional":90,"quality":80.2,"passed":true,"tokens":2711,"timeMs":85940,"cost":0.0016},{"modelName":"Qwen3 Max","score":88.4,"functional":85,"quality":80,"passed":true,"tokens":5901,"timeMs":97033,"cost":0.0335}]},{"taskId":"frontend/accessibility/task-1","category":"frontend","subcategory":"accessibility","results":[{"modelName":"GLM 4-Plus","score":86.8,"functional":82.4,"quality":78.7,"passed":true,"tokens":4779,"timeMs":115774,"cost":0.0052},{"modelName":"MiniMax M2.1","score":94.5,"functional":89.8,"quality":81.2,"passed":true,"tokens":16169,"timeMs":193453,"cost":0.0124},{"modelName":"GLM-4.7","score":87.8,"functional":83.5,"quality":79.8,"passed":true,"tokens":3489,"timeMs":58613,"cost":0.0035},{"modelName":"Gemini 3 Flash","score":82.8,"functional":78.7,"quality":74,"passed":true,"tokens":2069,"timeMs":27349,"cost":0.0046},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4429,"timeMs":39515,"cost":0.062},{"modelName":"Claude Sonnet 4.5","score":91.1,"functional":86.5,"quality":79.9,"passed":true,"tokens":3678,"timeMs":36169,"cost":0.0368},{"modelName":"Claude Opus 4.5","score":88.7,"functional":84.3,"quality":79,"passed":true,"tokens":4007,"timeMs":38666,"cost":0.0669},{"modelName":"Claude Haiku 4.5","score":93.1,"functional":88.5,"quality":79.9,"passed":true,"tokens":4358,"timeMs":22768,"cost":0.016},{"modelName":"DeepSeek v3.2","score":86.9,"functional":82.5,"quality":78.7,"passed":true,"tokens":3082,"timeMs":93906,"cost":0.0025},{"modelName":"OpenAI GPT-5.2","score":95.7,"functional":90.9,"quality":80.7,"passed":true,"tokens":2421,"timeMs":33430,"cost":0.033},{"modelName":"GLM 4.7 
Flash","score":85.2,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":96.5,"functional":91.7,"quality":81.4,"passed":true,"tokens":3412,"timeMs":60344,"cost":0.001},{"modelName":"Grok 4","score":90.8,"functional":86.2,"quality":79.9,"passed":true,"tokens":2263,"timeMs":61759,"cost":0.0263},{"modelName":"Grok 4.1 Fast","score":94.4,"functional":89.7,"quality":80.1,"passed":true,"tokens":2888,"timeMs":88445,"cost":0.0012},{"modelName":"Qwen3 Max","score":87.7,"functional":85,"quality":80,"passed":true,"tokens":5810,"timeMs":139340,"cost":0.0332}]},{"taskId":"frontend/accessibility/task-2","category":"frontend","subcategory":"accessibility","results":[{"modelName":"GLM 4-Plus","score":86.2,"functional":81.9,"quality":78.5,"passed":true,"tokens":4429,"timeMs":110252,"cost":0.0049},{"modelName":"MiniMax M2.1","score":95.4,"functional":90.6,"quality":81.5,"passed":true,"tokens":16761,"timeMs":203365,"cost":0.0123},{"modelName":"GLM-4.7","score":85.4,"functional":81.1,"quality":79.1,"passed":true,"tokens":3088,"timeMs":50509,"cost":0.0045},{"modelName":"Gemini 3 Flash","score":81.6,"functional":77.5,"quality":73.7,"passed":true,"tokens":2499,"timeMs":34269,"cost":0.005},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4429,"timeMs":39515,"cost":0.062},{"modelName":"Claude Sonnet 4.5","score":88.8,"functional":84.3,"quality":79.2,"passed":true,"tokens":3143,"timeMs":54359,"cost":0.0394},{"modelName":"Claude Opus 4.5","score":90.8,"functional":86.3,"quality":79.6,"passed":true,"tokens":3155,"timeMs":53650,"cost":0.0655},{"modelName":"Claude Haiku 4.5","score":95.3,"functional":90.6,"quality":80.6,"passed":true,"tokens":4701,"timeMs":27935,"cost":0.0175},{"modelName":"DeepSeek v3.2","score":88.6,"functional":84.2,"quality":79.2,"passed":true,"tokens":3054,"timeMs":112478,"cost":0.0031},{"modelName":"OpenAI 
GPT-5.2","score":93.7,"functional":89,"quality":80.2,"passed":true,"tokens":2842,"timeMs":22984,"cost":0.0227},{"modelName":"GLM 4.7 Flash","score":85.2,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":96.7,"functional":91.9,"quality":81.5,"passed":true,"tokens":3128,"timeMs":84360,"cost":0.0014},{"modelName":"Grok 4","score":93.2,"functional":88.5,"quality":80.7,"passed":true,"tokens":3170,"timeMs":72177,"cost":0.0347},{"modelName":"Grok 4.1 Fast","score":93,"functional":88.3,"quality":79.7,"passed":true,"tokens":3416,"timeMs":100722,"cost":0.0014},{"modelName":"Qwen3 Max","score":87.7,"functional":85,"quality":80,"passed":true,"tokens":5810,"timeMs":139340,"cost":0.0332}]},{"taskId":"frontend/accessibility/task-3","category":"frontend","subcategory":"accessibility","results":[{"modelName":"GLM 4-Plus","score":86.9,"functional":82.5,"quality":78.7,"passed":true,"tokens":5088,"timeMs":108622,"cost":0.0058},{"modelName":"MiniMax M2.1","score":95.1,"functional":90.3,"quality":81.4,"passed":true,"tokens":18090,"timeMs":133814,"cost":0.0144},{"modelName":"GLM-4.7","score":83.3,"functional":79.1,"quality":78.5,"passed":true,"tokens":2796,"timeMs":52283,"cost":0.0046},{"modelName":"Gemini 3 Flash","score":81.6,"functional":77.5,"quality":73.7,"passed":true,"tokens":2321,"timeMs":34100,"cost":0.0045},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4429,"timeMs":39515,"cost":0.062},{"modelName":"Claude Sonnet 4.5","score":87.1,"functional":82.8,"quality":78.7,"passed":true,"tokens":3341,"timeMs":39896,"cost":0.037},{"modelName":"Claude Opus 4.5","score":93.3,"functional":88.6,"quality":80.3,"passed":true,"tokens":4317,"timeMs":48139,"cost":0.0606},{"modelName":"Claude Haiku 4.5","score":96.7,"functional":91.9,"quality":81,"passed":true,"tokens":4452,"timeMs":18559,"cost":0.0196},{"modelName":"DeepSeek 
v3.2","score":90.9,"functional":86.4,"quality":79.9,"passed":true,"tokens":3440,"timeMs":99520,"cost":0.0025},{"modelName":"OpenAI GPT-5.2","score":91.3,"functional":86.7,"quality":79.4,"passed":true,"tokens":3013,"timeMs":30076,"cost":0.0236},{"modelName":"GLM 4.7 Flash","score":85.2,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":95.7,"functional":91,"quality":81.2,"passed":true,"tokens":2636,"timeMs":66588,"cost":0.0013},{"modelName":"Grok 4","score":95.1,"functional":90.3,"quality":81.2,"passed":true,"tokens":2802,"timeMs":95617,"cost":0.0297},{"modelName":"Grok 4.1 Fast","score":90.7,"functional":86.2,"quality":79,"passed":true,"tokens":2994,"timeMs":109738,"cost":0.0015},{"modelName":"Qwen3 Max","score":87.7,"functional":85,"quality":80,"passed":true,"tokens":5810,"timeMs":139340,"cost":0.0332}]},{"taskId":"frontend/accessibility/task-4","category":"frontend","subcategory":"accessibility","results":[{"modelName":"GLM 4-Plus","score":88.6,"functional":84.2,"quality":79.2,"passed":true,"tokens":4553,"timeMs":105195,"cost":0.0044},{"modelName":"MiniMax M2.1","score":93.6,"functional":88.9,"quality":81,"passed":true,"tokens":14627,"timeMs":138073,"cost":0.013},{"modelName":"GLM-4.7","score":82.1,"functional":78,"quality":78.1,"passed":true,"tokens":3316,"timeMs":42959,"cost":0.0047},{"modelName":"Gemini 3 Flash","score":82.7,"functional":78.6,"quality":74,"passed":true,"tokens":1750,"timeMs":22190,"cost":0.0051},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4429,"timeMs":39515,"cost":0.062},{"modelName":"Claude Sonnet 4.5","score":86.6,"functional":82.2,"quality":78.5,"passed":true,"tokens":4014,"timeMs":54100,"cost":0.0448},{"modelName":"Claude Opus 4.5","score":95.5,"functional":90.7,"quality":81,"passed":true,"tokens":4089,"timeMs":44230,"cost":0.0551},{"modelName":"Claude Haiku 
4.5","score":96.9,"functional":92.1,"quality":81,"passed":true,"tokens":3955,"timeMs":20645,"cost":0.015},{"modelName":"DeepSeek v3.2","score":93.4,"functional":88.7,"quality":80.7,"passed":true,"tokens":3442,"timeMs":89562,"cost":0.0028},{"modelName":"OpenAI GPT-5.2","score":89,"functional":84.5,"quality":78.7,"passed":true,"tokens":2822,"timeMs":21739,"cost":0.0245},{"modelName":"GLM 4.7 Flash","score":85.2,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":93.8,"functional":89.1,"quality":80.6,"passed":true,"tokens":2908,"timeMs":80972,"cost":0.0013},{"modelName":"Grok 4","score":96,"functional":91.2,"quality":81.5,"passed":true,"tokens":2501,"timeMs":91198,"cost":0.0336},{"modelName":"Grok 4.1 Fast","score":88.3,"functional":83.9,"quality":78.2,"passed":true,"tokens":3641,"timeMs":82260,"cost":0.0012},{"modelName":"Qwen3 Max","score":87.7,"functional":85,"quality":80,"passed":true,"tokens":5810,"timeMs":139340,"cost":0.0332}]},{"taskId":"frontend/accessibility/task-5","category":"frontend","subcategory":"accessibility","results":[{"modelName":"GLM 4-Plus","score":91,"functional":86.4,"quality":79.9,"passed":true,"tokens":5158,"timeMs":120923,"cost":0.0061},{"modelName":"MiniMax M2.1","score":91.4,"functional":86.8,"quality":80.3,"passed":true,"tokens":17411,"timeMs":212453,"cost":0.0131},{"modelName":"GLM-4.7","score":82,"functional":77.9,"quality":78.1,"passed":true,"tokens":3173,"timeMs":67561,"cost":0.004},{"modelName":"Gemini 3 Flash","score":84.8,"functional":80.6,"quality":74.6,"passed":true,"tokens":1938,"timeMs":31854,"cost":0.005},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4429,"timeMs":39515,"cost":0.062},{"modelName":"Claude Sonnet 4.5","score":87.2,"functional":82.9,"quality":78.7,"passed":true,"tokens":3491,"timeMs":37866,"cost":0.0365},{"modelName":"Claude Opus 
4.5","score":96.9,"functional":92,"quality":81.4,"passed":true,"tokens":3272,"timeMs":53266,"cost":0.0808},{"modelName":"Claude Haiku 4.5","score":95.9,"functional":91.1,"quality":80.7,"passed":true,"tokens":4515,"timeMs":20932,"cost":0.015},{"modelName":"DeepSeek v3.2","score":95.3,"functional":90.5,"quality":81.2,"passed":true,"tokens":2819,"timeMs":78757,"cost":0.0029},{"modelName":"OpenAI GPT-5.2","score":87.3,"functional":82.9,"quality":78.2,"passed":true,"tokens":3147,"timeMs":31073,"cost":0.0272},{"modelName":"GLM 4.7 Flash","score":85.2,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":91.3,"functional":86.8,"quality":79.9,"passed":true,"tokens":3308,"timeMs":58248,"cost":0.0014},{"modelName":"Grok 4","score":95.6,"functional":90.9,"quality":81.4,"passed":true,"tokens":2677,"timeMs":72394,"cost":0.0307},{"modelName":"Grok 4.1 Fast","score":86.2,"functional":81.9,"quality":77.6,"passed":true,"tokens":3324,"timeMs":114755,"cost":0.0014},{"modelName":"Qwen3 Max","score":87.7,"functional":85,"quality":80,"passed":true,"tokens":5810,"timeMs":139340,"cost":0.0332}]},{"taskId":"frontend/animation/task-1","category":"frontend","subcategory":"animation","results":[{"modelName":"GLM 4-Plus","score":93.4,"functional":88.7,"quality":80.7,"passed":true,"tokens":4621,"timeMs":93250,"cost":0.0053},{"modelName":"MiniMax M2.1","score":88.9,"functional":84.5,"quality":79.5,"passed":true,"tokens":12395,"timeMs":132845,"cost":0.0142},{"modelName":"GLM-4.7","score":83.2,"functional":79,"quality":78.5,"passed":true,"tokens":3227,"timeMs":64157,"cost":0.0043},{"modelName":"Gemini 3 Flash","score":87.3,"functional":82.9,"quality":75.4,"passed":true,"tokens":2062,"timeMs":27399,"cost":0.0045},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4412,"timeMs":42453,"cost":0.0618},{"modelName":"Claude Sonnet 
4.5","score":89,"functional":84.5,"quality":79.2,"passed":true,"tokens":3955,"timeMs":50497,"cost":0.0373},{"modelName":"Claude Opus 4.5","score":97.1,"functional":92.2,"quality":81.5,"passed":true,"tokens":3306,"timeMs":38427,"cost":0.0589},{"modelName":"Claude Haiku 4.5","score":93.9,"functional":89.3,"quality":80.2,"passed":true,"tokens":4166,"timeMs":22681,"cost":0.0151},{"modelName":"DeepSeek v3.2","score":96.2,"functional":91.3,"quality":81.5,"passed":true,"tokens":2883,"timeMs":108792,"cost":0.0032},{"modelName":"OpenAI GPT-5.2","score":86.8,"functional":82.4,"quality":78.1,"passed":true,"tokens":3169,"timeMs":27034,"cost":0.0274},{"modelName":"GLM 4.7 Flash","score":85.2,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":89,"functional":84.6,"quality":79.2,"passed":true,"tokens":2438,"timeMs":51261,"cost":0.001},{"modelName":"Grok 4","score":94.2,"functional":89.5,"quality":81,"passed":true,"tokens":2761,"timeMs":65888,"cost":0.025},{"modelName":"Grok 4.1 Fast","score":85,"functional":80.7,"quality":77.3,"passed":true,"tokens":2658,"timeMs":92407,"cost":0.0015},{"modelName":"Qwen3 Max","score":89.1,"functional":85,"quality":80,"passed":true,"tokens":2802,"timeMs":52457,"cost":0.0153}]},{"taskId":"frontend/animation/task-2","category":"frontend","subcategory":"animation","results":[{"modelName":"GLM 4-Plus","score":95.3,"functional":90.5,"quality":81.2,"passed":true,"tokens":4227,"timeMs":120138,"cost":0.0042},{"modelName":"MiniMax M2.1","score":86.8,"functional":82.5,"quality":78.9,"passed":true,"tokens":14187,"timeMs":158345,"cost":0.012},{"modelName":"GLM-4.7","score":85.3,"functional":81,"quality":79.1,"passed":true,"tokens":3338,"timeMs":49469,"cost":0.0048},{"modelName":"Gemini 3 Flash","score":89.5,"functional":85.1,"quality":76,"passed":true,"tokens":1858,"timeMs":20215,"cost":0.0049},{"modelName":"Gemini 3 Pro 
Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4412,"timeMs":42453,"cost":0.0618},{"modelName":"Claude Sonnet 4.5","score":91.3,"functional":86.8,"quality":79.9,"passed":true,"tokens":3744,"timeMs":29962,"cost":0.0412},{"modelName":"Claude Opus 4.5","score":96.1,"functional":91.3,"quality":81.2,"passed":true,"tokens":3240,"timeMs":46304,"cost":0.0788},{"modelName":"Claude Haiku 4.5","score":91.5,"functional":86.9,"quality":79.4,"passed":true,"tokens":3730,"timeMs":19842,"cost":0.0174},{"modelName":"DeepSeek v3.2","score":95.8,"functional":91,"quality":81.4,"passed":true,"tokens":2864,"timeMs":103265,"cost":0.0031},{"modelName":"OpenAI GPT-5.2","score":87.4,"functional":83,"quality":78.3,"passed":true,"tokens":2920,"timeMs":21749,"cost":0.0306},{"modelName":"GLM 4.7 Flash","score":85.2,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":87.4,"functional":83,"quality":78.7,"passed":true,"tokens":3245,"timeMs":79793,"cost":0.0013},{"modelName":"Grok 4","score":91.9,"functional":87.3,"quality":80.3,"passed":true,"tokens":2667,"timeMs":85497,"cost":0.0301},{"modelName":"Grok 4.1 Fast","score":84.9,"functional":80.7,"quality":77.2,"passed":true,"tokens":3856,"timeMs":77240,"cost":0.0013},{"modelName":"Qwen3 Max","score":89.1,"functional":85,"quality":80,"passed":true,"tokens":2802,"timeMs":52457,"cost":0.0153}]},{"taskId":"frontend/animation/task-3","category":"frontend","subcategory":"animation","results":[{"modelName":"GLM 4-Plus","score":96.2,"functional":91.4,"quality":81.5,"passed":true,"tokens":4295,"timeMs":124976,"cost":0.0055},{"modelName":"MiniMax M2.1","score":85.6,"functional":81.3,"quality":78.6,"passed":true,"tokens":17474,"timeMs":128665,"cost":0.0116},{"modelName":"GLM-4.7","score":87.7,"functional":83.3,"quality":79.8,"passed":true,"tokens":2931,"timeMs":42344,"cost":0.0038},{"modelName":"Gemini 3 
Flash","score":91,"functional":86.5,"quality":76.5,"passed":true,"tokens":2277,"timeMs":19637,"cost":0.0056},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4412,"timeMs":42453,"cost":0.0618},{"modelName":"Claude Sonnet 4.5","score":93.7,"functional":89.1,"quality":80.7,"passed":true,"tokens":3749,"timeMs":52065,"cost":0.0396},{"modelName":"Claude Opus 4.5","score":94.1,"functional":89.4,"quality":80.6,"passed":true,"tokens":3784,"timeMs":35872,"cost":0.0795},{"modelName":"Claude Haiku 4.5","score":89.2,"functional":84.7,"quality":78.7,"passed":true,"tokens":4266,"timeMs":17036,"cost":0.0144},{"modelName":"DeepSeek v3.2","score":94.4,"functional":89.7,"quality":81,"passed":true,"tokens":2988,"timeMs":109380,"cost":0.0032},{"modelName":"OpenAI GPT-5.2","score":89.1,"functional":84.7,"quality":78.8,"passed":true,"tokens":2794,"timeMs":32781,"cost":0.0313},{"modelName":"GLM 4.7 Flash","score":85.2,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":86.8,"functional":82.5,"quality":78.5,"passed":true,"tokens":2752,"timeMs":70272,"cost":0.0011},{"modelName":"Grok 4","score":89.5,"functional":85,"quality":79.5,"passed":true,"tokens":2910,"timeMs":63529,"cost":0.025},{"modelName":"Grok 4.1 Fast","score":86.1,"functional":81.8,"quality":77.6,"passed":true,"tokens":2616,"timeMs":78060,"cost":0.0011},{"modelName":"Qwen3 Max","score":89.1,"functional":85,"quality":80,"passed":true,"tokens":2802,"timeMs":52457,"cost":0.0153}]},{"taskId":"frontend/animation/task-4","category":"frontend","subcategory":"animation","results":[{"modelName":"GLM 4-Plus","score":95.8,"functional":91.1,"quality":81.4,"passed":true,"tokens":4875,"timeMs":79075,"cost":0.0048},{"modelName":"MiniMax 
M2.1","score":85.6,"functional":81.3,"quality":78.5,"passed":true,"tokens":16607,"timeMs":139605,"cost":0.0118},{"modelName":"GLM-4.7","score":90,"functional":85.5,"quality":80.5,"passed":true,"tokens":2789,"timeMs":51268,"cost":0.0046},{"modelName":"Gemini 3 Flash","score":91.4,"functional":86.8,"quality":76.6,"passed":true,"tokens":2103,"timeMs":24182,"cost":0.0047},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4412,"timeMs":42453,"cost":0.0618},{"modelName":"Claude Sonnet 4.5","score":95.6,"functional":90.8,"quality":81.2,"passed":true,"tokens":3726,"timeMs":49887,"cost":0.0331},{"modelName":"Claude Opus 4.5","score":91.7,"functional":87.1,"quality":79.9,"passed":true,"tokens":2986,"timeMs":33871,"cost":0.0802},{"modelName":"Claude Haiku 4.5","score":87.5,"functional":83.2,"quality":78.2,"passed":true,"tokens":5202,"timeMs":15595,"cost":0.0176},{"modelName":"DeepSeek v3.2","score":92.1,"functional":87.5,"quality":80.3,"passed":true,"tokens":3235,"timeMs":68840,"cost":0.0028},{"modelName":"OpenAI GPT-5.2","score":91.5,"functional":86.9,"quality":79.5,"passed":true,"tokens":2869,"timeMs":35310,"cost":0.0228},{"modelName":"GLM 4.7 Flash","score":85.2,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":87.5,"functional":83.1,"quality":78.7,"passed":true,"tokens":2805,"timeMs":74103,"cost":0.0012},{"modelName":"Grok 4","score":87.4,"functional":83,"quality":78.9,"passed":true,"tokens":3167,"timeMs":56673,"cost":0.0291},{"modelName":"Grok 4.1 Fast","score":88.2,"functional":83.8,"quality":78.2,"passed":true,"tokens":3664,"timeMs":86891,"cost":0.0014},{"modelName":"Qwen3 Max","score":89.1,"functional":85,"quality":80,"passed":true,"tokens":2802,"timeMs":52457,"cost":0.0153}]},{"taskId":"frontend/animation/task-5","category":"frontend","subcategory":"animation","results":[{"modelName":"GLM 
4-Plus","score":94.4,"functional":89.7,"quality":81,"passed":true,"tokens":3832,"timeMs":70094,"cost":0.0045},{"modelName":"MiniMax M2.1","score":86.7,"functional":82.4,"quality":78.9,"passed":true,"tokens":17950,"timeMs":158888,"cost":0.0144},{"modelName":"GLM-4.7","score":91.5,"functional":86.9,"quality":80.9,"passed":true,"tokens":2877,"timeMs":50293,"cost":0.0046},{"modelName":"Gemini 3 Flash","score":90.6,"functional":86,"quality":76.4,"passed":true,"tokens":1871,"timeMs":34158,"cost":0.0053},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4412,"timeMs":42453,"cost":0.0618},{"modelName":"Claude Sonnet 4.5","score":96.5,"functional":91.7,"quality":81.5,"passed":true,"tokens":3047,"timeMs":32479,"cost":0.0423},{"modelName":"Claude Opus 4.5","score":89.4,"functional":84.9,"quality":79.2,"passed":true,"tokens":3567,"timeMs":45595,"cost":0.0615},{"modelName":"Claude Haiku 4.5","score":87,"functional":82.6,"quality":78.1,"passed":true,"tokens":4888,"timeMs":17038,"cost":0.0149},{"modelName":"DeepSeek v3.2","score":89.7,"functional":85.2,"quality":79.5,"passed":true,"tokens":2833,"timeMs":90311,"cost":0.0028},{"modelName":"OpenAI GPT-5.2","score":93.9,"functional":89.2,"quality":80.2,"passed":true,"tokens":3198,"timeMs":32098,"cost":0.0299},{"modelName":"GLM 4.7 Flash","score":85.2,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":89.2,"functional":84.7,"quality":79.2,"passed":true,"tokens":2864,"timeMs":70502,"cost":0.0014},{"modelName":"Grok 4","score":86.2,"functional":81.9,"quality":78.6,"passed":true,"tokens":3186,"timeMs":74667,"cost":0.0347},{"modelName":"Grok 4.1 Fast","score":90.6,"functional":86.1,"quality":79,"passed":true,"tokens":3376,"timeMs":101477,"cost":0.0015},{"modelName":"Qwen3 
Max","score":89.1,"functional":85,"quality":80,"passed":true,"tokens":2802,"timeMs":52457,"cost":0.0153}]},{"taskId":"frontend/performance/task-1","category":"frontend","subcategory":"performance","results":[{"modelName":"GLM 4-Plus","score":92.1,"functional":87.5,"quality":80.3,"passed":true,"tokens":4792,"timeMs":68410,"cost":0.0059},{"modelName":"MiniMax M2.1","score":88.8,"functional":84.4,"quality":79.5,"passed":true,"tokens":13505,"timeMs":159390,"cost":0.0144},{"modelName":"GLM-4.7","score":91.9,"functional":87.3,"quality":81.1,"passed":true,"tokens":3662,"timeMs":42732,"cost":0.0046},{"modelName":"Gemini 3 Flash","score":88.7,"functional":84.3,"quality":75.8,"passed":true,"tokens":1807,"timeMs":21307,"cost":0.0045},{"modelName":"Gemini 3 Pro Preview","score":88.5,"functional":85,"quality":80,"passed":true,"tokens":4408,"timeMs":41379,"cost":0.0617},{"modelName":"Claude Sonnet 4.5","score":96.2,"functional":91.4,"quality":81.4,"passed":true,"tokens":2806,"timeMs":48597,"cost":0.0323},{"modelName":"Claude Opus 4.5","score":87.7,"functional":83.3,"quality":78.7,"passed":true,"tokens":4010,"timeMs":41404,"cost":0.0725},{"modelName":"Claude Haiku 4.5","score":87.6,"functional":83.3,"quality":78.3,"passed":true,"tokens":4848,"timeMs":27856,"cost":0.0147},{"modelName":"DeepSeek v3.2","score":87.6,"functional":83.2,"quality":78.9,"passed":true,"tokens":2480,"timeMs":100516,"cost":0.0026},{"modelName":"OpenAI GPT-5.2","score":95.8,"functional":91,"quality":80.8,"passed":true,"tokens":2755,"timeMs":32567,"cost":0.0259},{"modelName":"GLM 4.7 Flash","score":80.2,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":91.6,"functional":87,"quality":79.9,"passed":true,"tokens":3337,"timeMs":59183,"cost":0.0013},{"modelName":"Grok 4","score":86.1,"functional":81.8,"quality":78.5,"passed":true,"tokens":2659,"timeMs":88990,"cost":0.0278},{"modelName":"Grok 4.1 
Fast","score":92.9,"functional":88.3,"quality":79.6,"passed":true,"tokens":3066,"timeMs":85616,"cost":0.0014},{"modelName":"Qwen3 Max","score":87.8,"functional":85,"quality":80,"passed":true,"tokens":3113,"timeMs":65638,"cost":0.0171}]},{"taskId":"frontend/performance/task-2","category":"frontend","subcategory":"performance","results":[{"modelName":"GLM 4-Plus","score":89.7,"functional":85.2,"quality":79.5,"passed":true,"tokens":3779,"timeMs":119093,"cost":0.0051},{"modelName":"MiniMax M2.1","score":91.3,"functional":86.7,"quality":80.3,"passed":true,"tokens":13822,"timeMs":133230,"cost":0.0128},{"modelName":"GLM-4.7","score":91,"functional":86.5,"quality":80.8,"passed":true,"tokens":4113,"timeMs":60450,"cost":0.0038},{"modelName":"Gemini 3 Flash","score":86.3,"functional":82,"quality":75.1,"passed":true,"tokens":2510,"timeMs":32607,"cost":0.0056},{"modelName":"Gemini 3 Pro Preview","score":88.5,"functional":85,"quality":80,"passed":true,"tokens":4408,"timeMs":41379,"cost":0.0617},{"modelName":"Claude Sonnet 4.5","score":94.7,"functional":90,"quality":81,"passed":true,"tokens":2922,"timeMs":48512,"cost":0.039},{"modelName":"Claude Opus 4.5","score":87.2,"functional":82.8,"quality":78.5,"passed":true,"tokens":4019,"timeMs":38214,"cost":0.0568},{"modelName":"Claude Haiku 4.5","score":89.4,"functional":84.9,"quality":78.8,"passed":true,"tokens":4745,"timeMs":25481,"cost":0.0169},{"modelName":"DeepSeek v3.2","score":86.4,"functional":82,"quality":78.6,"passed":true,"tokens":3537,"timeMs":83678,"cost":0.0028},{"modelName":"OpenAI GPT-5.2","score":96.7,"functional":91.9,"quality":81,"passed":true,"tokens":2338,"timeMs":35264,"cost":0.0279},{"modelName":"GLM 4.7 Flash","score":80.2,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":94,"functional":89.3,"quality":80.7,"passed":true,"tokens":2742,"timeMs":81188,"cost":0.0011},{"modelName":"Grok 
4","score":87.3,"functional":82.9,"quality":78.9,"passed":true,"tokens":2500,"timeMs":79012,"cost":0.0257},{"modelName":"Grok 4.1 Fast","score":94.4,"functional":89.7,"quality":80.1,"passed":true,"tokens":3831,"timeMs":99621,"cost":0.0016},{"modelName":"Qwen3 Max","score":87.8,"functional":85,"quality":80,"passed":true,"tokens":3113,"timeMs":65638,"cost":0.0171}]},{"taskId":"frontend/performance/task-3","category":"frontend","subcategory":"performance","results":[{"modelName":"GLM 4-Plus","score":87.6,"functional":83.2,"quality":78.9,"passed":true,"tokens":4467,"timeMs":98239,"cost":0.0054},{"modelName":"MiniMax M2.1","score":93.5,"functional":88.8,"quality":80.9,"passed":true,"tokens":12665,"timeMs":165276,"cost":0.0134},{"modelName":"GLM-4.7","score":89.2,"functional":84.7,"quality":80.2,"passed":true,"tokens":4022,"timeMs":43701,"cost":0.0035},{"modelName":"Gemini 3 Flash","score":83.9,"functional":79.7,"quality":74.4,"passed":true,"tokens":2000,"timeMs":31542,"cost":0.0041},{"modelName":"Gemini 3 Pro Preview","score":88.5,"functional":85,"quality":80,"passed":true,"tokens":4408,"timeMs":41379,"cost":0.0617},{"modelName":"Claude Sonnet 4.5","score":92.5,"functional":87.9,"quality":80.3,"passed":true,"tokens":3353,"timeMs":34143,"cost":0.0414},{"modelName":"Claude Opus 4.5","score":87.8,"functional":83.4,"quality":78.7,"passed":true,"tokens":3130,"timeMs":54659,"cost":0.0701},{"modelName":"Claude Haiku 4.5","score":91.7,"functional":87.1,"quality":79.5,"passed":true,"tokens":3791,"timeMs":24365,"cost":0.0163},{"modelName":"DeepSeek v3.2","score":86.3,"functional":82,"quality":78.5,"passed":true,"tokens":3286,"timeMs":95820,"cost":0.0024},{"modelName":"OpenAI GPT-5.2","score":96.4,"functional":91.6,"quality":81,"passed":true,"tokens":2728,"timeMs":28132,"cost":0.0307},{"modelName":"GLM 4.7 Flash","score":80.2,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 
Fast","score":95.9,"functional":91.1,"quality":81.2,"passed":true,"tokens":3022,"timeMs":56023,"cost":0.0013},{"modelName":"Grok 4","score":89.4,"functional":84.9,"quality":79.5,"passed":true,"tokens":2820,"timeMs":60642,"cost":0.0255},{"modelName":"Grok 4.1 Fast","score":94.8,"functional":90,"quality":80.2,"passed":true,"tokens":2914,"timeMs":84705,"cost":0.0016},{"modelName":"Qwen3 Max","score":87.8,"functional":85,"quality":80,"passed":true,"tokens":3113,"timeMs":65638,"cost":0.0171}]},{"taskId":"frontend/performance/task-4","category":"frontend","subcategory":"performance","results":[{"modelName":"GLM 4-Plus","score":86.4,"functional":82.1,"quality":78.6,"passed":true,"tokens":4909,"timeMs":73740,"cost":0.0053},{"modelName":"MiniMax M2.1","score":95,"functional":90.3,"quality":81.4,"passed":true,"tokens":18286,"timeMs":176415,"cost":0.0136},{"modelName":"GLM-4.7","score":86.8,"functional":82.4,"quality":79.5,"passed":true,"tokens":3695,"timeMs":63851,"cost":0.0047},{"modelName":"Gemini 3 Flash","score":82.2,"functional":78.1,"quality":73.8,"passed":true,"tokens":1845,"timeMs":26895,"cost":0.0051},{"modelName":"Gemini 3 Pro Preview","score":88.5,"functional":85,"quality":80,"passed":true,"tokens":4408,"timeMs":41379,"cost":0.0617},{"modelName":"Claude Sonnet 4.5","score":90,"functional":85.5,"quality":79.5,"passed":true,"tokens":3648,"timeMs":35850,"cost":0.0398},{"modelName":"Claude Opus 4.5","score":89.5,"functional":85.1,"quality":79.2,"passed":true,"tokens":4006,"timeMs":41763,"cost":0.0758},{"modelName":"Claude Haiku 4.5","score":94.2,"functional":89.4,"quality":80.2,"passed":true,"tokens":4394,"timeMs":19411,"cost":0.0139},{"modelName":"DeepSeek v3.2","score":87.5,"functional":83.1,"quality":78.9,"passed":true,"tokens":2520,"timeMs":101937,"cost":0.0025},{"modelName":"OpenAI GPT-5.2","score":94.9,"functional":90.2,"quality":80.5,"passed":true,"tokens":3221,"timeMs":36033,"cost":0.0267},{"modelName":"GLM 4.7 
Flash","score":80.2,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":96.8,"functional":91.9,"quality":81.5,"passed":true,"tokens":3254,"timeMs":76565,"cost":0.0014},{"modelName":"Grok 4","score":91.8,"functional":87.2,"quality":80.3,"passed":true,"tokens":2931,"timeMs":81952,"cost":0.026},{"modelName":"Grok 4.1 Fast","score":93.9,"functional":89.2,"quality":79.9,"passed":true,"tokens":2778,"timeMs":106587,"cost":0.0014},{"modelName":"Qwen3 Max","score":87.8,"functional":85,"quality":80,"passed":true,"tokens":3113,"timeMs":65638,"cost":0.0171}]},{"taskId":"frontend/performance/task-5","category":"frontend","subcategory":"performance","results":[{"modelName":"GLM 4-Plus","score":86.3,"functional":82,"quality":78.5,"passed":true,"tokens":4831,"timeMs":73963,"cost":0.0053},{"modelName":"MiniMax M2.1","score":95.4,"functional":90.6,"quality":81.5,"passed":true,"tokens":13376,"timeMs":126785,"cost":0.0125},{"modelName":"GLM-4.7","score":84.4,"functional":80.2,"quality":78.8,"passed":true,"tokens":3692,"timeMs":52885,"cost":0.0039},{"modelName":"Gemini 3 Flash","score":81.4,"functional":77.4,"quality":73.6,"passed":true,"tokens":2303,"timeMs":31597,"cost":0.0047},{"modelName":"Gemini 3 Pro Preview","score":88.5,"functional":85,"quality":80,"passed":true,"tokens":4408,"timeMs":41379,"cost":0.0617},{"modelName":"Claude Sonnet 4.5","score":87.9,"functional":83.5,"quality":78.9,"passed":true,"tokens":3849,"timeMs":47956,"cost":0.0321},{"modelName":"Claude Opus 4.5","score":91.9,"functional":87.3,"quality":79.9,"passed":true,"tokens":3580,"timeMs":54039,"cost":0.0679},{"modelName":"Claude Haiku 4.5","score":96,"functional":91.2,"quality":80.8,"passed":true,"tokens":4500,"timeMs":22775,"cost":0.0172},{"modelName":"DeepSeek v3.2","score":89.6,"functional":85.1,"quality":79.5,"passed":true,"tokens":2858,"timeMs":91323,"cost":0.003},{"modelName":"OpenAI 
GPT-5.2","score":92.7,"functional":88.1,"quality":79.8,"passed":true,"tokens":2362,"timeMs":27687,"cost":0.0289},{"modelName":"GLM 4.7 Flash","score":80.2,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":96.4,"functional":91.6,"quality":81.4,"passed":true,"tokens":2912,"timeMs":66210,"cost":0.0012},{"modelName":"Grok 4","score":94.1,"functional":89.4,"quality":80.9,"passed":true,"tokens":2886,"timeMs":74163,"cost":0.0307},{"modelName":"Grok 4.1 Fast","score":92.1,"functional":87.5,"quality":79.4,"passed":true,"tokens":3518,"timeMs":112024,"cost":0.0014},{"modelName":"Qwen3 Max","score":87.8,"functional":85,"quality":80,"passed":true,"tokens":3113,"timeMs":65638,"cost":0.0171}]},{"taskId":"frontend/forms/task-1","category":"frontend","subcategory":"forms","results":[{"modelName":"GLM 4-Plus","score":87.5,"functional":83.1,"quality":78.9,"passed":true,"tokens":4369,"timeMs":121997,"cost":0.0057},{"modelName":"MiniMax M2.1","score":94.6,"functional":89.8,"quality":81.2,"passed":true,"tokens":12503,"timeMs":208125,"cost":0.0159},{"modelName":"GLM-4.7","score":82.6,"functional":78.5,"quality":78.3,"passed":true,"tokens":2796,"timeMs":53694,"cost":0.0045},{"modelName":"Gemini 3 Flash","score":82,"functional":77.9,"quality":73.8,"passed":true,"tokens":2318,"timeMs":31669,"cost":0.0049},{"modelName":"Gemini 3 Pro Preview","score":82.6,"functional":78.5,"quality":78.5,"passed":true,"tokens":3278,"timeMs":40223,"cost":0.0356},{"modelName":"Claude Sonnet 4.5","score":86.7,"functional":82.4,"quality":78.6,"passed":true,"tokens":3350,"timeMs":40584,"cost":0.0437},{"modelName":"Claude Opus 4.5","score":94.3,"functional":89.6,"quality":80.7,"passed":true,"tokens":2958,"timeMs":41035,"cost":0.0658},{"modelName":"Claude Haiku 4.5","score":96.9,"functional":92.1,"quality":81,"passed":true,"tokens":5250,"timeMs":17652,"cost":0.0141},{"modelName":"DeepSeek 
v3.2","score":92,"functional":87.4,"quality":80.3,"passed":true,"tokens":3273,"timeMs":94785,"cost":0.0023},{"modelName":"OpenAI GPT-5.2","score":90.2,"functional":85.7,"quality":79.1,"passed":true,"tokens":2832,"timeMs":26363,"cost":0.0334},{"modelName":"GLM 4.7 Flash","score":64.1,"functional":60.9,"quality":81.2,"passed":true,"tokens":1242,"timeMs":6308,"cost":0.0004},{"modelName":"Grok 4 Fast","score":95,"functional":90.2,"quality":81,"passed":true,"tokens":2814,"timeMs":57904,"cost":0.0011},{"modelName":"Grok 4","score":95.6,"functional":90.8,"quality":81.4,"passed":true,"tokens":2804,"timeMs":78610,"cost":0.031},{"modelName":"Grok 4.1 Fast","score":89.7,"functional":85.2,"quality":78.7,"passed":true,"tokens":3768,"timeMs":82826,"cost":0.0014}]},{"taskId":"frontend/forms/task-2","category":"frontend","subcategory":"forms","results":[{"modelName":"GLM 4-Plus","score":89.6,"functional":85.1,"quality":79.5,"passed":true,"tokens":4268,"timeMs":87353,"cost":0.0049},{"modelName":"MiniMax M2.1","score":92.7,"functional":88.1,"quality":80.7,"passed":true,"tokens":15068,"timeMs":146761,"cost":0.0122},{"modelName":"GLM-4.7","score":81.9,"functional":77.8,"quality":78.1,"passed":true,"tokens":2790,"timeMs":73330,"cost":0.0043},{"modelName":"Gemini 3 Flash","score":83.6,"functional":79.4,"quality":74.2,"passed":true,"tokens":1757,"timeMs":24129,"cost":0.0057},{"modelName":"Gemini 3 Pro Preview","score":85,"functional":80.8,"quality":79.2,"passed":true,"tokens":2795,"timeMs":33499,"cost":0.0252},{"modelName":"Claude Sonnet 4.5","score":86.7,"functional":82.4,"quality":78.5,"passed":true,"tokens":2954,"timeMs":35645,"cost":0.0354},{"modelName":"Claude Opus 4.5","score":96.2,"functional":91.4,"quality":81.2,"passed":true,"tokens":3450,"timeMs":37747,"cost":0.0759},{"modelName":"Claude Haiku 4.5","score":96.6,"functional":91.8,"quality":81,"passed":true,"tokens":5192,"timeMs":21180,"cost":0.0152},{"modelName":"DeepSeek 
v3.2","score":94.3,"functional":89.6,"quality":80.9,"passed":true,"tokens":2574,"timeMs":65983,"cost":0.0033},{"modelName":"OpenAI GPT-5.2","score":88.1,"functional":83.7,"quality":78.5,"passed":true,"tokens":2690,"timeMs":19628,"cost":0.0251},{"modelName":"GLM 4.7 Flash","score":65.2,"functional":61.9,"quality":81.5,"passed":true,"tokens":1401,"timeMs":7760,"cost":0.0004},{"modelName":"Grok 4 Fast","score":92.7,"functional":88.1,"quality":80.3,"passed":true,"tokens":3243,"timeMs":76485,"cost":0.0012},{"modelName":"Grok 4","score":96,"functional":91.2,"quality":81.5,"passed":true,"tokens":2703,"timeMs":95475,"cost":0.0327},{"modelName":"Grok 4.1 Fast","score":87.3,"functional":82.9,"quality":77.9,"passed":true,"tokens":3213,"timeMs":78134,"cost":0.0015}]},{"taskId":"frontend/forms/task-3","category":"frontend","subcategory":"forms","results":[{"modelName":"GLM 4-Plus","score":92,"functional":87.4,"quality":80.3,"passed":true,"tokens":5231,"timeMs":108810,"cost":0.0051},{"modelName":"MiniMax M2.1","score":90.3,"functional":85.8,"quality":80,"passed":true,"tokens":17199,"timeMs":146106,"cost":0.0126},{"modelName":"GLM-4.7","score":82.4,"functional":78.3,"quality":78.2,"passed":true,"tokens":3519,"timeMs":57313,"cost":0.0037},{"modelName":"Gemini 3 Flash","score":85.9,"functional":81.6,"quality":74.9,"passed":true,"tokens":1928,"timeMs":34680,"cost":0.0043},{"modelName":"Gemini 3 Pro Preview","score":87,"functional":82.7,"quality":79.8,"passed":true,"tokens":3494,"timeMs":23105,"cost":0.0346},{"modelName":"Claude Sonnet 4.5","score":87.9,"functional":83.5,"quality":78.9,"passed":true,"tokens":3572,"timeMs":47373,"cost":0.0405},{"modelName":"Claude Opus 4.5","score":97.1,"functional":92.3,"quality":81.5,"passed":true,"tokens":4054,"timeMs":37451,"cost":0.0569},{"modelName":"Claude Haiku 4.5","score":95.2,"functional":90.4,"quality":80.5,"passed":true,"tokens":5085,"timeMs":15416,"cost":0.0154},{"modelName":"DeepSeek 
v3.2","score":95.8,"functional":91,"quality":81.4,"passed":true,"tokens":3242,"timeMs":109966,"cost":0.0025},{"modelName":"OpenAI GPT-5.2","score":86.9,"functional":82.6,"quality":78.1,"passed":true,"tokens":2245,"timeMs":34812,"cost":0.0262},{"modelName":"GLM 4.7 Flash","score":65,"functional":61.8,"quality":81.4,"passed":true,"tokens":1601,"timeMs":9175,"cost":0.0004},{"modelName":"Grok 4 Fast","score":90.3,"functional":85.8,"quality":79.5,"passed":true,"tokens":2930,"timeMs":68267,"cost":0.0013},{"modelName":"Grok 4","score":95.1,"functional":90.4,"quality":81.2,"passed":true,"tokens":2992,"timeMs":93687,"cost":0.0313},{"modelName":"Grok 4.1 Fast","score":85.5,"functional":81.2,"quality":77.4,"passed":true,"tokens":3551,"timeMs":64054,"cost":0.0014}]},{"taskId":"frontend/forms/task-4","category":"frontend","subcategory":"forms","results":[{"modelName":"GLM 4-Plus","score":94.3,"functional":89.6,"quality":80.9,"passed":true,"tokens":4565,"timeMs":118650,"cost":0.0048},{"modelName":"MiniMax M2.1","score":87.9,"functional":83.5,"quality":79.2,"passed":true,"tokens":15978,"timeMs":164844,"cost":0.0146},{"modelName":"GLM-4.7","score":84,"functional":79.8,"quality":78.7,"passed":true,"tokens":3636,"timeMs":52296,"cost":0.0036},{"modelName":"Gemini 3 Flash","score":88.3,"functional":83.9,"quality":75.7,"passed":true,"tokens":2259,"timeMs":30263,"cost":0.0048},{"modelName":"Gemini 3 Pro Preview","score":88.1,"functional":83.7,"quality":80.1,"passed":true,"tokens":3402,"timeMs":41254,"cost":0.0361},{"modelName":"Claude Sonnet 4.5","score":89.9,"functional":85.4,"quality":79.5,"passed":true,"tokens":3399,"timeMs":32413,"cost":0.0436},{"modelName":"Claude Opus 4.5","score":96.8,"functional":92,"quality":81.4,"passed":true,"tokens":3236,"timeMs":51657,"cost":0.067},{"modelName":"Claude Haiku 4.5","score":92.9,"functional":88.3,"quality":79.8,"passed":true,"tokens":4756,"timeMs":23067,"cost":0.0143},{"modelName":"DeepSeek 
v3.2","score":96.2,"functional":91.4,"quality":81.5,"passed":true,"tokens":2622,"timeMs":84345,"cost":0.003},{"modelName":"OpenAI GPT-5.2","score":86.9,"functional":82.5,"quality":78.1,"passed":true,"tokens":3033,"timeMs":20219,"cost":0.0225},{"modelName":"GLM 4.7 Flash","score":63.7,"functional":60.5,"quality":81,"passed":true,"tokens":1537,"timeMs":10373,"cost":0.0004},{"modelName":"Grok 4 Fast","score":88.2,"functional":83.8,"quality":78.9,"passed":true,"tokens":2414,"timeMs":59301,"cost":0.001},{"modelName":"Grok 4","score":93.3,"functional":88.6,"quality":80.7,"passed":true,"tokens":3188,"timeMs":84350,"cost":0.0295},{"modelName":"Grok 4.1 Fast","score":84.8,"functional":80.6,"quality":77.2,"passed":true,"tokens":3342,"timeMs":78489,"cost":0.0014}]},{"taskId":"frontend/forms/task-5","category":"frontend","subcategory":"forms","results":[{"modelName":"GLM 4-Plus","score":95.8,"functional":91,"quality":81.4,"passed":true,"tokens":5017,"timeMs":112102,"cost":0.006},{"modelName":"MiniMax M2.1","score":86.1,"functional":81.8,"quality":78.7,"passed":true,"tokens":15832,"timeMs":200459,"cost":0.0127},{"modelName":"GLM-4.7","score":86.3,"functional":82,"quality":79.4,"passed":true,"tokens":3321,"timeMs":73361,"cost":0.0045},{"modelName":"Gemini 3 Flash","score":90.3,"functional":85.8,"quality":76.3,"passed":true,"tokens":2348,"timeMs":30844,"cost":0.0042},{"modelName":"Gemini 3 Pro Preview","score":87.9,"functional":83.5,"quality":80.1,"passed":true,"tokens":4050,"timeMs":40990,"cost":0.0278},{"modelName":"Claude Sonnet 4.5","score":92.4,"functional":87.8,"quality":80.3,"passed":true,"tokens":3027,"timeMs":33034,"cost":0.0333},{"modelName":"Claude Opus 4.5","score":95.3,"functional":90.6,"quality":81,"passed":true,"tokens":3712,"timeMs":36820,"cost":0.0585},{"modelName":"Claude Haiku 4.5","score":90.4,"functional":85.9,"quality":79.1,"passed":true,"tokens":4481,"timeMs":22380,"cost":0.017},{"modelName":"DeepSeek 
v3.2","score":95.3,"functional":90.6,"quality":81.2,"passed":true,"tokens":2455,"timeMs":65288,"cost":0.0026},{"modelName":"OpenAI GPT-5.2","score":88.1,"functional":83.7,"quality":78.5,"passed":true,"tokens":2536,"timeMs":30189,"cost":0.0302},{"modelName":"GLM 4.7 Flash","score":61.5,"functional":58.5,"quality":80.4,"passed":true,"tokens":1563,"timeMs":10227,"cost":0.0004},{"modelName":"Grok 4 Fast","score":87,"functional":82.6,"quality":78.6,"passed":true,"tokens":2743,"timeMs":53406,"cost":0.0011},{"modelName":"Grok 4","score":90.9,"functional":86.3,"quality":80,"passed":true,"tokens":2324,"timeMs":91893,"cost":0.0344},{"modelName":"Grok 4.1 Fast","score":85.3,"functional":81,"quality":77.4,"passed":true,"tokens":3409,"timeMs":94316,"cost":0.0013}]},{"taskId":"frontend/advanced/task-1","category":"frontend","subcategory":"advanced","results":[{"modelName":"GLM 4-Plus","score":96.2,"functional":91.4,"quality":81.5,"passed":true,"tokens":4919,"timeMs":114415,"cost":0.0049},{"modelName":"MiniMax M2.1","score":85.4,"functional":81.2,"quality":78.5,"passed":true,"tokens":14782,"timeMs":204187,"cost":0.0131},{"modelName":"GLM-4.7","score":88.8,"functional":84.3,"quality":80.1,"passed":true,"tokens":2915,"timeMs":50759,"cost":0.0047},{"modelName":"Gemini 3 Flash","score":91.4,"functional":86.8,"quality":76.6,"passed":true,"tokens":2373,"timeMs":28390,"cost":0.0049},{"modelName":"Gemini 3 Pro Preview","score":89.6,"functional":85,"quality":80,"passed":true,"tokens":4239,"timeMs":41648,"cost":0.0594},{"modelName":"Claude Sonnet 4.5","score":94.7,"functional":89.9,"quality":80.9,"passed":true,"tokens":3558,"timeMs":42597,"cost":0.0423},{"modelName":"Claude Opus 4.5","score":93.1,"functional":88.4,"quality":80.3,"passed":true,"tokens":3528,"timeMs":54616,"cost":0.0738},{"modelName":"Claude Haiku 4.5","score":88.3,"functional":83.9,"quality":78.5,"passed":true,"tokens":5171,"timeMs":22748,"cost":0.0199},{"modelName":"DeepSeek 
v3.2","score":93.5,"functional":88.8,"quality":80.7,"passed":true,"tokens":2987,"timeMs":63858,"cost":0.0029},{"modelName":"OpenAI GPT-5.2","score":90.1,"functional":85.6,"quality":79.1,"passed":true,"tokens":3030,"timeMs":29942,"cost":0.0245},{"modelName":"GLM 4.7 Flash","score":85.6,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":86.9,"functional":82.6,"quality":78.5,"passed":true,"tokens":2753,"timeMs":83969,"cost":0.0012},{"modelName":"Grok 4","score":88.5,"functional":84.1,"quality":79.2,"passed":true,"tokens":2257,"timeMs":93490,"cost":0.0285},{"modelName":"Grok 4.1 Fast","score":86.9,"functional":82.6,"quality":77.8,"passed":true,"tokens":2686,"timeMs":66339,"cost":0.0013},{"modelName":"Qwen3 Max","score":88.8,"functional":85,"quality":80,"passed":true,"tokens":7610,"timeMs":142576,"cost":0.0449}]},{"taskId":"frontend/advanced/task-2","category":"frontend","subcategory":"advanced","results":[{"modelName":"GLM 4-Plus","score":95.3,"functional":90.6,"quality":81.2,"passed":true,"tokens":4710,"timeMs":118004,"cost":0.0058},{"modelName":"MiniMax M2.1","score":85.9,"functional":81.6,"quality":78.7,"passed":true,"tokens":13490,"timeMs":194748,"cost":0.0115},{"modelName":"GLM-4.7","score":90.8,"functional":86.2,"quality":80.7,"passed":true,"tokens":3926,"timeMs":51750,"cost":0.0046},{"modelName":"Gemini 3 Flash","score":91.2,"functional":86.6,"quality":76.5,"passed":true,"tokens":2429,"timeMs":35116,"cost":0.0054},{"modelName":"Gemini 3 Pro Preview","score":89.6,"functional":85,"quality":80,"passed":true,"tokens":4239,"timeMs":41648,"cost":0.0594},{"modelName":"Claude Sonnet 4.5","score":96.2,"functional":91.4,"quality":81.4,"passed":true,"tokens":3850,"timeMs":34779,"cost":0.0396},{"modelName":"Claude Opus 4.5","score":90.6,"functional":86.1,"quality":79.5,"passed":true,"tokens":4148,"timeMs":53213,"cost":0.0754},{"modelName":"Claude Haiku 
4.5","score":87.1,"functional":82.8,"quality":78.1,"passed":true,"tokens":3989,"timeMs":21763,"cost":0.0156},{"modelName":"DeepSeek v3.2","score":91.1,"functional":86.5,"quality":80,"passed":true,"tokens":3277,"timeMs":110027,"cost":0.0024},{"modelName":"OpenAI GPT-5.2","score":92.6,"functional":88,"quality":79.8,"passed":true,"tokens":2896,"timeMs":35684,"cost":0.0307},{"modelName":"GLM 4.7 Flash","score":85.6,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":88.1,"functional":83.7,"quality":78.9,"passed":true,"tokens":2462,"timeMs":82535,"cost":0.0011},{"modelName":"Grok 4","score":86.7,"functional":82.4,"quality":78.7,"passed":true,"tokens":2790,"timeMs":66415,"cost":0.0304},{"modelName":"Grok 4.1 Fast","score":89.2,"functional":84.8,"quality":78.5,"passed":true,"tokens":3624,"timeMs":77505,"cost":0.0012},{"modelName":"Qwen3 Max","score":88.8,"functional":85,"quality":80,"passed":true,"tokens":7610,"timeMs":142576,"cost":0.0449}]},{"taskId":"frontend/advanced/task-3","category":"frontend","subcategory":"advanced","results":[{"modelName":"GLM 4-Plus","score":93.5,"functional":88.8,"quality":80.7,"passed":true,"tokens":3755,"timeMs":123091,"cost":0.0061},{"modelName":"MiniMax M2.1","score":87.5,"functional":83.2,"quality":79.1,"passed":true,"tokens":12467,"timeMs":188556,"cost":0.0128},{"modelName":"GLM-4.7","score":91.8,"functional":87.2,"quality":81,"passed":true,"tokens":3753,"timeMs":47524,"cost":0.0044},{"modelName":"Gemini 3 Flash","score":89.9,"functional":85.4,"quality":76.1,"passed":true,"tokens":1752,"timeMs":22761,"cost":0.004},{"modelName":"Gemini 3 Pro Preview","score":89.6,"functional":85,"quality":80,"passed":true,"tokens":4239,"timeMs":41648,"cost":0.0594},{"modelName":"Claude Sonnet 4.5","score":96.5,"functional":91.7,"quality":81.5,"passed":true,"tokens":3268,"timeMs":52293,"cost":0.0459},{"modelName":"Claude Opus 
4.5","score":88.5,"functional":84.1,"quality":78.9,"passed":true,"tokens":3176,"timeMs":43283,"cost":0.0759},{"modelName":"Claude Haiku 4.5","score":87.1,"functional":82.8,"quality":78.1,"passed":true,"tokens":4693,"timeMs":16456,"cost":0.0158},{"modelName":"DeepSeek v3.2","score":88.7,"functional":84.2,"quality":79.2,"passed":true,"tokens":2870,"timeMs":106160,"cost":0.0025},{"modelName":"OpenAI GPT-5.2","score":94.8,"functional":90.1,"quality":80.5,"passed":true,"tokens":2549,"timeMs":32729,"cost":0.0291},{"modelName":"GLM 4.7 Flash","score":85.6,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":90.2,"functional":85.7,"quality":79.5,"passed":true,"tokens":3462,"timeMs":61124,"cost":0.0009},{"modelName":"Grok 4","score":86,"functional":81.7,"quality":78.5,"passed":true,"tokens":2163,"timeMs":71168,"cost":0.0327},{"modelName":"Grok 4.1 Fast","score":91.7,"functional":87.1,"quality":79.3,"passed":true,"tokens":2746,"timeMs":114126,"cost":0.0012},{"modelName":"Qwen3 Max","score":88.8,"functional":85,"quality":80,"passed":true,"tokens":7610,"timeMs":142576,"cost":0.0449}]},{"taskId":"frontend/advanced/task-4","category":"frontend","subcategory":"advanced","results":[{"modelName":"GLM 4-Plus","score":91.1,"functional":86.5,"quality":80,"passed":true,"tokens":3881,"timeMs":107743,"cost":0.0041},{"modelName":"MiniMax M2.1","score":89.8,"functional":85.4,"quality":79.8,"passed":true,"tokens":17702,"timeMs":118221,"cost":0.0121},{"modelName":"GLM-4.7","score":91.7,"functional":87.1,"quality":81,"passed":true,"tokens":3755,"timeMs":72420,"cost":0.0037},{"modelName":"Gemini 3 Flash","score":87.7,"functional":83.3,"quality":75.5,"passed":true,"tokens":1952,"timeMs":35479,"cost":0.004},{"modelName":"Gemini 3 Pro Preview","score":89.6,"functional":85,"quality":80,"passed":true,"tokens":4239,"timeMs":41648,"cost":0.0594},{"modelName":"Claude Sonnet 
4.5","score":95.7,"functional":90.9,"quality":81.2,"passed":true,"tokens":3730,"timeMs":47296,"cost":0.0374},{"modelName":"Claude Opus 4.5","score":87.3,"functional":83,"quality":78.6,"passed":true,"tokens":3149,"timeMs":47638,"cost":0.0805},{"modelName":"Claude Haiku 4.5","score":88.3,"functional":83.9,"quality":78.5,"passed":true,"tokens":4082,"timeMs":23996,"cost":0.0175},{"modelName":"DeepSeek v3.2","score":86.9,"functional":82.6,"quality":78.7,"passed":true,"tokens":2496,"timeMs":115354,"cost":0.0033},{"modelName":"OpenAI GPT-5.2","score":96.4,"functional":91.5,"quality":80.9,"passed":true,"tokens":2462,"timeMs":32532,"cost":0.0251},{"modelName":"GLM 4.7 Flash","score":85.6,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":92.6,"functional":88,"quality":80.3,"passed":true,"tokens":3091,"timeMs":87672,"cost":0.001},{"modelName":"Grok 4","score":86.5,"functional":82.2,"quality":78.7,"passed":true,"tokens":3090,"timeMs":53811,"cost":0.0243},{"modelName":"Grok 4.1 Fast","score":93.7,"functional":89,"quality":79.9,"passed":true,"tokens":3577,"timeMs":68771,"cost":0.0014},{"modelName":"Qwen3 Max","score":88.8,"functional":85,"quality":80,"passed":true,"tokens":7610,"timeMs":142576,"cost":0.0449}]},{"taskId":"frontend/advanced/task-5","category":"frontend","subcategory":"advanced","results":[{"modelName":"GLM 4-Plus","score":88.7,"functional":84.3,"quality":79.2,"passed":true,"tokens":4344,"timeMs":124057,"cost":0.0059},{"modelName":"MiniMax M2.1","score":92.3,"functional":87.7,"quality":80.6,"passed":true,"tokens":16465,"timeMs":208911,"cost":0.0147},{"modelName":"GLM-4.7","score":90.3,"functional":85.8,"quality":80.6,"passed":true,"tokens":2856,"timeMs":62154,"cost":0.004},{"modelName":"Gemini 3 Flash","score":85.2,"functional":81,"quality":74.7,"passed":true,"tokens":2016,"timeMs":28418,"cost":0.0052},{"modelName":"Gemini 3 Pro 
Preview","score":89.6,"functional":85,"quality":80,"passed":true,"tokens":4239,"timeMs":41648,"cost":0.0594},{"modelName":"Claude Sonnet 4.5","score":93.8,"functional":89.1,"quality":80.7,"passed":true,"tokens":3462,"timeMs":42706,"cost":0.0449},{"modelName":"Claude Opus 4.5","score":87.3,"functional":82.9,"quality":78.5,"passed":true,"tokens":3556,"timeMs":46562,"cost":0.0612},{"modelName":"Claude Haiku 4.5","score":90.3,"functional":85.8,"quality":79.1,"passed":true,"tokens":4968,"timeMs":23678,"cost":0.018},{"modelName":"DeepSeek v3.2","score":86.2,"functional":81.9,"quality":78.5,"passed":true,"tokens":2849,"timeMs":92127,"cost":0.0028},{"modelName":"OpenAI GPT-5.2","score":96.7,"functional":91.9,"quality":81.1,"passed":true,"tokens":2760,"timeMs":26820,"cost":0.0251},{"modelName":"GLM 4.7 Flash","score":85.6,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":94.9,"functional":90.2,"quality":80.9,"passed":true,"tokens":2515,"timeMs":70363,"cost":0.001},{"modelName":"Grok 4","score":88.1,"functional":83.7,"quality":79.1,"passed":true,"tokens":3185,"timeMs":91650,"cost":0.0285},{"modelName":"Grok 4.1 Fast","score":94.7,"functional":90,"quality":80.2,"passed":true,"tokens":2629,"timeMs":89860,"cost":0.0013},{"modelName":"Qwen3 Max","score":88.8,"functional":85,"quality":80,"passed":true,"tokens":7610,"timeMs":142576,"cost":0.0449}]},{"taskId":"api-integrations/communication/task-1","category":"api-integrations","subcategory":"communication","results":[{"modelName":"GLM 4-Plus","score":84.9,"functional":80.7,"quality":78.7,"passed":true,"tokens":4804,"timeMs":107875,"cost":0.0048},{"modelName":"MiniMax M2.1","score":92.3,"functional":87.7,"quality":81.2,"passed":true,"tokens":13832,"timeMs":153597,"cost":0.0118},{"modelName":"GLM-4.7","score":86.2,"functional":81.9,"quality":79.9,"passed":true,"tokens":3265,"timeMs":67898,"cost":0.0034},{"modelName":"Gemini 3 
Flash","score":81.1,"functional":77,"quality":74.1,"passed":true,"tokens":2486,"timeMs":35339,"cost":0.0039},{"modelName":"Gemini 3 Pro Preview","score":0,"functional":0,"quality":0,"passed":false,"tokens":0,"timeMs":0,"cost":0},{"modelName":"Claude Sonnet 4.5","score":89.4,"functional":85,"quality":80,"passed":true,"tokens":2865,"timeMs":40467,"cost":0.043},{"modelName":"Claude Opus 4.5","score":86.5,"functional":82.1,"quality":78.9,"passed":true,"tokens":3919,"timeMs":34761,"cost":0.0716},{"modelName":"Claude Haiku 4.5","score":90.8,"functional":86.3,"quality":79.8,"passed":true,"tokens":4991,"timeMs":26304,"cost":0.0141},{"modelName":"DeepSeek v3.2","score":84.7,"functional":80.5,"quality":78.7,"passed":true,"tokens":3280,"timeMs":115795,"cost":0.0033},{"modelName":"OpenAI GPT-5.2","score":93.9,"functional":89.2,"quality":80.8,"passed":true,"tokens":2985,"timeMs":27569,"cost":0.0272},{"modelName":"GLM 4.7 Flash","score":47.2,"functional":0,"quality":80,"passed":false,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":94.4,"functional":89.7,"quality":81.4,"passed":true,"tokens":2335,"timeMs":55193,"cost":0.0012},{"modelName":"Grok 4","score":88.4,"functional":84,"quality":79.8,"passed":true,"tokens":2290,"timeMs":62506,"cost":0.0337},{"modelName":"Grok 4.1 Fast","score":92.6,"functional":87.9,"quality":80.1,"passed":true,"tokens":3765,"timeMs":79721,"cost":0.0011},{"modelName":"Qwen3 Max","score":86.8,"functional":85,"quality":80,"passed":true,"tokens":4483,"timeMs":95439,"cost":0.0254}]},{"taskId":"api-integrations/communication/task-2","category":"api-integrations","subcategory":"communication","results":[{"modelName":"GLM 4-Plus","score":84.2,"functional":80,"quality":78.5,"passed":true,"tokens":4441,"timeMs":117152,"cost":0.006},{"modelName":"MiniMax 
M2.1","score":93.3,"functional":88.7,"quality":81.5,"passed":true,"tokens":14937,"timeMs":144442,"cost":0.013},{"modelName":"GLM-4.7","score":83.7,"functional":79.5,"quality":79.2,"passed":true,"tokens":3033,"timeMs":44560,"cost":0.0033},{"modelName":"Gemini 3 Flash","score":79.7,"functional":75.7,"quality":73.7,"passed":true,"tokens":1857,"timeMs":21273,"cost":0.0057},{"modelName":"Gemini 3 Pro Preview","score":0,"functional":0,"quality":0,"passed":false,"tokens":0,"timeMs":0,"cost":0},{"modelName":"Claude Sonnet 4.5","score":87,"functional":82.7,"quality":79.2,"passed":true,"tokens":3040,"timeMs":40612,"cost":0.0365},{"modelName":"Claude Opus 4.5","score":88.5,"functional":84.1,"quality":79.5,"passed":true,"tokens":4227,"timeMs":34190,"cost":0.0791},{"modelName":"Claude Haiku 4.5","score":93.1,"functional":88.4,"quality":80.5,"passed":true,"tokens":3717,"timeMs":22027,"cost":0.0162},{"modelName":"DeepSeek v3.2","score":86.3,"functional":82,"quality":79.1,"passed":true,"tokens":2683,"timeMs":112434,"cost":0.0033},{"modelName":"OpenAI GPT-5.2","score":92,"functional":87.4,"quality":80.2,"passed":true,"tokens":3029,"timeMs":27397,"cost":0.0293},{"modelName":"GLM 4.7 Flash","score":47.2,"functional":0,"quality":80,"passed":false,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":94.8,"functional":90,"quality":81.5,"passed":true,"tokens":3453,"timeMs":56254,"cost":0.0013},{"modelName":"Grok 4","score":90.9,"functional":86.3,"quality":80.6,"passed":true,"tokens":3126,"timeMs":78868,"cost":0.0249},{"modelName":"Grok 4.1 Fast","score":91.2,"functional":86.7,"quality":79.7,"passed":true,"tokens":3411,"timeMs":79325,"cost":0.0013},{"modelName":"Qwen3 Max","score":86.8,"functional":85,"quality":80,"passed":true,"tokens":4483,"timeMs":95439,"cost":0.0254}]},{"taskId":"api-integrations/communication/task-3","category":"api-integrations","subcategory":"communication","results":[{"modelName":"GLM 
4-Plus","score":84.7,"functional":80.5,"quality":78.7,"passed":true,"tokens":4690,"timeMs":117406,"cost":0.006},{"modelName":"MiniMax M2.1","score":93.2,"functional":88.5,"quality":81.4,"passed":true,"tokens":18021,"timeMs":182366,"cost":0.0112},{"modelName":"GLM-4.7","score":81.5,"functional":77.4,"quality":78.5,"passed":true,"tokens":3002,"timeMs":71650,"cost":0.0034},{"modelName":"Gemini 3 Flash","score":79.5,"functional":75.5,"quality":73.6,"passed":true,"tokens":2101,"timeMs":35451,"cost":0.0056},{"modelName":"Gemini 3 Pro Preview","score":0,"functional":0,"quality":0,"passed":false,"tokens":0,"timeMs":0,"cost":0},{"modelName":"Claude Sonnet 4.5","score":85.3,"functional":81,"quality":78.7,"passed":true,"tokens":3261,"timeMs":33705,"cost":0.0438},{"modelName":"Claude Opus 4.5","score":91,"functional":86.4,"quality":80.3,"passed":true,"tokens":3152,"timeMs":32728,"cost":0.0628},{"modelName":"Claude Haiku 4.5","score":94.6,"functional":89.8,"quality":80.9,"passed":true,"tokens":3876,"timeMs":24531,"cost":0.0169},{"modelName":"DeepSeek v3.2","score":88.6,"functional":84.2,"quality":79.8,"passed":true,"tokens":3574,"timeMs":97262,"cost":0.0024},{"modelName":"OpenAI GPT-5.2","score":89.6,"functional":85.1,"quality":79.5,"passed":true,"tokens":2439,"timeMs":22922,"cost":0.033},{"modelName":"GLM 4.7 Flash","score":47.2,"functional":0,"quality":80,"passed":false,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":93.9,"functional":89.2,"quality":81.2,"passed":true,"tokens":2998,"timeMs":53205,"cost":0.0012},{"modelName":"Grok 4","score":92.9,"functional":88.2,"quality":81.2,"passed":true,"tokens":2849,"timeMs":53742,"cost":0.025},{"modelName":"Grok 4.1 Fast","score":89.1,"functional":84.6,"quality":79.1,"passed":true,"tokens":3048,"timeMs":95471,"cost":0.0011},{"modelName":"Qwen3 
Max","score":86.8,"functional":85,"quality":80,"passed":true,"tokens":4483,"timeMs":95439,"cost":0.0254}]},{"taskId":"api-integrations/analytics/task-1","category":"api-integrations","subcategory":"analytics","results":[{"modelName":"GLM 4-Plus","score":86.3,"functional":82,"quality":79.1,"passed":true,"tokens":3686,"timeMs":90356,"cost":0.0042},{"modelName":"MiniMax M2.1","score":91.9,"functional":87.3,"quality":81,"passed":true,"tokens":13411,"timeMs":133924,"cost":0.0139},{"modelName":"GLM-4.7","score":80.2,"functional":76.2,"quality":78.1,"passed":true,"tokens":3360,"timeMs":40688,"cost":0.0036},{"modelName":"Gemini 3 Flash","score":80.5,"functional":76.5,"quality":73.9,"passed":true,"tokens":2401,"timeMs":27052,"cost":0.0039},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4403,"timeMs":38593,"cost":0.0617},{"modelName":"Claude Sonnet 4.5","score":84.6,"functional":80.3,"quality":78.5,"passed":true,"tokens":2734,"timeMs":39592,"cost":0.0395},{"modelName":"Claude Opus 4.5","score":93.2,"functional":88.6,"quality":80.9,"passed":true,"tokens":3052,"timeMs":36827,"cost":0.0602},{"modelName":"Claude Haiku 4.5","score":94.9,"functional":90.2,"quality":81.1,"passed":true,"tokens":4416,"timeMs":20743,"cost":0.0171},{"modelName":"DeepSeek v3.2","score":91.1,"functional":86.5,"quality":80.6,"passed":true,"tokens":2640,"timeMs":92682,"cost":0.0027},{"modelName":"OpenAI GPT-5.2","score":87.2,"functional":82.9,"quality":78.8,"passed":true,"tokens":3100,"timeMs":29762,"cost":0.0299},{"modelName":"GLM 4.7 Flash","score":89.1,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":92.1,"functional":87.5,"quality":80.7,"passed":true,"tokens":3464,"timeMs":77180,"cost":0.001},{"modelName":"Grok 4","score":93.9,"functional":89.2,"quality":81.5,"passed":true,"tokens":2394,"timeMs":70741,"cost":0.0286},{"modelName":"Grok 4.1 
Fast","score":86.6,"functional":82.3,"quality":78.3,"passed":true,"tokens":3770,"timeMs":84738,"cost":0.0015},{"modelName":"Qwen3 Max","score":88.8,"functional":85,"quality":80,"passed":true,"tokens":3508,"timeMs":70341,"cost":0.0196}]},{"taskId":"api-integrations/analytics/task-2","category":"api-integrations","subcategory":"analytics","results":[{"modelName":"GLM 4-Plus","score":88.6,"functional":84.2,"quality":79.8,"passed":true,"tokens":4230,"timeMs":88452,"cost":0.0052},{"modelName":"MiniMax M2.1","score":89.7,"functional":85.2,"quality":80.4,"passed":true,"tokens":18482,"timeMs":211603,"cost":0.0126},{"modelName":"GLM-4.7","score":80,"functional":76,"quality":78.1,"passed":true,"tokens":2855,"timeMs":53088,"cost":0.0045},{"modelName":"Gemini 3 Flash","score":82.5,"functional":78.4,"quality":74.5,"passed":true,"tokens":2229,"timeMs":22108,"cost":0.0043},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4403,"timeMs":38593,"cost":0.0617},{"modelName":"Claude Sonnet 4.5","score":85.1,"functional":80.8,"quality":78.7,"passed":true,"tokens":3625,"timeMs":45394,"cost":0.0428},{"modelName":"Claude Opus 4.5","score":94.8,"functional":90,"quality":81.4,"passed":true,"tokens":3307,"timeMs":36861,"cost":0.0709},{"modelName":"Claude Haiku 4.5","score":94.1,"functional":89.4,"quality":80.8,"passed":true,"tokens":5231,"timeMs":19592,"cost":0.0187},{"modelName":"DeepSeek v3.2","score":93.1,"functional":88.4,"quality":81.2,"passed":true,"tokens":3512,"timeMs":65543,"cost":0.0022},{"modelName":"OpenAI GPT-5.2","score":85.5,"functional":81.2,"quality":78.3,"passed":true,"tokens":2922,"timeMs":30291,"cost":0.0278},{"modelName":"GLM 4.7 Flash","score":89.1,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":89.7,"functional":85.2,"quality":80,"passed":true,"tokens":3006,"timeMs":88289,"cost":0.0012},{"modelName":"Grok 
4","score":93.8,"functional":89.1,"quality":81.4,"passed":true,"tokens":2227,"timeMs":79762,"cost":0.0325},{"modelName":"Grok 4.1 Fast","score":84.4,"functional":80.2,"quality":77.7,"passed":true,"tokens":2602,"timeMs":99352,"cost":0.0011},{"modelName":"Qwen3 Max","score":88.8,"functional":85,"quality":80,"passed":true,"tokens":3508,"timeMs":70341,"cost":0.0196}]},{"taskId":"api-integrations/analytics/task-3","category":"api-integrations","subcategory":"analytics","results":[{"modelName":"GLM 4-Plus","score":91.1,"functional":86.5,"quality":80.6,"passed":true,"tokens":3862,"timeMs":102961,"cost":0.0057},{"modelName":"MiniMax M2.1","score":87.2,"functional":82.9,"quality":79.6,"passed":true,"tokens":17527,"timeMs":123104,"cost":0.0155},{"modelName":"GLM-4.7","score":81,"functional":76.9,"quality":78.4,"passed":true,"tokens":2979,"timeMs":46825,"cost":0.0033},{"modelName":"Gemini 3 Flash","score":84.9,"functional":80.7,"quality":75.3,"passed":true,"tokens":2540,"timeMs":26907,"cost":0.0055},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4403,"timeMs":38593,"cost":0.0617},{"modelName":"Claude Sonnet 4.5","score":86.7,"functional":82.3,"quality":79.1,"passed":true,"tokens":3620,"timeMs":41220,"cost":0.0409},{"modelName":"Claude Opus 4.5","score":95.1,"functional":90.4,"quality":81.5,"passed":true,"tokens":3573,"timeMs":38205,"cost":0.0719},{"modelName":"Claude Haiku 4.5","score":92.2,"functional":87.6,"quality":80.2,"passed":true,"tokens":4322,"timeMs":15838,"cost":0.0201},{"modelName":"DeepSeek v3.2","score":94.1,"functional":89.4,"quality":81.5,"passed":true,"tokens":3567,"timeMs":71958,"cost":0.0023},{"modelName":"OpenAI GPT-5.2","score":84.8,"functional":80.5,"quality":78.1,"passed":true,"tokens":2517,"timeMs":33985,"cost":0.0265},{"modelName":"GLM 4.7 Flash","score":89.1,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 
Fast","score":87.3,"functional":82.9,"quality":79.2,"passed":true,"tokens":3422,"timeMs":81995,"cost":0.0012},{"modelName":"Grok 4","score":92.4,"functional":87.8,"quality":81,"passed":true,"tokens":2758,"timeMs":93637,"cost":0.0255},{"modelName":"Grok 4.1 Fast","score":83.1,"functional":78.9,"quality":77.3,"passed":true,"tokens":2981,"timeMs":64490,"cost":0.0015},{"modelName":"Qwen3 Max","score":88.8,"functional":85,"quality":80,"passed":true,"tokens":3508,"timeMs":70341,"cost":0.0196}]},{"taskId":"api-integrations/auth-provider/task-1","category":"api-integrations","subcategory":"auth-provider","results":[{"modelName":"GLM 4-Plus","score":93.1,"functional":88.4,"quality":81.2,"passed":true,"tokens":4231,"timeMs":107010,"cost":0.0053},{"modelName":"MiniMax M2.1","score":85,"functional":80.8,"quality":79,"passed":true,"tokens":18006,"timeMs":159285,"cost":0.0144},{"modelName":"GLM-4.7","score":83,"functional":78.8,"quality":79,"passed":true,"tokens":4038,"timeMs":51291,"cost":0.0037},{"modelName":"Gemini 3 Flash","score":87.3,"functional":82.9,"quality":76,"passed":true,"tokens":2435,"timeMs":29602,"cost":0.0057},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4403,"timeMs":39172,"cost":0.0617},{"modelName":"Claude Sonnet 4.5","score":89,"functional":84.5,"quality":79.8,"passed":true,"tokens":4038,"timeMs":30686,"cost":0.0349},{"modelName":"Claude Opus 4.5","score":94.3,"functional":89.6,"quality":81.2,"passed":true,"tokens":3042,"timeMs":38292,"cost":0.0786},{"modelName":"Claude Haiku 4.5","score":89.8,"functional":85.3,"quality":79.5,"passed":true,"tokens":4414,"timeMs":17678,"cost":0.0196},{"modelName":"DeepSeek v3.2","score":93.9,"functional":89.2,"quality":81.4,"passed":true,"tokens":2882,"timeMs":64014,"cost":0.0024},{"modelName":"OpenAI GPT-5.2","score":85.3,"functional":81,"quality":78.2,"passed":true,"tokens":2706,"timeMs":24853,"cost":0.0333},{"modelName":"GLM 4.7 
Flash","score":85.2,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":85.5,"functional":81.2,"quality":78.7,"passed":true,"tokens":3271,"timeMs":55457,"cost":0.0011},{"modelName":"Grok 4","score":90.3,"functional":85.8,"quality":80.4,"passed":true,"tokens":3010,"timeMs":76965,"cost":0.026},{"modelName":"Grok 4.1 Fast","score":82.9,"functional":78.7,"quality":77.2,"passed":true,"tokens":2584,"timeMs":84448,"cost":0.0015},{"modelName":"Qwen3 Max","score":88.2,"functional":85,"quality":80,"passed":true,"tokens":5239,"timeMs":107819,"cost":0.03}]},{"taskId":"api-integrations/auth-provider/task-2","category":"api-integrations","subcategory":"auth-provider","results":[{"modelName":"GLM 4-Plus","score":94.1,"functional":89.4,"quality":81.5,"passed":true,"tokens":3840,"timeMs":101469,"cost":0.0056},{"modelName":"MiniMax M2.1","score":83.7,"functional":79.5,"quality":78.6,"passed":true,"tokens":17302,"timeMs":173763,"cost":0.0131},{"modelName":"GLM-4.7","score":85.4,"functional":81.1,"quality":79.7,"passed":true,"tokens":3369,"timeMs":43632,"cost":0.0036},{"modelName":"Gemini 3 Flash","score":88.9,"functional":84.5,"quality":76.4,"passed":true,"tokens":1782,"timeMs":34441,"cost":0.0055},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4403,"timeMs":39172,"cost":0.0617},{"modelName":"Claude Sonnet 4.5","score":91.4,"functional":86.9,"quality":80.6,"passed":true,"tokens":3147,"timeMs":36440,"cost":0.0328},{"modelName":"Claude Opus 4.5","score":92.4,"functional":87.8,"quality":80.7,"passed":true,"tokens":3932,"timeMs":41168,"cost":0.0668},{"modelName":"Claude Haiku 4.5","score":87.5,"functional":83.1,"quality":78.8,"passed":true,"tokens":5196,"timeMs":16256,"cost":0.0153},{"modelName":"DeepSeek v3.2","score":92.6,"functional":88,"quality":81,"passed":true,"tokens":3296,"timeMs":78884,"cost":0.0032},{"modelName":"OpenAI 
GPT-5.2","score":86.9,"functional":82.5,"quality":78.7,"passed":true,"tokens":2390,"timeMs":20021,"cost":0.028},{"modelName":"GLM 4.7 Flash","score":85.2,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":84.8,"functional":80.6,"quality":78.5,"passed":true,"tokens":2368,"timeMs":72267,"cost":0.001},{"modelName":"Grok 4","score":87.8,"functional":83.4,"quality":79.6,"passed":true,"tokens":2391,"timeMs":95630,"cost":0.0309},{"modelName":"Grok 4.1 Fast","score":83.9,"functional":79.7,"quality":77.5,"passed":true,"tokens":2691,"timeMs":74048,"cost":0.0016},{"modelName":"Qwen3 Max","score":88.2,"functional":85,"quality":80,"passed":true,"tokens":5239,"timeMs":107819,"cost":0.03}]},{"taskId":"api-integrations/auth-provider/task-3","category":"api-integrations","subcategory":"auth-provider","results":[{"modelName":"GLM 4-Plus","score":94,"functional":89.3,"quality":81.4,"passed":true,"tokens":4494,"timeMs":121721,"cost":0.0062},{"modelName":"MiniMax M2.1","score":83.5,"functional":79.3,"quality":78.5,"passed":true,"tokens":18515,"timeMs":131343,"cost":0.0154},{"modelName":"GLM-4.7","score":87.7,"functional":83.3,"quality":80.4,"passed":true,"tokens":2980,"timeMs":56086,"cost":0.0035},{"modelName":"Gemini 3 Flash","score":89.4,"functional":85,"quality":76.6,"passed":true,"tokens":1807,"timeMs":32330,"cost":0.0055},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4403,"timeMs":39172,"cost":0.0617},{"modelName":"Claude Sonnet 4.5","score":93.4,"functional":88.8,"quality":81.2,"passed":true,"tokens":4033,"timeMs":43922,"cost":0.0364},{"modelName":"Claude Opus 4.5","score":90,"functional":85.5,"quality":80,"passed":true,"tokens":3249,"timeMs":55083,"cost":0.0639},{"modelName":"Claude Haiku 4.5","score":85.7,"functional":81.4,"quality":78.3,"passed":true,"tokens":3952,"timeMs":22101,"cost":0.0195},{"modelName":"DeepSeek 
v3.2","score":90.5,"functional":85.9,"quality":80.4,"passed":true,"tokens":3049,"timeMs":111693,"cost":0.0023},{"modelName":"OpenAI GPT-5.2","score":89.2,"functional":84.7,"quality":79.4,"passed":true,"tokens":3036,"timeMs":32824,"cost":0.0272},{"modelName":"GLM 4.7 Flash","score":85.2,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":85.3,"functional":81,"quality":78.7,"passed":true,"tokens":3249,"timeMs":76533,"cost":0.0012},{"modelName":"Grok 4","score":85.6,"functional":81.3,"quality":79,"passed":true,"tokens":2840,"timeMs":88397,"cost":0.0353},{"modelName":"Grok 4.1 Fast","score":85.9,"functional":81.6,"quality":78.1,"passed":true,"tokens":2709,"timeMs":105440,"cost":0.0011},{"modelName":"Qwen3 Max","score":88.2,"functional":85,"quality":80,"passed":true,"tokens":5239,"timeMs":107819,"cost":0.03}]},{"taskId":"api-integrations/email/task-1","category":"api-integrations","subcategory":"email","results":[{"modelName":"GLM 4-Plus","score":92.6,"functional":88,"quality":81,"passed":true,"tokens":4005,"timeMs":88560,"cost":0.0058},{"modelName":"MiniMax M2.1","score":84.5,"functional":80.3,"quality":78.8,"passed":true,"tokens":16277,"timeMs":143911,"cost":0.0123},{"modelName":"GLM-4.7","score":89.4,"functional":84.9,"quality":80.9,"passed":true,"tokens":2795,"timeMs":54974,"cost":0.0042},{"modelName":"Gemini 3 Flash","score":88.8,"functional":84.3,"quality":76.4,"passed":true,"tokens":1863,"timeMs":30626,"cost":0.0055},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4612,"timeMs":38775,"cost":0.0646},{"modelName":"Claude Sonnet 4.5","score":94.5,"functional":89.8,"quality":81.5,"passed":true,"tokens":3691,"timeMs":52176,"cost":0.0366},{"modelName":"Claude Opus 4.5","score":87.6,"functional":83.3,"quality":79.2,"passed":true,"tokens":4202,"timeMs":46402,"cost":0.0605},{"modelName":"Claude Haiku 
4.5","score":85,"functional":80.7,"quality":78.1,"passed":true,"tokens":4733,"timeMs":23277,"cost":0.0179},{"modelName":"DeepSeek v3.2","score":88,"functional":83.6,"quality":79.6,"passed":true,"tokens":2560,"timeMs":102457,"cost":0.0027},{"modelName":"OpenAI GPT-5.2","score":91.6,"functional":87,"quality":80.1,"passed":true,"tokens":2720,"timeMs":20028,"cost":0.0308},{"modelName":"GLM 4.7 Flash","score":85.2,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":86.9,"functional":82.6,"quality":79.1,"passed":true,"tokens":2836,"timeMs":62056,"cost":0.0014},{"modelName":"Grok 4","score":84.3,"functional":80.1,"quality":78.6,"passed":true,"tokens":2615,"timeMs":62396,"cost":0.0295},{"modelName":"Grok 4.1 Fast","score":88.3,"functional":83.9,"quality":78.9,"passed":true,"tokens":3373,"timeMs":86849,"cost":0.0013},{"modelName":"Qwen3 Max","score":87.5,"functional":85,"quality":80,"passed":true,"tokens":7501,"timeMs":149108,"cost":0.0426}]},{"taskId":"api-integrations/email/task-2","category":"api-integrations","subcategory":"email","results":[{"modelName":"GLM 4-Plus","score":90.5,"functional":85.9,"quality":80.4,"passed":true,"tokens":5242,"timeMs":119340,"cost":0.0056},{"modelName":"MiniMax M2.1","score":86.5,"functional":82.2,"quality":79.4,"passed":true,"tokens":14810,"timeMs":126434,"cost":0.0148},{"modelName":"GLM-4.7","score":89.9,"functional":85.4,"quality":81.1,"passed":true,"tokens":3957,"timeMs":53632,"cost":0.004},{"modelName":"Gemini 3 Flash","score":87,"functional":82.7,"quality":75.9,"passed":true,"tokens":1727,"timeMs":28829,"cost":0.0041},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4612,"timeMs":38775,"cost":0.0646},{"modelName":"Claude Sonnet 4.5","score":94.3,"functional":89.6,"quality":81.4,"passed":true,"tokens":2897,"timeMs":52220,"cost":0.0442},{"modelName":"Claude Opus 
4.5","score":85.9,"functional":81.6,"quality":78.7,"passed":true,"tokens":2939,"timeMs":54690,"cost":0.0817},{"modelName":"Claude Haiku 4.5","score":85.5,"functional":81.2,"quality":78.2,"passed":true,"tokens":4258,"timeMs":17414,"cost":0.0197},{"modelName":"DeepSeek v3.2","score":85.8,"functional":81.5,"quality":79,"passed":true,"tokens":3246,"timeMs":75207,"cost":0.0029},{"modelName":"OpenAI GPT-5.2","score":93.6,"functional":88.9,"quality":80.7,"passed":true,"tokens":2966,"timeMs":32670,"cost":0.0295},{"modelName":"GLM 4.7 Flash","score":85.2,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":89.2,"functional":84.8,"quality":79.8,"passed":true,"tokens":3267,"timeMs":50087,"cost":0.0013},{"modelName":"Grok 4","score":84.1,"functional":79.9,"quality":78.5,"passed":true,"tokens":2731,"timeMs":96944,"cost":0.0304},{"modelName":"Grok 4.1 Fast","score":90.6,"functional":86.1,"quality":79.5,"passed":true,"tokens":3552,"timeMs":112387,"cost":0.0012},{"modelName":"Qwen3 Max","score":87.5,"functional":85,"quality":80,"passed":true,"tokens":7501,"timeMs":149108,"cost":0.0426}]},{"taskId":"api-integrations/email/task-3","category":"api-integrations","subcategory":"email","results":[{"modelName":"GLM 4-Plus","score":88,"functional":83.6,"quality":79.6,"passed":true,"tokens":5058,"timeMs":81205,"cost":0.0059},{"modelName":"MiniMax M2.1","score":88.9,"functional":84.5,"quality":80.2,"passed":true,"tokens":13785,"timeMs":172079,"cost":0.0148},{"modelName":"GLM-4.7","score":89.2,"functional":84.8,"quality":80.9,"passed":true,"tokens":3122,"timeMs":59427,"cost":0.004},{"modelName":"Gemini 3 Flash","score":84.6,"functional":80.4,"quality":75.2,"passed":true,"tokens":1791,"timeMs":27353,"cost":0.0049},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4612,"timeMs":38775,"cost":0.0646},{"modelName":"Claude Sonnet 
4.5","score":93,"functional":88.3,"quality":81,"passed":true,"tokens":3776,"timeMs":35708,"cost":0.043},{"modelName":"Claude Opus 4.5","score":85.2,"functional":80.9,"quality":78.5,"passed":true,"tokens":3234,"timeMs":37692,"cost":0.077},{"modelName":"Claude Haiku 4.5","score":87.1,"functional":82.7,"quality":78.7,"passed":true,"tokens":4385,"timeMs":16614,"cost":0.0158},{"modelName":"DeepSeek v3.2","score":84.5,"functional":80.2,"quality":78.6,"passed":true,"tokens":2467,"timeMs":69796,"cost":0.0029},{"modelName":"OpenAI GPT-5.2","score":94.7,"functional":89.9,"quality":81,"passed":true,"tokens":3083,"timeMs":24022,"cost":0.0291},{"modelName":"GLM 4.7 Flash","score":85.2,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":91.7,"functional":87.1,"quality":80.6,"passed":true,"tokens":3401,"timeMs":75797,"cost":0.0013},{"modelName":"Grok 4","score":85.1,"functional":80.8,"quality":78.8,"passed":true,"tokens":2811,"timeMs":92653,"cost":0.0276},{"modelName":"Grok 4.1 Fast","score":92.3,"functional":87.6,"quality":80,"passed":true,"tokens":3200,"timeMs":65657,"cost":0.0013},{"modelName":"Qwen3 Max","score":87.5,"functional":85,"quality":80,"passed":true,"tokens":7501,"timeMs":149108,"cost":0.0426}]},{"taskId":"api-integrations/maps/task-1","category":"api-integrations","subcategory":"maps","results":[{"modelName":"GLM 4-Plus","score":85.8,"functional":81.5,"quality":79,"passed":true,"tokens":3872,"timeMs":108035,"cost":0.0059},{"modelName":"MiniMax M2.1","score":91.3,"functional":86.7,"quality":80.8,"passed":true,"tokens":13922,"timeMs":122795,"cost":0.0159},{"modelName":"GLM-4.7","score":87.5,"functional":83.1,"quality":80.3,"passed":true,"tokens":3684,"timeMs":43791,"cost":0.0048},{"modelName":"Gemini 3 Flash","score":82.2,"functional":78.1,"quality":74.4,"passed":true,"tokens":2012,"timeMs":27053,"cost":0.0051},{"modelName":"Gemini 3 Pro 
Preview","score":88.6,"functional":85,"quality":80,"passed":true,"tokens":4419,"timeMs":38455,"cost":0.0619},{"modelName":"Claude Sonnet 4.5","score":90.8,"functional":86.3,"quality":80.4,"passed":true,"tokens":3622,"timeMs":48330,"cost":0.0386},{"modelName":"Claude Opus 4.5","score":85.7,"functional":81.4,"quality":78.7,"passed":true,"tokens":4067,"timeMs":31247,"cost":0.0641},{"modelName":"Claude Haiku 4.5","score":89.4,"functional":84.9,"quality":79.4,"passed":true,"tokens":4286,"timeMs":17885,"cost":0.0168},{"modelName":"DeepSeek v3.2","score":84.3,"functional":80.1,"quality":78.5,"passed":true,"tokens":2829,"timeMs":84427,"cost":0.0033},{"modelName":"OpenAI GPT-5.2","score":94.5,"functional":89.8,"quality":81,"passed":true,"tokens":2901,"timeMs":24049,"cost":0.0301},{"modelName":"GLM 4.7 Flash","score":79.9,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":93.7,"functional":89,"quality":81.2,"passed":true,"tokens":3306,"timeMs":70830,"cost":0.0013},{"modelName":"Grok 4","score":87.1,"functional":82.7,"quality":79.4,"passed":true,"tokens":2480,"timeMs":69804,"cost":0.0314},{"modelName":"Grok 4.1 Fast","score":92.8,"functional":88.2,"quality":80.2,"passed":true,"tokens":3249,"timeMs":98536,"cost":0.0012},{"modelName":"Qwen3 Max","score":88.2,"functional":85,"quality":80,"passed":true,"tokens":3038,"timeMs":54471,"cost":0.0167}]},{"taskId":"api-integrations/maps/task-2","category":"api-integrations","subcategory":"maps","results":[{"modelName":"GLM 4-Plus","score":84.5,"functional":80.2,"quality":78.6,"passed":true,"tokens":3998,"timeMs":120598,"cost":0.0051},{"modelName":"MiniMax M2.1","score":92.9,"functional":88.2,"quality":81.3,"passed":true,"tokens":13899,"timeMs":142395,"cost":0.0159},{"modelName":"GLM-4.7","score":85.1,"functional":80.8,"quality":79.6,"passed":true,"tokens":4097,"timeMs":52225,"cost":0.0041},{"modelName":"Gemini 3 
Flash","score":80.3,"functional":76.3,"quality":73.9,"passed":true,"tokens":1809,"timeMs":34787,"cost":0.0052},{"modelName":"Gemini 3 Pro Preview","score":88.6,"functional":85,"quality":80,"passed":true,"tokens":4419,"timeMs":38455,"cost":0.0619},{"modelName":"Claude Sonnet 4.5","score":88.4,"functional":83.9,"quality":79.6,"passed":true,"tokens":2883,"timeMs":50120,"cost":0.0382},{"modelName":"Claude Opus 4.5","score":87.3,"functional":82.9,"quality":79.1,"passed":true,"tokens":3617,"timeMs":33898,"cost":0.0549},{"modelName":"Claude Haiku 4.5","score":91.8,"functional":87.3,"quality":80.1,"passed":true,"tokens":4391,"timeMs":26928,"cost":0.0187},{"modelName":"DeepSeek v3.2","score":85.3,"functional":81,"quality":78.8,"passed":true,"tokens":3022,"timeMs":64881,"cost":0.0032},{"modelName":"OpenAI GPT-5.2","score":93.2,"functional":88.5,"quality":80.6,"passed":true,"tokens":2995,"timeMs":33210,"cost":0.0324},{"modelName":"GLM 4.7 Flash","score":79.9,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":94.7,"functional":90,"quality":81.5,"passed":true,"tokens":3229,"timeMs":81338,"cost":0.0014},{"modelName":"Grok 4","score":89.5,"functional":85,"quality":80.2,"passed":true,"tokens":2803,"timeMs":71097,"cost":0.0281},{"modelName":"Grok 4.1 Fast","score":92.1,"functional":87.5,"quality":80,"passed":true,"tokens":3763,"timeMs":87959,"cost":0.0013},{"modelName":"Qwen3 Max","score":88.2,"functional":85,"quality":80,"passed":true,"tokens":3038,"timeMs":54471,"cost":0.0167}]},{"taskId":"api-integrations/maps/task-3","category":"api-integrations","subcategory":"maps","results":[{"modelName":"GLM 4-Plus","score":84.3,"functional":80.1,"quality":78.5,"passed":true,"tokens":3964,"timeMs":111181,"cost":0.0056},{"modelName":"MiniMax 
M2.1","score":93.4,"functional":88.7,"quality":81.5,"passed":true,"tokens":15686,"timeMs":161628,"cost":0.0152},{"modelName":"GLM-4.7","score":82.7,"functional":78.5,"quality":78.9,"passed":true,"tokens":3493,"timeMs":49211,"cost":0.0048},{"modelName":"Gemini 3 Flash","score":79.5,"functional":75.5,"quality":73.6,"passed":true,"tokens":2155,"timeMs":29639,"cost":0.0044},{"modelName":"Gemini 3 Pro Preview","score":88.6,"functional":85,"quality":80,"passed":true,"tokens":4419,"timeMs":38455,"cost":0.0619},{"modelName":"Claude Sonnet 4.5","score":86.2,"functional":81.9,"quality":79,"passed":true,"tokens":3494,"timeMs":34396,"cost":0.0439},{"modelName":"Claude Opus 4.5","score":89.6,"functional":85.1,"quality":79.8,"passed":true,"tokens":3458,"timeMs":32590,"cost":0.0725},{"modelName":"Claude Haiku 4.5","score":93.8,"functional":89.1,"quality":80.7,"passed":true,"tokens":4462,"timeMs":16979,"cost":0.0179},{"modelName":"DeepSeek v3.2","score":87.3,"functional":82.9,"quality":79.4,"passed":true,"tokens":2770,"timeMs":104492,"cost":0.0029},{"modelName":"OpenAI GPT-5.2","score":91,"functional":86.5,"quality":79.9,"passed":true,"tokens":3158,"timeMs":31070,"cost":0.0277},{"modelName":"GLM 4.7 Flash","score":79.9,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":94.6,"functional":89.8,"quality":81.4,"passed":true,"tokens":2821,"timeMs":88177,"cost":0.0011},{"modelName":"Grok 4","score":91.8,"functional":87.2,"quality":80.8,"passed":true,"tokens":2367,"timeMs":84206,"cost":0.0352},{"modelName":"Grok 4.1 Fast","score":90.4,"functional":85.9,"quality":79.5,"passed":true,"tokens":3727,"timeMs":70292,"cost":0.0013},{"modelName":"Qwen3 Max","score":88.2,"functional":85,"quality":80,"passed":true,"tokens":3038,"timeMs":54471,"cost":0.0167}]},{"taskId":"api-integrations/payment/task-1","category":"api-integrations","subcategory":"payment","results":[{"modelName":"GLM 
4-Plus","score":85.3,"functional":81,"quality":78.8,"passed":true,"tokens":5128,"timeMs":112259,"cost":0.0055},{"modelName":"MiniMax M2.1","score":92.7,"functional":88.1,"quality":81.3,"passed":true,"tokens":17162,"timeMs":165785,"cost":0.0111},{"modelName":"GLM-4.7","score":80.8,"functional":76.8,"quality":78.3,"passed":true,"tokens":3631,"timeMs":58506,"cost":0.0044},{"modelName":"Gemini 3 Flash","score":79.8,"functional":75.8,"quality":73.7,"passed":true,"tokens":2541,"timeMs":32700,"cost":0.0049},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4390,"timeMs":38749,"cost":0.0615},{"modelName":"Claude Sonnet 4.5","score":84.8,"functional":80.6,"quality":78.6,"passed":true,"tokens":3267,"timeMs":46842,"cost":0.0347},{"modelName":"Claude Opus 4.5","score":92,"functional":87.4,"quality":80.6,"passed":true,"tokens":3436,"timeMs":36003,"cost":0.0603},{"modelName":"Claude Haiku 4.5","score":94.9,"functional":90.1,"quality":81,"passed":true,"tokens":3618,"timeMs":20661,"cost":0.0172},{"modelName":"DeepSeek v3.2","score":89.7,"functional":85.2,"quality":80.2,"passed":true,"tokens":2881,"timeMs":75483,"cost":0.0027},{"modelName":"OpenAI GPT-5.2","score":88.5,"functional":84.1,"quality":79.2,"passed":true,"tokens":3039,"timeMs":26806,"cost":0.0334},{"modelName":"GLM 4.7 Flash","score":83.8,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":93.2,"functional":88.6,"quality":81,"passed":true,"tokens":2510,"timeMs":58799,"cost":0.0014},{"modelName":"Grok 4","score":93.5,"functional":88.8,"quality":81.3,"passed":true,"tokens":3031,"timeMs":74633,"cost":0.0331},{"modelName":"Grok 4.1 Fast","score":88,"functional":83.6,"quality":78.8,"passed":true,"tokens":2818,"timeMs":81238,"cost":0.0015},{"modelName":"Qwen3 
Max","score":88.5,"functional":85,"quality":80,"passed":true,"tokens":4446,"timeMs":91986,"cost":0.0252}]},{"taskId":"api-integrations/payment/task-2","category":"api-integrations","subcategory":"payment","results":[{"modelName":"GLM 4-Plus","score":87.3,"functional":82.9,"quality":79.4,"passed":true,"tokens":4519,"timeMs":122711,"cost":0.0058},{"modelName":"MiniMax M2.1","score":91,"functional":86.4,"quality":80.8,"passed":true,"tokens":13797,"timeMs":204145,"cost":0.0137},{"modelName":"GLM-4.7","score":79.9,"functional":75.9,"quality":78.1,"passed":true,"tokens":3687,"timeMs":42397,"cost":0.0038},{"modelName":"Gemini 3 Flash","score":81.3,"functional":77.2,"quality":74.2,"passed":true,"tokens":2506,"timeMs":26911,"cost":0.0057},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4390,"timeMs":38749,"cost":0.0615},{"modelName":"Claude Sonnet 4.5","score":84.6,"functional":80.4,"quality":78.5,"passed":true,"tokens":3964,"timeMs":51898,"cost":0.034},{"modelName":"Claude Opus 4.5","score":94,"functional":89.3,"quality":81.2,"passed":true,"tokens":3125,"timeMs":31051,"cost":0.0793},{"modelName":"Claude Haiku 4.5","score":94.7,"functional":90,"quality":81,"passed":true,"tokens":5306,"timeMs":22548,"cost":0.0198},{"modelName":"DeepSeek v3.2","score":92,"functional":87.4,"quality":80.8,"passed":true,"tokens":3230,"timeMs":97998,"cost":0.0026},{"modelName":"OpenAI GPT-5.2","score":86.4,"functional":82,"quality":78.5,"passed":true,"tokens":2924,"timeMs":35708,"cost":0.0266},{"modelName":"GLM 4.7 Flash","score":83.8,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":91.1,"functional":86.5,"quality":80.4,"passed":true,"tokens":2596,"timeMs":79045,"cost":0.0012},{"modelName":"Grok 4","score":94,"functional":89.3,"quality":81.5,"passed":true,"tokens":2219,"timeMs":60032,"cost":0.0253},{"modelName":"Grok 4.1 
Fast","score":85.6,"functional":81.3,"quality":78,"passed":true,"tokens":2742,"timeMs":107978,"cost":0.0016},{"modelName":"Qwen3 Max","score":88.5,"functional":85,"quality":80,"passed":true,"tokens":4446,"timeMs":91986,"cost":0.0252}]},{"taskId":"api-integrations/payment/task-3","category":"api-integrations","subcategory":"payment","results":[{"modelName":"GLM 4-Plus","score":89.7,"functional":85.2,"quality":80.2,"passed":true,"tokens":4921,"timeMs":109523,"cost":0.0044},{"modelName":"MiniMax M2.1","score":88.6,"functional":84.2,"quality":80.1,"passed":true,"tokens":13283,"timeMs":173666,"cost":0.0157},{"modelName":"GLM-4.7","score":80.3,"functional":76.3,"quality":78.2,"passed":true,"tokens":2981,"timeMs":50653,"cost":0.0044},{"modelName":"Gemini 3 Flash","score":83.5,"functional":79.4,"quality":74.8,"passed":true,"tokens":1782,"timeMs":28444,"cost":0.0054},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4390,"timeMs":38749,"cost":0.0615},{"modelName":"Claude Sonnet 4.5","score":85.6,"functional":81.4,"quality":78.8,"passed":true,"tokens":3796,"timeMs":34936,"cost":0.0405},{"modelName":"Claude Opus 4.5","score":95.1,"functional":90.3,"quality":81.5,"passed":true,"tokens":3311,"timeMs":32578,"cost":0.0793},{"modelName":"Claude Haiku 4.5","score":93.4,"functional":88.7,"quality":80.6,"passed":true,"tokens":4817,"timeMs":23957,"cost":0.0161},{"modelName":"DeepSeek v3.2","score":93.7,"functional":89,"quality":81.3,"passed":true,"tokens":3086,"timeMs":86355,"cost":0.0026},{"modelName":"OpenAI GPT-5.2","score":85,"functional":80.8,"quality":78.1,"passed":true,"tokens":2988,"timeMs":24437,"cost":0.023},{"modelName":"GLM 4.7 Flash","score":83.8,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":88.6,"functional":84.2,"quality":79.6,"passed":true,"tokens":2775,"timeMs":61664,"cost":0.0012},{"modelName":"Grok 
4","score":93.3,"functional":88.6,"quality":81.3,"passed":true,"tokens":3013,"timeMs":79810,"cost":0.0328},{"modelName":"Grok 4.1 Fast","score":83.7,"functional":79.5,"quality":77.5,"passed":true,"tokens":2716,"timeMs":73828,"cost":0.0013},{"modelName":"Qwen3 Max","score":88.5,"functional":85,"quality":80,"passed":true,"tokens":4446,"timeMs":91986,"cost":0.0252}]},{"taskId":"api-integrations/social/task-1","category":"api-integrations","subcategory":"social","results":[{"modelName":"GLM 4-Plus","score":92,"functional":87.4,"quality":80.8,"passed":true,"tokens":3670,"timeMs":108069,"cost":0.0056},{"modelName":"MiniMax M2.1","score":86.2,"functional":81.9,"quality":79.3,"passed":true,"tokens":16422,"timeMs":165756,"cost":0.0142},{"modelName":"GLM-4.7","score":81.8,"functional":77.7,"quality":78.6,"passed":true,"tokens":3157,"timeMs":70508,"cost":0.0048},{"modelName":"Gemini 3 Flash","score":86,"functional":81.7,"quality":75.6,"passed":true,"tokens":1978,"timeMs":28433,"cost":0.0042},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4381,"timeMs":37240,"cost":0.0614},{"modelName":"Claude Sonnet 4.5","score":87.6,"functional":83.2,"quality":79.4,"passed":true,"tokens":3300,"timeMs":51478,"cost":0.0396},{"modelName":"Claude Opus 4.5","score":94.9,"functional":90.2,"quality":81.4,"passed":true,"tokens":3340,"timeMs":37676,"cost":0.0552},{"modelName":"Claude Haiku 4.5","score":91.2,"functional":86.7,"quality":79.9,"passed":true,"tokens":4906,"timeMs":22647,"cost":0.0164},{"modelName":"DeepSeek v3.2","score":94.2,"functional":89.5,"quality":81.5,"passed":true,"tokens":3368,"timeMs":73608,"cost":0.003},{"modelName":"OpenAI GPT-5.2","score":84.8,"functional":80.6,"quality":78.1,"passed":true,"tokens":2538,"timeMs":29279,"cost":0.0315},{"modelName":"GLM 4.7 Flash","score":53.9,"functional":0,"quality":80,"passed":false,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 
Fast","score":86.4,"functional":82.1,"quality":79,"passed":true,"tokens":2846,"timeMs":79064,"cost":0.0012},{"modelName":"Grok 4","score":91.6,"functional":87,"quality":80.8,"passed":true,"tokens":2798,"timeMs":70603,"cost":0.0295},{"modelName":"Grok 4.1 Fast","score":82.8,"functional":78.7,"quality":77.2,"passed":true,"tokens":3146,"timeMs":109792,"cost":0.0015},{"modelName":"Qwen3 Max","score":88.9,"functional":85,"quality":80,"passed":true,"tokens":3608,"timeMs":66282,"cost":0.0203}]},{"taskId":"api-integrations/social/task-2","category":"api-integrations","subcategory":"social","results":[{"modelName":"GLM 4-Plus","score":93.7,"functional":89,"quality":81.3,"passed":true,"tokens":5216,"timeMs":89607,"cost":0.0058},{"modelName":"MiniMax M2.1","score":84.3,"functional":80.1,"quality":78.8,"passed":true,"tokens":16142,"timeMs":178983,"cost":0.0159},{"modelName":"GLM-4.7","score":84,"functional":79.8,"quality":79.3,"passed":true,"tokens":3311,"timeMs":66142,"cost":0.0034},{"modelName":"Gemini 3 Flash","score":88.1,"functional":83.7,"quality":76.2,"passed":true,"tokens":1898,"timeMs":32104,"cost":0.0056},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4381,"timeMs":37240,"cost":0.0614},{"modelName":"Claude Sonnet 4.5","score":90.1,"functional":85.6,"quality":80.2,"passed":true,"tokens":3460,"timeMs":33738,"cost":0.0449},{"modelName":"Claude Opus 4.5","score":93.6,"functional":88.9,"quality":81,"passed":true,"tokens":4173,"timeMs":31418,"cost":0.0798},{"modelName":"Claude Haiku 4.5","score":88.8,"functional":84.3,"quality":79.2,"passed":true,"tokens":4368,"timeMs":15439,"cost":0.0199},{"modelName":"DeepSeek v3.2","score":93.5,"functional":88.8,"quality":81.3,"passed":true,"tokens":2786,"timeMs":96731,"cost":0.0028},{"modelName":"OpenAI GPT-5.2","score":85.8,"functional":81.5,"quality":78.4,"passed":true,"tokens":2982,"timeMs":34649,"cost":0.0298},{"modelName":"GLM 4.7 
Flash","score":53.9,"functional":0,"quality":80,"passed":false,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":85.1,"functional":80.8,"quality":78.6,"passed":true,"tokens":3042,"timeMs":62660,"cost":0.0011},{"modelName":"Grok 4","score":89.2,"functional":84.7,"quality":80.1,"passed":true,"tokens":2661,"timeMs":53938,"cost":0.0309},{"modelName":"Grok 4.1 Fast","score":83.2,"functional":79,"quality":77.3,"passed":true,"tokens":3133,"timeMs":81782,"cost":0.0014},{"modelName":"Qwen3 Max","score":88.9,"functional":85,"quality":80,"passed":true,"tokens":3608,"timeMs":66282,"cost":0.0203}]},{"taskId":"api-integrations/social/task-3","category":"api-integrations","subcategory":"social","results":[{"modelName":"GLM 4-Plus","score":94.2,"functional":89.5,"quality":81.5,"passed":true,"tokens":4383,"timeMs":89002,"cost":0.0056},{"modelName":"MiniMax M2.1","score":83.5,"functional":79.3,"quality":78.5,"passed":true,"tokens":14921,"timeMs":183948,"cost":0.0117},{"modelName":"GLM-4.7","score":86.5,"functional":82.1,"quality":80,"passed":true,"tokens":2783,"timeMs":45949,"cost":0.0033},{"modelName":"Gemini 3 Flash","score":89.3,"functional":84.8,"quality":76.6,"passed":true,"tokens":2098,"timeMs":24286,"cost":0.0053},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4381,"timeMs":37240,"cost":0.0614},{"modelName":"Claude Sonnet 4.5","score":92.4,"functional":87.8,"quality":80.8,"passed":true,"tokens":3391,"timeMs":48446,"cost":0.0367},{"modelName":"Claude Opus 4.5","score":91.4,"functional":86.8,"quality":80.4,"passed":true,"tokens":4209,"timeMs":45830,"cost":0.076},{"modelName":"Claude Haiku 4.5","score":86.6,"functional":82.3,"quality":78.5,"passed":true,"tokens":4267,"timeMs":24707,"cost":0.016},{"modelName":"DeepSeek v3.2","score":91.8,"functional":87.2,"quality":80.8,"passed":true,"tokens":3281,"timeMs":65410,"cost":0.0026},{"modelName":"OpenAI 
GPT-5.2","score":87.8,"functional":83.4,"quality":79,"passed":true,"tokens":3210,"timeMs":32853,"cost":0.024},{"modelName":"GLM 4.7 Flash","score":53.9,"functional":0,"quality":80,"passed":false,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":84.9,"functional":80.6,"quality":78.5,"passed":true,"tokens":2700,"timeMs":60171,"cost":0.001},{"modelName":"Grok 4","score":86.8,"functional":82.4,"quality":79.3,"passed":true,"tokens":2559,"timeMs":71383,"cost":0.0297},{"modelName":"Grok 4.1 Fast","score":84.7,"functional":80.4,"quality":77.8,"passed":true,"tokens":3056,"timeMs":67232,"cost":0.0011},{"modelName":"Qwen3 Max","score":88.9,"functional":85,"quality":80,"passed":true,"tokens":3608,"timeMs":66282,"cost":0.0203}]},{"taskId":"api-integrations/storage/task-1","category":"api-integrations","subcategory":"storage","results":[{"modelName":"GLM 4-Plus","score":93.5,"functional":88.8,"quality":81.3,"passed":true,"tokens":4884,"timeMs":88876,"cost":0.0048},{"modelName":"MiniMax M2.1","score":83.8,"functional":79.6,"quality":78.6,"passed":true,"tokens":18056,"timeMs":136221,"cost":0.0137},{"modelName":"GLM-4.7","score":88.6,"functional":84.1,"quality":80.7,"passed":true,"tokens":2924,"timeMs":65007,"cost":0.0039},{"modelName":"Gemini 3 Flash","score":89.3,"functional":84.8,"quality":76.6,"passed":true,"tokens":1982,"timeMs":29451,"cost":0.004},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4386,"timeMs":36949,"cost":0.0614},{"modelName":"Claude Sonnet 4.5","score":94,"functional":89.3,"quality":81.3,"passed":true,"tokens":2873,"timeMs":31062,"cost":0.0391},{"modelName":"Claude Opus 4.5","score":88.9,"functional":84.5,"quality":79.6,"passed":true,"tokens":4143,"timeMs":33255,"cost":0.0626},{"modelName":"Claude Haiku 4.5","score":85.2,"functional":81,"quality":78.1,"passed":true,"tokens":5181,"timeMs":18562,"cost":0.0187},{"modelName":"DeepSeek 
v3.2","score":89.4,"functional":84.9,"quality":80.1,"passed":true,"tokens":2428,"timeMs":76972,"cost":0.003},{"modelName":"OpenAI GPT-5.2","score":90.3,"functional":85.7,"quality":79.7,"passed":true,"tokens":2242,"timeMs":20830,"cost":0.0272},{"modelName":"GLM 4.7 Flash","score":85.2,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":85.9,"functional":81.6,"quality":78.8,"passed":true,"tokens":2317,"timeMs":63467,"cost":0.0013},{"modelName":"Grok 4","score":84.9,"functional":80.7,"quality":78.8,"passed":true,"tokens":2345,"timeMs":76866,"cost":0.0351},{"modelName":"Grok 4.1 Fast","score":86.9,"functional":82.6,"quality":78.4,"passed":true,"tokens":3034,"timeMs":113632,"cost":0.0015},{"modelName":"Qwen3 Max","score":88.5,"functional":85,"quality":80,"passed":true,"tokens":4344,"timeMs":88543,"cost":0.0247}]},{"taskId":"api-integrations/storage/task-2","category":"api-integrations","subcategory":"storage","results":[{"modelName":"GLM 4-Plus","score":91.8,"functional":87.2,"quality":80.8,"passed":true,"tokens":5146,"timeMs":85564,"cost":0.0042},{"modelName":"MiniMax M2.1","score":85.3,"functional":81,"quality":79.1,"passed":true,"tokens":15464,"timeMs":145308,"cost":0.011},{"modelName":"GLM-4.7","score":89.7,"functional":85.3,"quality":81,"passed":true,"tokens":4089,"timeMs":65168,"cost":0.0037},{"modelName":"Gemini 3 Flash","score":88.1,"functional":83.7,"quality":76.2,"passed":true,"tokens":2006,"timeMs":20892,"cost":0.0054},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4386,"timeMs":36949,"cost":0.0614},{"modelName":"Claude Sonnet 4.5","score":94.6,"functional":89.8,"quality":81.5,"passed":true,"tokens":3382,"timeMs":50856,"cost":0.0353},{"modelName":"Claude Opus 4.5","score":86.8,"functional":82.4,"quality":79,"passed":true,"tokens":3108,"timeMs":54271,"cost":0.0672},{"modelName":"Claude Haiku 
4.5","score":85,"functional":80.8,"quality":78.1,"passed":true,"tokens":3686,"timeMs":24179,"cost":0.0145},{"modelName":"DeepSeek v3.2","score":87,"functional":82.6,"quality":79.3,"passed":true,"tokens":2477,"timeMs":63041,"cost":0.0027},{"modelName":"OpenAI GPT-5.2","score":92.6,"functional":88,"quality":80.4,"passed":true,"tokens":2741,"timeMs":31920,"cost":0.0317},{"modelName":"GLM 4.7 Flash","score":85.2,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":87.9,"functional":83.5,"quality":79.4,"passed":true,"tokens":2596,"timeMs":85001,"cost":0.0014},{"modelName":"Grok 4","score":84,"functional":79.8,"quality":78.5,"passed":true,"tokens":2335,"timeMs":97495,"cost":0.0329},{"modelName":"Grok 4.1 Fast","score":89.4,"functional":84.9,"quality":79.2,"passed":true,"tokens":3323,"timeMs":77672,"cost":0.0013},{"modelName":"Qwen3 Max","score":88.5,"functional":85,"quality":80,"passed":true,"tokens":4344,"timeMs":88543,"cost":0.0247}]},{"taskId":"api-integrations/storage/task-3","category":"api-integrations","subcategory":"storage","results":[{"modelName":"GLM 4-Plus","score":89.4,"functional":84.9,"quality":80.1,"passed":true,"tokens":4050,"timeMs":102319,"cost":0.0056},{"modelName":"MiniMax M2.1","score":87.5,"functional":83.1,"quality":79.7,"passed":true,"tokens":12545,"timeMs":183111,"cost":0.0149},{"modelName":"GLM-4.7","score":89.7,"functional":85.3,"quality":81,"passed":true,"tokens":3402,"timeMs":53175,"cost":0.0045},{"modelName":"Gemini 3 Flash","score":86,"functional":81.7,"quality":75.6,"passed":true,"tokens":2201,"timeMs":34448,"cost":0.0055},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4386,"timeMs":36949,"cost":0.0614},{"modelName":"Claude Sonnet 4.5","score":93.9,"functional":89.2,"quality":81.3,"passed":true,"tokens":2990,"timeMs":43773,"cost":0.0317},{"modelName":"Claude Opus 
4.5","score":85.4,"functional":81.1,"quality":78.6,"passed":true,"tokens":4136,"timeMs":41561,"cost":0.0793},{"modelName":"Claude Haiku 4.5","score":86.1,"functional":81.8,"quality":78.4,"passed":true,"tokens":3725,"timeMs":20935,"cost":0.0186},{"modelName":"DeepSeek v3.2","score":85.1,"functional":80.8,"quality":78.8,"passed":true,"tokens":3609,"timeMs":106821,"cost":0.0028},{"modelName":"OpenAI GPT-5.2","score":94.2,"functional":89.5,"quality":80.9,"passed":true,"tokens":3070,"timeMs":35599,"cost":0.0237},{"modelName":"GLM 4.7 Flash","score":85.2,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":90.3,"functional":85.8,"quality":80.2,"passed":true,"tokens":3203,"timeMs":71629,"cost":0.0011},{"modelName":"Grok 4","score":84.4,"functional":80.2,"quality":78.6,"passed":true,"tokens":2253,"timeMs":61378,"cost":0.0269},{"modelName":"Grok 4.1 Fast","score":91.5,"functional":86.9,"quality":79.8,"passed":true,"tokens":3203,"timeMs":105445,"cost":0.0015},{"modelName":"Qwen3 Max","score":88.5,"functional":85,"quality":80,"passed":true,"tokens":4344,"timeMs":88543,"cost":0.0247}]},{"taskId":"api-integrations/stripe/task-1","category":"api-integrations","subcategory":"stripe","results":[{"modelName":"GLM 4-Plus","score":87,"functional":82.6,"quality":79.3,"passed":true,"tokens":3924,"timeMs":117192,"cost":0.0057},{"modelName":"MiniMax M2.1","score":90,"functional":85.5,"quality":80.5,"passed":true,"tokens":13766,"timeMs":207639,"cost":0.0154},{"modelName":"GLM-4.7","score":88.6,"functional":84.1,"quality":80.7,"passed":true,"tokens":3388,"timeMs":56464,"cost":0.0042},{"modelName":"Gemini 3 Flash","score":83.6,"functional":79.4,"quality":74.8,"passed":true,"tokens":2185,"timeMs":23845,"cost":0.0039},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4408,"timeMs":40445,"cost":0.0617},{"modelName":"Claude Sonnet 
4.5","score":92.1,"functional":87.5,"quality":80.8,"passed":true,"tokens":2933,"timeMs":29447,"cost":0.0391},{"modelName":"Claude Opus 4.5","score":85.2,"functional":81,"quality":78.5,"passed":true,"tokens":2965,"timeMs":36191,"cost":0.0569},{"modelName":"Claude Haiku 4.5","score":88,"functional":83.6,"quality":79,"passed":true,"tokens":4970,"timeMs":24390,"cost":0.0144},{"modelName":"DeepSeek v3.2","score":84.2,"functional":80,"quality":78.5,"passed":true,"tokens":2981,"timeMs":78804,"cost":0.0026},{"modelName":"OpenAI GPT-5.2","score":94.7,"functional":90,"quality":81.1,"passed":true,"tokens":2322,"timeMs":34444,"cost":0.0304},{"modelName":"GLM 4.7 Flash","score":83.8,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":92.6,"functional":88,"quality":80.8,"passed":true,"tokens":2368,"timeMs":60299,"cost":0.0014},{"modelName":"Grok 4","score":85.9,"functional":81.6,"quality":79.1,"passed":true,"tokens":2431,"timeMs":70857,"cost":0.0254},{"modelName":"Grok 4.1 Fast","score":92.6,"functional":88,"quality":80.2,"passed":true,"tokens":3628,"timeMs":107904,"cost":0.0012},{"modelName":"Qwen3 Max","score":88.8,"functional":85,"quality":80,"passed":true,"tokens":3731,"timeMs":73731,"cost":0.0209}]},{"taskId":"api-integrations/stripe/task-2","category":"api-integrations","subcategory":"stripe","results":[{"modelName":"GLM 4-Plus","score":85.1,"functional":80.8,"quality":78.8,"passed":true,"tokens":4878,"timeMs":106406,"cost":0.0049},{"modelName":"MiniMax M2.1","score":92.1,"functional":87.5,"quality":81.1,"passed":true,"tokens":16843,"timeMs":165041,"cost":0.0151},{"modelName":"GLM-4.7","score":86.5,"functional":82.2,"quality":80,"passed":true,"tokens":4070,"timeMs":41809,"cost":0.0037},{"modelName":"Gemini 3 Flash","score":81.3,"functional":77.2,"quality":74.2,"passed":true,"tokens":1824,"timeMs":19833,"cost":0.0055},{"modelName":"Gemini 3 Pro 
Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4408,"timeMs":40445,"cost":0.0617},{"modelName":"Claude Sonnet 4.5","score":89.8,"functional":85.3,"quality":80.1,"passed":true,"tokens":3913,"timeMs":53213,"cost":0.0454},{"modelName":"Claude Opus 4.5","score":86.2,"functional":81.9,"quality":78.8,"passed":true,"tokens":2970,"timeMs":37908,"cost":0.069},{"modelName":"Claude Haiku 4.5","score":90.5,"functional":86,"quality":79.7,"passed":true,"tokens":5157,"timeMs":21261,"cost":0.0202},{"modelName":"DeepSeek v3.2","score":84.6,"functional":80.3,"quality":78.6,"passed":true,"tokens":2847,"timeMs":109001,"cost":0.0028},{"modelName":"OpenAI GPT-5.2","score":94.1,"functional":89.4,"quality":80.9,"passed":true,"tokens":2414,"timeMs":23284,"cost":0.0252},{"modelName":"GLM 4.7 Flash","score":83.8,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":94.3,"functional":89.5,"quality":81.3,"passed":true,"tokens":3035,"timeMs":53520,"cost":0.001},{"modelName":"Grok 4","score":88.1,"functional":83.7,"quality":79.7,"passed":true,"tokens":3069,"timeMs":93504,"cost":0.0362},{"modelName":"Grok 4.1 Fast","score":92.6,"functional":88,"quality":80.2,"passed":true,"tokens":3424,"timeMs":67586,"cost":0.0013},{"modelName":"Qwen3 Max","score":88.8,"functional":85,"quality":80,"passed":true,"tokens":3731,"timeMs":73731,"cost":0.0209}]},{"taskId":"api-integrations/stripe/task-3","category":"api-integrations","subcategory":"stripe","results":[{"modelName":"GLM 4-Plus","score":84.2,"functional":80,"quality":78.5,"passed":true,"tokens":5240,"timeMs":80482,"cost":0.0047},{"modelName":"MiniMax M2.1","score":93.3,"functional":88.6,"quality":81.5,"passed":true,"tokens":12544,"timeMs":177525,"cost":0.0122},{"modelName":"GLM-4.7","score":84,"functional":79.8,"quality":79.3,"passed":true,"tokens":3300,"timeMs":58466,"cost":0.0046},{"modelName":"Gemini 3 
Flash","score":79.8,"functional":75.8,"quality":73.7,"passed":true,"tokens":2224,"timeMs":35193,"cost":0.0055},{"modelName":"Gemini 3 Pro Preview","score":89.2,"functional":85,"quality":80,"passed":true,"tokens":4408,"timeMs":40445,"cost":0.0617},{"modelName":"Claude Sonnet 4.5","score":87.3,"functional":83,"quality":79.3,"passed":true,"tokens":3907,"timeMs":42803,"cost":0.0334},{"modelName":"Claude Opus 4.5","score":88.2,"functional":83.8,"quality":79.4,"passed":true,"tokens":3935,"timeMs":38497,"cost":0.0697},{"modelName":"Claude Haiku 4.5","score":92.8,"functional":88.2,"quality":80.4,"passed":true,"tokens":4073,"timeMs":21847,"cost":0.0176},{"modelName":"DeepSeek v3.2","score":86,"functional":81.7,"quality":79.1,"passed":true,"tokens":3267,"timeMs":111161,"cost":0.0027},{"modelName":"OpenAI GPT-5.2","score":92.3,"functional":87.7,"quality":80.3,"passed":true,"tokens":2290,"timeMs":30307,"cost":0.0266},{"modelName":"GLM 4.7 Flash","score":83.8,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":94.8,"functional":90.1,"quality":81.5,"passed":true,"tokens":3140,"timeMs":67097,"cost":0.0011},{"modelName":"Grok 4","score":90.6,"functional":86,"quality":80.5,"passed":true,"tokens":3068,"timeMs":62927,"cost":0.0288},{"modelName":"Grok 4.1 Fast","score":91.5,"functional":86.9,"quality":79.8,"passed":true,"tokens":2583,"timeMs":67977,"cost":0.0014},{"modelName":"Qwen3 Max","score":88.8,"functional":85,"quality":80,"passed":true,"tokens":3731,"timeMs":73731,"cost":0.0209}]},{"taskId":"api-integrations/advanced/task-1","category":"api-integrations","subcategory":"advanced","results":[{"modelName":"GLM 4-Plus","score":84.6,"functional":80.3,"quality":78.6,"passed":true,"tokens":4707,"timeMs":90012,"cost":0.0054},{"modelName":"MiniMax 
M2.1","score":93.3,"functional":88.6,"quality":81.5,"passed":true,"tokens":14321,"timeMs":198191,"cost":0.0138},{"modelName":"GLM-4.7","score":81.8,"functional":77.7,"quality":78.6,"passed":true,"tokens":3543,"timeMs":58010,"cost":0.004},{"modelName":"Gemini 3 Flash","score":79.5,"functional":75.5,"quality":73.6,"passed":true,"tokens":1879,"timeMs":27204,"cost":0.0052},{"modelName":"Gemini 3 Pro Preview","score":89.5,"functional":85,"quality":80,"passed":true,"tokens":4226,"timeMs":43901,"cost":0.0592},{"modelName":"Claude Sonnet 4.5","score":85.5,"functional":81.2,"quality":78.8,"passed":true,"tokens":3679,"timeMs":53861,"cost":0.0366},{"modelName":"Claude Opus 4.5","score":90.7,"functional":86.1,"quality":80.2,"passed":true,"tokens":3628,"timeMs":56111,"cost":0.0772},{"modelName":"Claude Haiku 4.5","score":94.4,"functional":89.7,"quality":80.9,"passed":true,"tokens":4703,"timeMs":21011,"cost":0.0175},{"modelName":"DeepSeek v3.2","score":88.3,"functional":83.9,"quality":79.7,"passed":true,"tokens":3383,"timeMs":77637,"cost":0.0025},{"modelName":"OpenAI GPT-5.2","score":89.9,"functional":85.5,"quality":79.6,"passed":true,"tokens":2425,"timeMs":25142,"cost":0.0326},{"modelName":"GLM 4.7 Flash","score":89.9,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":94.1,"functional":89.4,"quality":81.3,"passed":true,"tokens":2532,"timeMs":76888,"cost":0.001},{"modelName":"Grok 4","score":92.7,"functional":88,"quality":81.1,"passed":true,"tokens":2929,"timeMs":66217,"cost":0.0343},{"modelName":"Grok 4.1 Fast","score":89.4,"functional":84.9,"quality":79.2,"passed":true,"tokens":3099,"timeMs":98459,"cost":0.0014},{"modelName":"Qwen3 Max","score":89,"functional":85,"quality":80,"passed":true,"tokens":5915,"timeMs":118218,"cost":0.0348}]},{"taskId":"api-integrations/advanced/task-2","category":"api-integrations","subcategory":"advanced","results":[{"modelName":"GLM 
4-Plus","score":86.1,"functional":81.7,"quality":79.1,"passed":true,"tokens":4756,"timeMs":98440,"cost":0.0056},{"modelName":"MiniMax M2.1","score":92.1,"functional":87.5,"quality":81.1,"passed":true,"tokens":13570,"timeMs":178064,"cost":0.0134},{"modelName":"GLM-4.7","score":80.3,"functional":76.3,"quality":78.2,"passed":true,"tokens":2796,"timeMs":41063,"cost":0.0037},{"modelName":"Gemini 3 Flash","score":80.3,"functional":76.3,"quality":73.9,"passed":true,"tokens":2069,"timeMs":25330,"cost":0.005},{"modelName":"Gemini 3 Pro Preview","score":89.5,"functional":85,"quality":80,"passed":true,"tokens":4226,"timeMs":43901,"cost":0.0592},{"modelName":"Claude Sonnet 4.5","score":84.6,"functional":80.4,"quality":78.5,"passed":true,"tokens":3140,"timeMs":40262,"cost":0.0349},{"modelName":"Claude Opus 4.5","score":93,"functional":88.3,"quality":80.8,"passed":true,"tokens":4109,"timeMs":42660,"cost":0.0792},{"modelName":"Claude Haiku 4.5","score":95,"functional":90.2,"quality":81.1,"passed":true,"tokens":4426,"timeMs":15793,"cost":0.0167},{"modelName":"DeepSeek v3.2","score":90.8,"functional":86.2,"quality":80.5,"passed":true,"tokens":3361,"timeMs":96700,"cost":0.0026},{"modelName":"OpenAI GPT-5.2","score":87.5,"functional":83.2,"quality":78.9,"passed":true,"tokens":2819,"timeMs":26733,"cost":0.0264},{"modelName":"GLM 4.7 Flash","score":89.9,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":92.4,"functional":87.8,"quality":80.8,"passed":true,"tokens":2404,"timeMs":74115,"cost":0.0009},{"modelName":"Grok 4","score":93.8,"functional":89.1,"quality":81.5,"passed":true,"tokens":2316,"timeMs":78963,"cost":0.0315},{"modelName":"Grok 4.1 Fast","score":86.9,"functional":82.6,"quality":78.4,"passed":true,"tokens":3586,"timeMs":112785,"cost":0.0012},{"modelName":"Qwen3 
Max","score":89,"functional":85,"quality":80,"passed":true,"tokens":5915,"timeMs":118218,"cost":0.0348}]},{"taskId":"api-integrations/advanced/task-3","category":"api-integrations","subcategory":"advanced","results":[{"modelName":"GLM 4-Plus","score":88.3,"functional":83.9,"quality":79.7,"passed":true,"tokens":3642,"timeMs":70494,"cost":0.0054},{"modelName":"MiniMax M2.1","score":90,"functional":85.5,"quality":80.5,"passed":true,"tokens":18324,"timeMs":185722,"cost":0.0149},{"modelName":"GLM-4.7","score":79.9,"functional":75.9,"quality":78.1,"passed":true,"tokens":4148,"timeMs":45870,"cost":0.0045},{"modelName":"Gemini 3 Flash","score":82.2,"functional":78.1,"quality":74.4,"passed":true,"tokens":1919,"timeMs":32938,"cost":0.0056},{"modelName":"Gemini 3 Pro Preview","score":89.5,"functional":85,"quality":80,"passed":true,"tokens":4226,"timeMs":43901,"cost":0.0592},{"modelName":"Claude Sonnet 4.5","score":84.9,"functional":80.7,"quality":78.6,"passed":true,"tokens":3257,"timeMs":34545,"cost":0.0344},{"modelName":"Claude Opus 4.5","score":94.6,"functional":89.9,"quality":81.3,"passed":true,"tokens":3444,"timeMs":46565,"cost":0.0647},{"modelName":"Claude Haiku 4.5","score":94.3,"functional":89.6,"quality":80.9,"passed":true,"tokens":3800,"timeMs":16951,"cost":0.0197},{"modelName":"DeepSeek v3.2","score":92.8,"functional":88.2,"quality":81.1,"passed":true,"tokens":2510,"timeMs":113644,"cost":0.0024},{"modelName":"OpenAI GPT-5.2","score":85.7,"functional":81.4,"quality":78.3,"passed":true,"tokens":2866,"timeMs":30335,"cost":0.0273},{"modelName":"GLM 4.7 Flash","score":89.9,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":90,"functional":85.5,"quality":80.1,"passed":true,"tokens":2984,"timeMs":81939,"cost":0.0014},{"modelName":"Grok 4","score":93.8,"functional":89.2,"quality":81.5,"passed":true,"tokens":2593,"timeMs":82219,"cost":0.0257},{"modelName":"Grok 4.1 
Fast","score":84.7,"functional":80.4,"quality":77.8,"passed":true,"tokens":3772,"timeMs":98069,"cost":0.0013},{"modelName":"Qwen3 Max","score":89,"functional":85,"quality":80,"passed":true,"tokens":5915,"timeMs":118218,"cost":0.0348}]},{"taskId":"code-evolution/legacy-migration/task-1","category":"code-evolution","subcategory":"legacy-migration","results":[{"modelName":"GLM 4-Plus","score":86.8,"functional":82.4,"quality":80.5,"passed":true,"tokens":3769,"timeMs":123109,"cost":0.0055},{"modelName":"MiniMax M2.1","score":83.5,"functional":79.4,"quality":79.7,"passed":true,"tokens":14264,"timeMs":123048,"cost":0.0149},{"modelName":"GLM-4.7","score":76.8,"functional":73,"quality":78.3,"passed":true,"tokens":3912,"timeMs":66526,"cost":0.0034},{"modelName":"Gemini 3 Flash","score":80.6,"functional":76.6,"quality":75.2,"passed":true,"tokens":2304,"timeMs":24611,"cost":0.0043},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":3819,"timeMs":35065,"cost":0.0535},{"modelName":"Claude Sonnet 4.5","score":82.4,"functional":78.3,"quality":79.1,"passed":true,"tokens":3985,"timeMs":40787,"cost":0.0334},{"modelName":"Claude Opus 4.5","score":91.1,"functional":86.6,"quality":81.5,"passed":true,"tokens":3198,"timeMs":54749,"cost":0.0581},{"modelName":"Claude Haiku 4.5","score":88.5,"functional":84.1,"quality":80.3,"passed":true,"tokens":5206,"timeMs":17487,"cost":0.0147},{"modelName":"DeepSeek v3.2","score":90,"functional":85.5,"quality":81.5,"passed":true,"tokens":2427,"timeMs":93515,"cost":0.0033},{"modelName":"OpenAI GPT-5.2","score":80.8,"functional":76.7,"quality":78.1,"passed":true,"tokens":3200,"timeMs":28549,"cost":0.0284},{"modelName":"GLM 4.7 Flash","score":87,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":83.6,"functional":79.4,"quality":79.3,"passed":true,"tokens":2788,"timeMs":55308,"cost":0.0013},{"modelName":"Grok 
4","score":88.7,"functional":84.2,"quality":81.1,"passed":true,"tokens":2387,"timeMs":63144,"cost":0.0361},{"modelName":"Grok 4.1 Fast","score":79.2,"functional":75.2,"quality":77.3,"passed":true,"tokens":3651,"timeMs":67606,"cost":0.0015},{"modelName":"Qwen3 Max","score":89.1,"functional":85,"quality":80,"passed":true,"tokens":2779,"timeMs":54634,"cost":0.0151}]},{"taskId":"code-evolution/legacy-migration/task-2","category":"code-evolution","subcategory":"legacy-migration","results":[{"modelName":"GLM 4-Plus","score":88.9,"functional":84.4,"quality":81.1,"passed":true,"tokens":3857,"timeMs":116990,"cost":0.0042},{"modelName":"MiniMax M2.1","score":81.3,"functional":77.2,"quality":79.1,"passed":true,"tokens":13392,"timeMs":135392,"cost":0.0127},{"modelName":"GLM-4.7","score":78.7,"functional":74.7,"quality":78.9,"passed":true,"tokens":2900,"timeMs":55798,"cost":0.0033},{"modelName":"Gemini 3 Flash","score":83,"functional":78.8,"quality":75.9,"passed":true,"tokens":2541,"timeMs":21140,"cost":0.0043},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":3819,"timeMs":35065,"cost":0.0535},{"modelName":"Claude Sonnet 4.5","score":84.7,"functional":80.4,"quality":79.7,"passed":true,"tokens":3557,"timeMs":46966,"cost":0.0377},{"modelName":"Claude Opus 4.5","score":90.5,"functional":85.9,"quality":81.3,"passed":true,"tokens":2939,"timeMs":38094,"cost":0.0757},{"modelName":"Claude Haiku 4.5","score":86.2,"functional":81.9,"quality":79.6,"passed":true,"tokens":4597,"timeMs":18464,"cost":0.0165},{"modelName":"DeepSeek v3.2","score":90,"functional":85.5,"quality":81.5,"passed":true,"tokens":2989,"timeMs":88721,"cost":0.003},{"modelName":"OpenAI GPT-5.2","score":81.1,"functional":77.1,"quality":78.2,"passed":true,"tokens":2910,"timeMs":25092,"cost":0.0318},{"modelName":"GLM 4.7 Flash","score":87,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 
Fast","score":81.7,"functional":77.6,"quality":78.8,"passed":true,"tokens":2503,"timeMs":72801,"cost":0.0011},{"modelName":"Grok 4","score":86.6,"functional":82.3,"quality":80.5,"passed":true,"tokens":2857,"timeMs":86404,"cost":0.0255},{"modelName":"Grok 4.1 Fast","score":78.8,"functional":74.9,"quality":77.2,"passed":true,"tokens":2815,"timeMs":70370,"cost":0.0014},{"modelName":"Qwen3 Max","score":89.1,"functional":85,"quality":80,"passed":true,"tokens":2779,"timeMs":54634,"cost":0.0151}]},{"taskId":"code-evolution/legacy-migration/task-3","category":"code-evolution","subcategory":"legacy-migration","results":[{"modelName":"GLM 4-Plus","score":90,"functional":85.5,"quality":81.5,"passed":true,"tokens":4113,"timeMs":121990,"cost":0.0047},{"modelName":"MiniMax M2.1","score":79.8,"functional":75.8,"quality":78.6,"passed":true,"tokens":13125,"timeMs":177645,"cost":0.0116},{"modelName":"GLM-4.7","score":81.1,"functional":77,"quality":79.6,"passed":true,"tokens":3316,"timeMs":72617,"cost":0.0033},{"modelName":"Gemini 3 Flash","score":84.7,"functional":80.5,"quality":76.4,"passed":true,"tokens":2304,"timeMs":22110,"cost":0.0043},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":3819,"timeMs":35065,"cost":0.0535},{"modelName":"Claude Sonnet 4.5","score":87.1,"functional":82.8,"quality":80.5,"passed":true,"tokens":3374,"timeMs":36215,"cost":0.0413},{"modelName":"Claude Opus 4.5","score":88.7,"functional":84.3,"quality":80.8,"passed":true,"tokens":4099,"timeMs":39553,"cost":0.07},{"modelName":"Claude Haiku 4.5","score":83.7,"functional":79.6,"quality":78.9,"passed":true,"tokens":4873,"timeMs":24649,"cost":0.0173},{"modelName":"DeepSeek v3.2","score":88.9,"functional":84.4,"quality":81.1,"passed":true,"tokens":3040,"timeMs":89219,"cost":0.0026},{"modelName":"OpenAI GPT-5.2","score":82.6,"functional":78.5,"quality":78.6,"passed":true,"tokens":2319,"timeMs":22760,"cost":0.033},{"modelName":"GLM 4.7 
Flash","score":87,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":80.8,"functional":76.8,"quality":78.5,"passed":true,"tokens":2872,"timeMs":88125,"cost":0.001},{"modelName":"Grok 4","score":84.1,"functional":79.9,"quality":79.7,"passed":true,"tokens":2141,"timeMs":83239,"cost":0.0252},{"modelName":"Grok 4.1 Fast","score":79.7,"functional":75.7,"quality":77.5,"passed":true,"tokens":3575,"timeMs":90858,"cost":0.0014},{"modelName":"Qwen3 Max","score":89.1,"functional":85,"quality":80,"passed":true,"tokens":2779,"timeMs":54634,"cost":0.0151}]},{"taskId":"code-evolution/legacy-migration/task-4","category":"code-evolution","subcategory":"legacy-migration","results":[{"modelName":"GLM 4-Plus","score":90,"functional":85.5,"quality":81.5,"passed":true,"tokens":4816,"timeMs":112252,"cost":0.0059},{"modelName":"MiniMax M2.1","score":79.4,"functional":75.5,"quality":78.5,"passed":true,"tokens":12397,"timeMs":138895,"cost":0.0126},{"modelName":"GLM-4.7","score":83.5,"functional":79.3,"quality":80.3,"passed":true,"tokens":4007,"timeMs":49509,"cost":0.0035},{"modelName":"Gemini 3 Flash","score":85.4,"functional":81.2,"quality":76.6,"passed":true,"tokens":2197,"timeMs":21668,"cost":0.0049},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":3819,"timeMs":35065,"cost":0.0535},{"modelName":"Claude Sonnet 4.5","score":89.2,"functional":84.8,"quality":81.1,"passed":true,"tokens":3048,"timeMs":42353,"cost":0.0434},{"modelName":"Claude Opus 4.5","score":86.3,"functional":82,"quality":80.1,"passed":true,"tokens":3267,"timeMs":51906,"cost":0.0585},{"modelName":"Claude Haiku 4.5","score":81.9,"functional":77.8,"quality":78.3,"passed":true,"tokens":3659,"timeMs":19829,"cost":0.0137},{"modelName":"DeepSeek v3.2","score":86.8,"functional":82.4,"quality":80.5,"passed":true,"tokens":3290,"timeMs":111941,"cost":0.0023},{"modelName":"OpenAI 
GPT-5.2","score":84.8,"functional":80.6,"quality":79.3,"passed":true,"tokens":3180,"timeMs":19689,"cost":0.0263},{"modelName":"GLM 4.7 Flash","score":87,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":81.2,"functional":77.1,"quality":78.6,"passed":true,"tokens":3031,"timeMs":89591,"cost":0.001},{"modelName":"Grok 4","score":81.9,"functional":77.8,"quality":79.1,"passed":true,"tokens":3040,"timeMs":63667,"cost":0.032},{"modelName":"Grok 4.1 Fast","score":81.6,"functional":77.5,"quality":78,"passed":true,"tokens":3282,"timeMs":99253,"cost":0.0011},{"modelName":"Qwen3 Max","score":89.1,"functional":85,"quality":80,"passed":true,"tokens":2779,"timeMs":54634,"cost":0.0151}]},{"taskId":"code-evolution/legacy-migration/task-5","category":"code-evolution","subcategory":"legacy-migration","results":[{"modelName":"GLM 4-Plus","score":88.9,"functional":84.4,"quality":81.1,"passed":true,"tokens":3920,"timeMs":91296,"cost":0.0044},{"modelName":"MiniMax M2.1","score":80.3,"functional":76.3,"quality":78.8,"passed":true,"tokens":13639,"timeMs":165903,"cost":0.0143},{"modelName":"GLM-4.7","score":85.2,"functional":80.9,"quality":80.9,"passed":true,"tokens":3327,"timeMs":40819,"cost":0.0044},{"modelName":"Gemini 3 Flash","score":84.9,"functional":80.7,"quality":76.5,"passed":true,"tokens":2190,"timeMs":29353,"cost":0.0043},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":3819,"timeMs":35065,"cost":0.0535},{"modelName":"Claude Sonnet 4.5","score":90.4,"functional":85.9,"quality":81.5,"passed":true,"tokens":3562,"timeMs":48535,"cost":0.0436},{"modelName":"Claude Opus 4.5","score":83.9,"functional":79.7,"quality":79.3,"passed":true,"tokens":3171,"timeMs":37996,"cost":0.0708},{"modelName":"Claude Haiku 4.5","score":81,"functional":77,"quality":78.1,"passed":true,"tokens":3812,"timeMs":26470,"cost":0.015},{"modelName":"DeepSeek 
v3.2","score":84.3,"functional":80.1,"quality":79.7,"passed":true,"tokens":3051,"timeMs":109932,"cost":0.0025},{"modelName":"OpenAI GPT-5.2","score":87.3,"functional":83,"quality":80,"passed":true,"tokens":2877,"timeMs":27192,"cost":0.0317},{"modelName":"GLM 4.7 Flash","score":87,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":82.7,"functional":78.5,"quality":79.1,"passed":true,"tokens":2484,"timeMs":73306,"cost":0.0014},{"modelName":"Grok 4","score":80.4,"functional":76.4,"quality":78.6,"passed":true,"tokens":3151,"timeMs":94062,"cost":0.0288},{"modelName":"Grok 4.1 Fast","score":84,"functional":79.8,"quality":78.8,"passed":true,"tokens":2872,"timeMs":89209,"cost":0.0014},{"modelName":"Qwen3 Max","score":89.1,"functional":85,"quality":80,"passed":true,"tokens":2779,"timeMs":54634,"cost":0.0151}]},{"taskId":"code-evolution/performance/task-1","category":"code-evolution","subcategory":"performance","results":[{"modelName":"GLM 4-Plus","score":86.8,"functional":82.4,"quality":80.5,"passed":true,"tokens":4921,"timeMs":71338,"cost":0.0058},{"modelName":"MiniMax M2.1","score":82.2,"functional":78.1,"quality":79.3,"passed":true,"tokens":14183,"timeMs":186026,"cost":0.0145},{"modelName":"GLM-4.7","score":85.9,"functional":81.6,"quality":81.1,"passed":true,"tokens":4084,"timeMs":73799,"cost":0.0036},{"modelName":"Gemini 3 Flash","score":83.3,"functional":79.1,"quality":76,"passed":true,"tokens":2149,"timeMs":24544,"cost":0.005},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4406,"timeMs":38931,"cost":0.0617},{"modelName":"Claude Sonnet 4.5","score":90.4,"functional":85.9,"quality":81.5,"passed":true,"tokens":3207,"timeMs":29601,"cost":0.032},{"modelName":"Claude Opus 4.5","score":82.1,"functional":78,"quality":78.8,"passed":true,"tokens":3705,"timeMs":49495,"cost":0.0769},{"modelName":"Claude Haiku 
4.5","score":81.3,"functional":77.3,"quality":78.2,"passed":true,"tokens":4401,"timeMs":15480,"cost":0.0156},{"modelName":"DeepSeek v3.2","score":82.1,"functional":78,"quality":79.1,"passed":true,"tokens":3559,"timeMs":78007,"cost":0.0032},{"modelName":"OpenAI GPT-5.2","score":89.4,"functional":84.9,"quality":80.7,"passed":true,"tokens":2487,"timeMs":20693,"cost":0.025},{"modelName":"GLM 4.7 Flash","score":86.1,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":84.9,"functional":80.7,"quality":79.7,"passed":true,"tokens":2554,"timeMs":88647,"cost":0.0011},{"modelName":"Grok 4","score":80,"functional":76,"quality":78.5,"passed":true,"tokens":2185,"timeMs":62001,"cost":0.0339},{"modelName":"Grok 4.1 Fast","score":86.4,"functional":82,"quality":79.5,"passed":true,"tokens":3820,"timeMs":71429,"cost":0.0015},{"modelName":"Qwen3 Max","score":88.5,"functional":85,"quality":80,"passed":true,"tokens":5190,"timeMs":88945,"cost":0.0296}]},{"taskId":"code-evolution/performance/task-2","category":"code-evolution","subcategory":"performance","results":[{"modelName":"GLM 4-Plus","score":84.3,"functional":80.1,"quality":79.7,"passed":true,"tokens":4879,"timeMs":80965,"cost":0.0048},{"modelName":"MiniMax M2.1","score":84.6,"functional":80.4,"quality":80.1,"passed":true,"tokens":16705,"timeMs":200830,"cost":0.0131},{"modelName":"GLM-4.7","score":85.4,"functional":81.1,"quality":80.9,"passed":true,"tokens":3946,"timeMs":49226,"cost":0.0045},{"modelName":"Gemini 3 Flash","score":81,"functional":76.9,"quality":75.3,"passed":true,"tokens":2218,"timeMs":34686,"cost":0.004},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4406,"timeMs":38931,"cost":0.0617},{"modelName":"Claude Sonnet 4.5","score":89.2,"functional":84.8,"quality":81.1,"passed":true,"tokens":3608,"timeMs":40334,"cost":0.0411},{"modelName":"Claude Opus 
4.5","score":81.2,"functional":77.1,"quality":78.5,"passed":true,"tokens":3491,"timeMs":44253,"cost":0.0608},{"modelName":"Claude Haiku 4.5","score":82.8,"functional":78.7,"quality":78.6,"passed":true,"tokens":3976,"timeMs":22208,"cost":0.0139},{"modelName":"DeepSeek v3.2","score":80.6,"functional":76.5,"quality":78.6,"passed":true,"tokens":2964,"timeMs":62991,"cost":0.0033},{"modelName":"OpenAI GPT-5.2","score":90.6,"functional":86.1,"quality":81,"passed":true,"tokens":3007,"timeMs":28039,"cost":0.0279},{"modelName":"GLM 4.7 Flash","score":86.1,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":87.4,"functional":83,"quality":80.5,"passed":true,"tokens":2786,"timeMs":63813,"cost":0.0013},{"modelName":"Grok 4","score":80.9,"functional":76.8,"quality":78.8,"passed":true,"tokens":2390,"timeMs":88071,"cost":0.0248},{"modelName":"Grok 4.1 Fast","score":88.1,"functional":83.7,"quality":80,"passed":true,"tokens":3721,"timeMs":90582,"cost":0.0014},{"modelName":"Qwen3 Max","score":88.5,"functional":85,"quality":80,"passed":true,"tokens":5190,"timeMs":88945,"cost":0.0296}]},{"taskId":"code-evolution/performance/task-3","category":"code-evolution","subcategory":"performance","results":[{"modelName":"GLM 4-Plus","score":82.1,"functional":78,"quality":79.1,"passed":true,"tokens":5155,"timeMs":116501,"cost":0.0042},{"modelName":"MiniMax M2.1","score":87,"functional":82.6,"quality":80.8,"passed":true,"tokens":15924,"timeMs":162127,"cost":0.0154},{"modelName":"GLM-4.7","score":83.7,"functional":79.6,"quality":80.4,"passed":true,"tokens":2774,"timeMs":64075,"cost":0.0039},{"modelName":"Gemini 3 Flash","score":78.5,"functional":74.6,"quality":74.5,"passed":true,"tokens":1802,"timeMs":32259,"cost":0.0047},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4406,"timeMs":38931,"cost":0.0617},{"modelName":"Claude Sonnet 
4.5","score":87.1,"functional":82.8,"quality":80.5,"passed":true,"tokens":3340,"timeMs":34023,"cost":0.0378},{"modelName":"Claude Opus 4.5","score":81.5,"functional":77.4,"quality":78.6,"passed":true,"tokens":3140,"timeMs":34914,"cost":0.0622},{"modelName":"Claude Haiku 4.5","score":85.1,"functional":80.8,"quality":79.3,"passed":true,"tokens":5022,"timeMs":19477,"cost":0.0161},{"modelName":"DeepSeek v3.2","score":80.2,"functional":76.2,"quality":78.5,"passed":true,"tokens":2623,"timeMs":107672,"cost":0.0033},{"modelName":"OpenAI GPT-5.2","score":90.6,"functional":86.1,"quality":81,"passed":true,"tokens":3079,"timeMs":34544,"cost":0.0315},{"modelName":"GLM 4.7 Flash","score":86.1,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":89.5,"functional":85,"quality":81.1,"passed":true,"tokens":3033,"timeMs":85248,"cost":0.0014},{"modelName":"Grok 4","score":82.8,"functional":78.6,"quality":79.3,"passed":true,"tokens":3025,"timeMs":66736,"cost":0.0339},{"modelName":"Grok 4.1 Fast","score":88.8,"functional":84.4,"quality":80.2,"passed":true,"tokens":3473,"timeMs":79285,"cost":0.0012},{"modelName":"Qwen3 Max","score":88.5,"functional":85,"quality":80,"passed":true,"tokens":5190,"timeMs":88945,"cost":0.0296}]},{"taskId":"code-evolution/performance/task-4","category":"code-evolution","subcategory":"performance","results":[{"modelName":"GLM 4-Plus","score":80.6,"functional":76.6,"quality":78.6,"passed":true,"tokens":4369,"timeMs":116002,"cost":0.006},{"modelName":"MiniMax M2.1","score":88.7,"functional":84.3,"quality":81.3,"passed":true,"tokens":17224,"timeMs":153491,"cost":0.0135},{"modelName":"GLM-4.7","score":81.4,"functional":77.4,"quality":79.7,"passed":true,"tokens":3381,"timeMs":58976,"cost":0.0048},{"modelName":"Gemini 3 Flash","score":76.5,"functional":72.7,"quality":73.9,"passed":true,"tokens":2038,"timeMs":31690,"cost":0.0053},{"modelName":"Gemini 3 Pro 
Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4406,"timeMs":38931,"cost":0.0617},{"modelName":"Claude Sonnet 4.5","score":84.7,"functional":80.4,"quality":79.7,"passed":true,"tokens":3716,"timeMs":47992,"cost":0.0343},{"modelName":"Claude Opus 4.5","score":83,"functional":78.9,"quality":79.1,"passed":true,"tokens":4122,"timeMs":34565,"cost":0.0689},{"modelName":"Claude Haiku 4.5","score":87.5,"functional":83.2,"quality":80,"passed":true,"tokens":4826,"timeMs":27648,"cost":0.0199},{"modelName":"DeepSeek v3.2","score":81.1,"functional":77,"quality":78.8,"passed":true,"tokens":3566,"timeMs":103232,"cost":0.0027},{"modelName":"OpenAI GPT-5.2","score":89.4,"functional":84.9,"quality":80.7,"passed":true,"tokens":2688,"timeMs":21555,"cost":0.0311},{"modelName":"GLM 4.7 Flash","score":86.1,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":90.6,"functional":86.1,"quality":81.5,"passed":true,"tokens":2916,"timeMs":60083,"cost":0.0013},{"modelName":"Grok 4","score":85.2,"functional":80.9,"quality":80.1,"passed":true,"tokens":2208,"timeMs":55255,"cost":0.0363},{"modelName":"Grok 4.1 Fast","score":88.3,"functional":83.9,"quality":80,"passed":true,"tokens":2679,"timeMs":82608,"cost":0.0014},{"modelName":"Qwen3 Max","score":88.5,"functional":85,"quality":80,"passed":true,"tokens":5190,"timeMs":88945,"cost":0.0296}]},{"taskId":"code-evolution/performance/task-5","category":"code-evolution","subcategory":"performance","results":[{"modelName":"GLM 4-Plus","score":80.2,"functional":76.2,"quality":78.5,"passed":true,"tokens":4601,"timeMs":106958,"cost":0.0062},{"modelName":"MiniMax M2.1","score":89.4,"functional":84.9,"quality":81.5,"passed":true,"tokens":12623,"timeMs":181807,"cost":0.0152},{"modelName":"GLM-4.7","score":79,"functional":75,"quality":79,"passed":true,"tokens":3741,"timeMs":68870,"cost":0.0035},{"modelName":"Gemini 3 
Flash","score":75.5,"functional":71.7,"quality":73.6,"passed":true,"tokens":2325,"timeMs":30758,"cost":0.0047},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4406,"timeMs":38931,"cost":0.0617},{"modelName":"Claude Sonnet 4.5","score":82.4,"functional":78.3,"quality":79.1,"passed":true,"tokens":3139,"timeMs":35797,"cost":0.0405},{"modelName":"Claude Opus 4.5","score":85.2,"functional":81,"quality":79.7,"passed":true,"tokens":4241,"timeMs":36667,"cost":0.0635},{"modelName":"Claude Haiku 4.5","score":89.6,"functional":85.1,"quality":80.7,"passed":true,"tokens":4893,"timeMs":21606,"cost":0.0186},{"modelName":"DeepSeek v3.2","score":82.9,"functional":78.8,"quality":79.3,"passed":true,"tokens":2652,"timeMs":78586,"cost":0.0028},{"modelName":"OpenAI GPT-5.2","score":87.3,"functional":83,"quality":80,"passed":true,"tokens":3096,"timeMs":20707,"cost":0.0312},{"modelName":"GLM 4.7 Flash","score":86.1,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":90.6,"functional":86.1,"quality":81.5,"passed":true,"tokens":2906,"timeMs":84031,"cost":0.0011},{"modelName":"Grok 4","score":87.6,"functional":83.2,"quality":80.8,"passed":true,"tokens":2603,"timeMs":56124,"cost":0.0349},{"modelName":"Grok 4.1 Fast","score":86.6,"functional":82.3,"quality":79.6,"passed":true,"tokens":3305,"timeMs":66004,"cost":0.0016},{"modelName":"Qwen3 Max","score":88.5,"functional":85,"quality":80,"passed":true,"tokens":5190,"timeMs":88945,"cost":0.0296}]},{"taskId":"code-evolution/refactoring/task-1","category":"code-evolution","subcategory":"refactoring","results":[{"modelName":"GLM 4-Plus","score":81.1,"functional":77,"quality":78.8,"passed":true,"tokens":3777,"timeMs":96857,"cost":0.0045},{"modelName":"MiniMax 
M2.1","score":88.9,"functional":84.4,"quality":81.3,"passed":true,"tokens":17274,"timeMs":188838,"cost":0.011},{"modelName":"GLM-4.7","score":77,"functional":73.2,"quality":78.4,"passed":true,"tokens":2906,"timeMs":53242,"cost":0.0048},{"modelName":"Gemini 3 Flash","score":75.7,"functional":71.9,"quality":73.7,"passed":true,"tokens":2153,"timeMs":22773,"cost":0.0051},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4743,"timeMs":38594,"cost":0.0664},{"modelName":"Claude Sonnet 4.5","score":80.9,"functional":76.9,"quality":78.6,"passed":true,"tokens":2924,"timeMs":33986,"cost":0.0406},{"modelName":"Claude Opus 4.5","score":87.7,"functional":83.3,"quality":80.5,"passed":true,"tokens":2913,"timeMs":52740,"cost":0.0589},{"modelName":"Claude Haiku 4.5","score":90.8,"functional":86.3,"quality":81,"passed":true,"tokens":4840,"timeMs":26219,"cost":0.0183},{"modelName":"DeepSeek v3.2","score":85.4,"functional":81.1,"quality":80.1,"passed":true,"tokens":3130,"timeMs":105758,"cost":0.0027},{"modelName":"OpenAI GPT-5.2","score":84.9,"functional":80.6,"quality":79.3,"passed":true,"tokens":2164,"timeMs":33290,"cost":0.0223},{"modelName":"GLM 4.7 Flash","score":84.6,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":89.5,"functional":85,"quality":81.1,"passed":true,"tokens":3347,"timeMs":58041,"cost":0.0011},{"modelName":"Grok 4","score":89.3,"functional":84.8,"quality":81.3,"passed":true,"tokens":2230,"timeMs":53504,"cost":0.0326},{"modelName":"Grok 4.1 Fast","score":84.3,"functional":80.1,"quality":78.9,"passed":true,"tokens":3612,"timeMs":109247,"cost":0.0013},{"modelName":"Qwen3 Max","score":88.7,"functional":85,"quality":80,"passed":true,"tokens":4226,"timeMs":77256,"cost":0.0224}]},{"taskId":"code-evolution/refactoring/task-2","category":"code-evolution","subcategory":"refactoring","results":[{"modelName":"GLM 
4-Plus","score":83,"functional":78.8,"quality":79.3,"passed":true,"tokens":4959,"timeMs":95743,"cost":0.0059},{"modelName":"MiniMax M2.1","score":87.3,"functional":82.9,"quality":80.9,"passed":true,"tokens":14551,"timeMs":186312,"cost":0.0146},{"modelName":"GLM-4.7","score":76,"functional":72.2,"quality":78.1,"passed":true,"tokens":2988,"timeMs":72608,"cost":0.0046},{"modelName":"Gemini 3 Flash","score":77,"functional":73.2,"quality":74.1,"passed":true,"tokens":1869,"timeMs":20383,"cost":0.0055},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4743,"timeMs":38594,"cost":0.0664},{"modelName":"Claude Sonnet 4.5","score":80.6,"functional":76.6,"quality":78.5,"passed":true,"tokens":4028,"timeMs":44302,"cost":0.0332},{"modelName":"Claude Opus 4.5","score":89.8,"functional":85.3,"quality":81.1,"passed":true,"tokens":4111,"timeMs":32259,"cost":0.0805},{"modelName":"Claude Haiku 4.5","score":90.8,"functional":86.3,"quality":81,"passed":true,"tokens":4550,"timeMs":24877,"cost":0.0154},{"modelName":"DeepSeek v3.2","score":87.7,"functional":83.4,"quality":80.8,"passed":true,"tokens":2832,"timeMs":64388,"cost":0.0028},{"modelName":"OpenAI GPT-5.2","score":82.6,"functional":78.5,"quality":78.6,"passed":true,"tokens":2507,"timeMs":34254,"cost":0.032},{"modelName":"GLM 4.7 Flash","score":84.6,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":87.4,"functional":83,"quality":80.5,"passed":true,"tokens":2349,"timeMs":52063,"cost":0.0013},{"modelName":"Grok 4","score":90,"functional":85.5,"quality":81.5,"passed":true,"tokens":3135,"timeMs":55618,"cost":0.027},{"modelName":"Grok 4.1 Fast","score":81.9,"functional":77.8,"quality":78.1,"passed":true,"tokens":3246,"timeMs":76540,"cost":0.0011},{"modelName":"Qwen3 
Max","score":88.7,"functional":85,"quality":80,"passed":true,"tokens":4226,"timeMs":77256,"cost":0.0224}]},{"taskId":"code-evolution/refactoring/task-3","category":"code-evolution","subcategory":"refactoring","results":[{"modelName":"GLM 4-Plus","score":85.4,"functional":81.1,"quality":80.1,"passed":true,"tokens":4999,"timeMs":119465,"cost":0.0046},{"modelName":"MiniMax M2.1","score":84.9,"functional":80.7,"quality":80.2,"passed":true,"tokens":17098,"timeMs":130597,"cost":0.0122},{"modelName":"GLM-4.7","score":76.2,"functional":72.4,"quality":78.1,"passed":true,"tokens":3192,"timeMs":52268,"cost":0.0048},{"modelName":"Gemini 3 Flash","score":79.2,"functional":75.3,"quality":74.7,"passed":true,"tokens":1812,"timeMs":30790,"cost":0.0041},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4743,"timeMs":38594,"cost":0.0664},{"modelName":"Claude Sonnet 4.5","score":81.5,"functional":77.4,"quality":78.8,"passed":true,"tokens":3988,"timeMs":52616,"cost":0.0393},{"modelName":"Claude Opus 4.5","score":91,"functional":86.4,"quality":81.5,"passed":true,"tokens":3413,"timeMs":54845,"cost":0.0708},{"modelName":"Claude Haiku 4.5","score":89.6,"functional":85.2,"quality":80.7,"passed":true,"tokens":4071,"timeMs":26776,"cost":0.0165},{"modelName":"DeepSeek v3.2","score":89.5,"functional":85,"quality":81.3,"passed":true,"tokens":2748,"timeMs":80899,"cost":0.0026},{"modelName":"OpenAI GPT-5.2","score":81.1,"functional":77.1,"quality":78.2,"passed":true,"tokens":2460,"timeMs":26793,"cost":0.0333},{"modelName":"GLM 4.7 Flash","score":84.6,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":84.9,"functional":80.7,"quality":79.7,"passed":true,"tokens":3178,"timeMs":83129,"cost":0.0012},{"modelName":"Grok 4","score":89.5,"functional":85,"quality":81.3,"passed":true,"tokens":2214,"timeMs":61513,"cost":0.0347},{"modelName":"Grok 4.1 
Fast","score":79.9,"functional":75.9,"quality":77.5,"passed":true,"tokens":3380,"timeMs":83638,"cost":0.0013},{"modelName":"Qwen3 Max","score":88.7,"functional":85,"quality":80,"passed":true,"tokens":4226,"timeMs":77256,"cost":0.0224}]},{"taskId":"code-evolution/refactoring/task-4","category":"code-evolution","subcategory":"refactoring","results":[{"modelName":"GLM 4-Plus","score":87.8,"functional":83.4,"quality":80.8,"passed":true,"tokens":4536,"timeMs":93521,"cost":0.0046},{"modelName":"MiniMax M2.1","score":82.5,"functional":78.4,"quality":79.4,"passed":true,"tokens":18116,"timeMs":156331,"cost":0.0139},{"modelName":"GLM-4.7","score":77.5,"functional":73.6,"quality":78.5,"passed":true,"tokens":2978,"timeMs":58808,"cost":0.0039},{"modelName":"Gemini 3 Flash","score":81.7,"functional":77.6,"quality":75.5,"passed":true,"tokens":2428,"timeMs":25929,"cost":0.005},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4743,"timeMs":38594,"cost":0.0664},{"modelName":"Claude Sonnet 4.5","score":83.3,"functional":79.2,"quality":79.3,"passed":true,"tokens":2913,"timeMs":49505,"cost":0.046},{"modelName":"Claude Opus 4.5","score":91,"functional":86.4,"quality":81.5,"passed":true,"tokens":3464,"timeMs":34800,"cost":0.0548},{"modelName":"Claude Haiku 4.5","score":87.6,"functional":83.2,"quality":80,"passed":true,"tokens":4367,"timeMs":20235,"cost":0.019},{"modelName":"DeepSeek v3.2","score":90.2,"functional":85.7,"quality":81.5,"passed":true,"tokens":3069,"timeMs":112569,"cost":0.0024},{"modelName":"OpenAI GPT-5.2","score":80.8,"functional":76.7,"quality":78.1,"passed":true,"tokens":2297,"timeMs":35786,"cost":0.0226},{"modelName":"GLM 4.7 Flash","score":84.6,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":82.7,"functional":78.5,"quality":79.1,"passed":true,"tokens":2446,"timeMs":80315,"cost":0.0014},{"modelName":"Grok 
4","score":87.8,"functional":83.5,"quality":80.9,"passed":true,"tokens":2308,"timeMs":85606,"cost":0.0319},{"modelName":"Grok 4.1 Fast","score":78.9,"functional":74.9,"quality":77.2,"passed":true,"tokens":2861,"timeMs":111191,"cost":0.0014},{"modelName":"Qwen3 Max","score":88.7,"functional":85,"quality":80,"passed":true,"tokens":4226,"timeMs":77256,"cost":0.0224}]},{"taskId":"code-evolution/refactoring/task-5","category":"code-evolution","subcategory":"refactoring","results":[{"modelName":"GLM 4-Plus","score":89.5,"functional":85,"quality":81.3,"passed":true,"tokens":4660,"timeMs":99332,"cost":0.0057},{"modelName":"MiniMax M2.1","score":80.5,"functional":76.5,"quality":78.8,"passed":true,"tokens":17407,"timeMs":129738,"cost":0.0123},{"modelName":"GLM-4.7","score":79.7,"functional":75.7,"quality":79.2,"passed":true,"tokens":3759,"timeMs":67917,"cost":0.0044},{"modelName":"Gemini 3 Flash","score":83.9,"functional":79.7,"quality":76.1,"passed":true,"tokens":2130,"timeMs":19659,"cost":0.0046},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":4743,"timeMs":38594,"cost":0.0664},{"modelName":"Claude Sonnet 4.5","score":85.7,"functional":81.5,"quality":80.1,"passed":true,"tokens":3856,"timeMs":29593,"cost":0.0459},{"modelName":"Claude Opus 4.5","score":89.8,"functional":85.3,"quality":81.1,"passed":true,"tokens":3570,"timeMs":42636,"cost":0.0632},{"modelName":"Claude Haiku 4.5","score":85.1,"functional":80.8,"quality":79.3,"passed":true,"tokens":4878,"timeMs":20344,"cost":0.0174},{"modelName":"DeepSeek v3.2","score":89.7,"functional":85.2,"quality":81.3,"passed":true,"tokens":2928,"timeMs":87110,"cost":0.0033},{"modelName":"OpenAI GPT-5.2","score":81.6,"functional":77.6,"quality":78.3,"passed":true,"tokens":3221,"timeMs":33806,"cost":0.0264},{"modelName":"GLM 4.7 Flash","score":84.6,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 
Fast","score":81.2,"functional":77.1,"quality":78.6,"passed":true,"tokens":3197,"timeMs":86979,"cost":0.0012},{"modelName":"Grok 4","score":85.5,"functional":81.3,"quality":80.2,"passed":true,"tokens":2835,"timeMs":70970,"cost":0.035},{"modelName":"Grok 4.1 Fast","score":79.1,"functional":75.1,"quality":77.3,"passed":true,"tokens":3819,"timeMs":77153,"cost":0.0011},{"modelName":"Qwen3 Max","score":88.7,"functional":85,"quality":80,"passed":true,"tokens":4226,"timeMs":77256,"cost":0.0224}]},{"taskId":"code-evolution/security/task-1","category":"code-evolution","subcategory":"security","results":[{"modelName":"GLM 4-Plus","score":90.2,"functional":85.7,"quality":81.5,"passed":true,"tokens":4385,"timeMs":69653,"cost":0.0053},{"modelName":"MiniMax M2.1","score":79.5,"functional":75.5,"quality":78.5,"passed":true,"tokens":12960,"timeMs":159615,"cost":0.0116},{"modelName":"GLM-4.7","score":82.1,"functional":78,"quality":79.9,"passed":true,"tokens":3466,"timeMs":63957,"cost":0.0043},{"modelName":"Gemini 3 Flash","score":85.2,"functional":80.9,"quality":76.5,"passed":true,"tokens":2117,"timeMs":28108,"cost":0.0041},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":3899,"timeMs":35798,"cost":0.0546},{"modelName":"Claude Sonnet 4.5","score":88.1,"functional":83.7,"quality":80.8,"passed":true,"tokens":3216,"timeMs":42058,"cost":0.0389},{"modelName":"Claude Opus 4.5","score":87.7,"functional":83.4,"quality":80.5,"passed":true,"tokens":4038,"timeMs":55174,"cost":0.0664},{"modelName":"Claude Haiku 4.5","score":82.8,"functional":78.7,"quality":78.6,"passed":true,"tokens":4312,"timeMs":18577,"cost":0.0147},{"modelName":"DeepSeek v3.2","score":88,"functional":83.6,"quality":80.9,"passed":true,"tokens":3465,"timeMs":104899,"cost":0.0033},{"modelName":"OpenAI GPT-5.2","score":83.5,"functional":79.3,"quality":78.9,"passed":true,"tokens":3232,"timeMs":33351,"cost":0.0283},{"modelName":"GLM 4.7 
Flash","score":86.7,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":80.8,"functional":76.8,"quality":78.5,"passed":true,"tokens":3437,"timeMs":63349,"cost":0.001},{"modelName":"Grok 4","score":83.1,"functional":78.9,"quality":79.4,"passed":true,"tokens":2330,"timeMs":85216,"cost":0.0272},{"modelName":"Grok 4.1 Fast","score":80.4,"functional":76.4,"quality":77.7,"passed":true,"tokens":3623,"timeMs":104272,"cost":0.0014},{"modelName":"Qwen3 Max","score":88.9,"functional":85,"quality":80,"passed":true,"tokens":3061,"timeMs":64052,"cost":0.0167}]},{"taskId":"code-evolution/security/task-2","category":"code-evolution","subcategory":"security","results":[{"modelName":"GLM 4-Plus","score":89.7,"functional":85.2,"quality":81.3,"passed":true,"tokens":5063,"timeMs":75986,"cost":0.006},{"modelName":"MiniMax M2.1","score":79.7,"functional":75.7,"quality":78.6,"passed":true,"tokens":14304,"timeMs":190225,"cost":0.0149},{"modelName":"GLM-4.7","score":84.3,"functional":80.1,"quality":80.6,"passed":true,"tokens":3562,"timeMs":64931,"cost":0.0035},{"modelName":"Gemini 3 Flash","score":85.4,"functional":81.1,"quality":76.6,"passed":true,"tokens":2395,"timeMs":22433,"cost":0.0044},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":3899,"timeMs":35798,"cost":0.0546},{"modelName":"Claude Sonnet 4.5","score":89.9,"functional":85.4,"quality":81.3,"passed":true,"tokens":2750,"timeMs":30843,"cost":0.0367},{"modelName":"Claude Opus 4.5","score":85.3,"functional":81,"quality":79.7,"passed":true,"tokens":4318,"timeMs":37057,"cost":0.0635},{"modelName":"Claude Haiku 4.5","score":81.4,"functional":77.3,"quality":78.2,"passed":true,"tokens":4338,"timeMs":20952,"cost":0.0183},{"modelName":"DeepSeek v3.2","score":85.7,"functional":81.4,"quality":80.2,"passed":true,"tokens":3045,"timeMs":94802,"cost":0.0031},{"modelName":"OpenAI 
GPT-5.2","score":85.9,"functional":81.6,"quality":79.6,"passed":true,"tokens":2586,"timeMs":27666,"cost":0.0252},{"modelName":"GLM 4.7 Flash","score":86.7,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":81.7,"functional":77.6,"quality":78.8,"passed":true,"tokens":2447,"timeMs":81413,"cost":0.0014},{"modelName":"Grok 4","score":81.1,"functional":77,"quality":78.8,"passed":true,"tokens":2243,"timeMs":78204,"cost":0.0333},{"modelName":"Grok 4.1 Fast","score":82.6,"functional":78.4,"quality":78.3,"passed":true,"tokens":3317,"timeMs":88429,"cost":0.0014},{"modelName":"Qwen3 Max","score":88.9,"functional":85,"quality":80,"passed":true,"tokens":3061,"timeMs":64052,"cost":0.0167}]},{"taskId":"code-evolution/security/task-3","category":"code-evolution","subcategory":"security","results":[{"modelName":"GLM 4-Plus","score":88,"functional":83.6,"quality":80.9,"passed":true,"tokens":4275,"timeMs":94280,"cost":0.0048},{"modelName":"MiniMax M2.1","score":81,"functional":77,"quality":79,"passed":true,"tokens":16185,"timeMs":184090,"cost":0.0148},{"modelName":"GLM-4.7","score":85.6,"functional":81.4,"quality":81,"passed":true,"tokens":3356,"timeMs":58953,"cost":0.0033},{"modelName":"Gemini 3 Flash","score":84.3,"functional":80.1,"quality":76.3,"passed":true,"tokens":1990,"timeMs":21415,"cost":0.0043},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":3899,"timeMs":35798,"cost":0.0546},{"modelName":"Claude Sonnet 4.5","score":90.6,"functional":86,"quality":81.5,"passed":true,"tokens":3328,"timeMs":41381,"cost":0.0317},{"modelName":"Claude Opus 4.5","score":83,"functional":78.9,"quality":79.1,"passed":true,"tokens":3302,"timeMs":52649,"cost":0.0576},{"modelName":"Claude Haiku 4.5","score":81,"functional":76.9,"quality":78.1,"passed":true,"tokens":4631,"timeMs":18758,"cost":0.0155},{"modelName":"DeepSeek 
v3.2","score":83.3,"functional":79.1,"quality":79.4,"passed":true,"tokens":3347,"timeMs":84566,"cost":0.0032},{"modelName":"OpenAI GPT-5.2","score":88.3,"functional":83.9,"quality":80.3,"passed":true,"tokens":3158,"timeMs":31790,"cost":0.0279},{"modelName":"GLM 4.7 Flash","score":86.7,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":83.6,"functional":79.4,"quality":79.3,"passed":true,"tokens":3446,"timeMs":68832,"cost":0.0013},{"modelName":"Grok 4","score":80.1,"functional":76.1,"quality":78.5,"passed":true,"tokens":3142,"timeMs":56456,"cost":0.0319},{"modelName":"Grok 4.1 Fast","score":85,"functional":80.8,"quality":79.1,"passed":true,"tokens":3260,"timeMs":84618,"cost":0.0013},{"modelName":"Qwen3 Max","score":88.9,"functional":85,"quality":80,"passed":true,"tokens":3061,"timeMs":64052,"cost":0.0167}]},{"taskId":"code-evolution/security/task-4","category":"code-evolution","subcategory":"security","results":[{"modelName":"GLM 4-Plus","score":85.7,"functional":81.4,"quality":80.2,"passed":true,"tokens":5085,"timeMs":86424,"cost":0.0041},{"modelName":"MiniMax M2.1","score":83.2,"functional":79,"quality":79.6,"passed":true,"tokens":14956,"timeMs":170792,"cost":0.0122},{"modelName":"GLM-4.7","score":85.8,"functional":81.5,"quality":81,"passed":true,"tokens":4068,"timeMs":68063,"cost":0.0041},{"modelName":"Gemini 3 Flash","score":82.3,"functional":78.2,"quality":75.7,"passed":true,"tokens":2546,"timeMs":25133,"cost":0.0056},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":3899,"timeMs":35798,"cost":0.0546},{"modelName":"Claude Sonnet 4.5","score":90,"functional":85.5,"quality":81.3,"passed":true,"tokens":3359,"timeMs":44840,"cost":0.0452},{"modelName":"Claude Opus 4.5","score":81.5,"functional":77.5,"quality":78.6,"passed":true,"tokens":3568,"timeMs":48691,"cost":0.0677},{"modelName":"Claude Haiku 
4.5","score":81.9,"functional":77.8,"quality":78.3,"passed":true,"tokens":4600,"timeMs":20971,"cost":0.0149},{"modelName":"DeepSeek v3.2","score":81.3,"functional":77.2,"quality":78.8,"passed":true,"tokens":3190,"timeMs":98272,"cost":0.0028},{"modelName":"OpenAI GPT-5.2","score":90.1,"functional":85.5,"quality":80.9,"passed":true,"tokens":2255,"timeMs":31538,"cost":0.0253},{"modelName":"GLM 4.7 Flash","score":86.7,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":86,"functional":81.7,"quality":80.1,"passed":true,"tokens":3101,"timeMs":74622,"cost":0.0011},{"modelName":"Grok 4","score":80.3,"functional":76.2,"quality":78.6,"passed":true,"tokens":2299,"timeMs":94957,"cost":0.0254},{"modelName":"Grok 4.1 Fast","score":87.2,"functional":82.9,"quality":79.7,"passed":true,"tokens":2628,"timeMs":100300,"cost":0.0011},{"modelName":"Qwen3 Max","score":88.9,"functional":85,"quality":80,"passed":true,"tokens":3061,"timeMs":64052,"cost":0.0167}]},{"taskId":"code-evolution/security/task-5","category":"code-evolution","subcategory":"security","results":[{"modelName":"GLM 4-Plus","score":83.3,"functional":79.1,"quality":79.4,"passed":true,"tokens":5201,"timeMs":124359,"cost":0.0045},{"modelName":"MiniMax M2.1","score":85.7,"functional":81.4,"quality":80.4,"passed":true,"tokens":15023,"timeMs":153593,"cost":0.0134},{"modelName":"GLM-4.7","score":84.8,"functional":80.5,"quality":80.7,"passed":true,"tokens":3744,"timeMs":51835,"cost":0.0044},{"modelName":"Gemini 3 Flash","score":79.9,"functional":75.9,"quality":74.9,"passed":true,"tokens":2495,"timeMs":27984,"cost":0.0049},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":3899,"timeMs":35798,"cost":0.0546},{"modelName":"Claude Sonnet 4.5","score":88.4,"functional":84,"quality":80.9,"passed":true,"tokens":3639,"timeMs":36886,"cost":0.0394},{"modelName":"Claude Opus 
4.5","score":81.2,"functional":77.1,"quality":78.5,"passed":true,"tokens":3188,"timeMs":37978,"cost":0.0759},{"modelName":"Claude Haiku 4.5","score":83.7,"functional":79.5,"quality":78.9,"passed":true,"tokens":4279,"timeMs":19938,"cost":0.0183},{"modelName":"DeepSeek v3.2","score":80.3,"functional":76.3,"quality":78.5,"passed":true,"tokens":3363,"timeMs":96605,"cost":0.0029},{"modelName":"OpenAI GPT-5.2","score":90.7,"functional":86.2,"quality":81.1,"passed":true,"tokens":2667,"timeMs":32514,"cost":0.0307},{"modelName":"GLM 4.7 Flash","score":86.7,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":88.4,"functional":83.9,"quality":80.8,"passed":true,"tokens":2491,"timeMs":80384,"cost":0.001},{"modelName":"Grok 4","score":81.6,"functional":77.5,"quality":79,"passed":true,"tokens":3044,"timeMs":60259,"cost":0.0244},{"modelName":"Grok 4.1 Fast","score":88.5,"functional":84.1,"quality":80.1,"passed":true,"tokens":3766,"timeMs":74857,"cost":0.0011},{"modelName":"Qwen3 Max","score":88.9,"functional":85,"quality":80,"passed":true,"tokens":3061,"timeMs":64052,"cost":0.0167}]},{"taskId":"code-evolution/testing/task-1","category":"code-evolution","subcategory":"testing","results":[{"modelName":"GLM 4-Plus","score":81.3,"functional":77.2,"quality":78.8,"passed":true,"tokens":4704,"timeMs":124586,"cost":0.0055},{"modelName":"MiniMax M2.1","score":87.8,"functional":83.4,"quality":81,"passed":true,"tokens":18374,"timeMs":127156,"cost":0.0113},{"modelName":"GLM-4.7","score":82.8,"functional":78.7,"quality":80.1,"passed":true,"tokens":2947,"timeMs":43371,"cost":0.0038},{"modelName":"Gemini 3 Flash","score":77.6,"functional":73.7,"quality":74.2,"passed":true,"tokens":2188,"timeMs":30947,"cost":0.005},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":3829,"timeMs":35733,"cost":0.0536},{"modelName":"Claude Sonnet 
4.5","score":86.1,"functional":81.8,"quality":80.2,"passed":true,"tokens":3308,"timeMs":40303,"cost":0.0357},{"modelName":"Claude Opus 4.5","score":82,"functional":77.9,"quality":78.8,"passed":true,"tokens":4030,"timeMs":41543,"cost":0.0557},{"modelName":"Claude Haiku 4.5","score":86.1,"functional":81.8,"quality":79.6,"passed":true,"tokens":3685,"timeMs":16603,"cost":0.0193},{"modelName":"DeepSeek v3.2","score":80.4,"functional":76.4,"quality":78.6,"passed":true,"tokens":3193,"timeMs":99314,"cost":0.0027},{"modelName":"OpenAI GPT-5.2","score":90.2,"functional":85.7,"quality":80.9,"passed":true,"tokens":2801,"timeMs":36242,"cost":0.0292},{"modelName":"GLM 4.7 Flash","score":84.6,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":90.1,"functional":85.6,"quality":81.3,"passed":true,"tokens":2836,"timeMs":69815,"cost":0.001},{"modelName":"Grok 4","score":83.8,"functional":79.6,"quality":79.6,"passed":true,"tokens":2308,"timeMs":65171,"cost":0.0312},{"modelName":"Grok 4.1 Fast","score":88.7,"functional":84.3,"quality":80.2,"passed":true,"tokens":3400,"timeMs":98041,"cost":0.0011},{"modelName":"Qwen3 Max","score":88.5,"functional":85,"quality":80,"passed":true,"tokens":4758,"timeMs":87371,"cost":0.027}]},{"taskId":"code-evolution/testing/task-2","category":"code-evolution","subcategory":"testing","results":[{"modelName":"GLM 4-Plus","score":80.3,"functional":76.3,"quality":78.5,"passed":true,"tokens":3888,"timeMs":104283,"cost":0.0041},{"modelName":"MiniMax M2.1","score":89.2,"functional":84.7,"quality":81.4,"passed":true,"tokens":12495,"timeMs":141278,"cost":0.0148},{"modelName":"GLM-4.7","score":80.3,"functional":76.3,"quality":79.4,"passed":true,"tokens":2942,"timeMs":63269,"cost":0.0044},{"modelName":"Gemini 3 Flash","score":76,"functional":72.2,"quality":73.8,"passed":true,"tokens":2210,"timeMs":21925,"cost":0.0053},{"modelName":"Gemini 3 Pro 
Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":3829,"timeMs":35733,"cost":0.0536},{"modelName":"Claude Sonnet 4.5","score":83.6,"functional":79.5,"quality":79.4,"passed":true,"tokens":3024,"timeMs":43528,"cost":0.0325},{"modelName":"Claude Opus 4.5","score":83.9,"functional":79.7,"quality":79.3,"passed":true,"tokens":4118,"timeMs":44446,"cost":0.0779},{"modelName":"Claude Haiku 4.5","score":88.5,"functional":84.1,"quality":80.3,"passed":true,"tokens":3758,"timeMs":23301,"cost":0.0165},{"modelName":"DeepSeek v3.2","score":81.8,"functional":77.7,"quality":79,"passed":true,"tokens":2747,"timeMs":99580,"cost":0.003},{"modelName":"OpenAI GPT-5.2","score":88.6,"functional":84.2,"quality":80.4,"passed":true,"tokens":3147,"timeMs":21566,"cost":0.0295},{"modelName":"GLM 4.7 Flash","score":84.6,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":90.8,"functional":86.3,"quality":81.5,"passed":true,"tokens":3014,"timeMs":83875,"cost":0.0011},{"modelName":"Grok 4","score":86.2,"functional":81.9,"quality":80.4,"passed":true,"tokens":3004,"timeMs":91834,"cost":0.0321},{"modelName":"Grok 4.1 Fast","score":87.7,"functional":83.3,"quality":79.9,"passed":true,"tokens":3614,"timeMs":81721,"cost":0.0012},{"modelName":"Qwen3 Max","score":88.5,"functional":85,"quality":80,"passed":true,"tokens":4758,"timeMs":87371,"cost":0.027}]},{"taskId":"code-evolution/testing/task-3","category":"code-evolution","subcategory":"testing","results":[{"modelName":"GLM 4-Plus","score":80.5,"functional":76.4,"quality":78.6,"passed":true,"tokens":3905,"timeMs":107680,"cost":0.0058},{"modelName":"MiniMax M2.1","score":89.3,"functional":84.9,"quality":81.5,"passed":true,"tokens":12654,"timeMs":136683,"cost":0.0137},{"modelName":"GLM-4.7","score":78,"functional":74.1,"quality":78.7,"passed":true,"tokens":3929,"timeMs":62074,"cost":0.0044},{"modelName":"Gemini 3 
Flash","score":75.4,"functional":71.7,"quality":73.6,"passed":true,"tokens":2365,"timeMs":35454,"cost":0.0049},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":3829,"timeMs":35733,"cost":0.0536},{"modelName":"Claude Sonnet 4.5","score":81.7,"functional":77.6,"quality":78.8,"passed":true,"tokens":3330,"timeMs":40934,"cost":0.0387},{"modelName":"Claude Opus 4.5","score":86.3,"functional":82,"quality":80.1,"passed":true,"tokens":3827,"timeMs":44088,"cost":0.0674},{"modelName":"Claude Haiku 4.5","score":90.3,"functional":85.8,"quality":80.9,"passed":true,"tokens":5258,"timeMs":23706,"cost":0.0188},{"modelName":"DeepSeek v3.2","score":84,"functional":79.8,"quality":79.6,"passed":true,"tokens":3540,"timeMs":97949,"cost":0.003},{"modelName":"OpenAI GPT-5.2","score":86.3,"functional":82,"quality":79.7,"passed":true,"tokens":2570,"timeMs":34319,"cost":0.0263},{"modelName":"GLM 4.7 Flash","score":84.6,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":90.3,"functional":85.8,"quality":81.3,"passed":true,"tokens":3142,"timeMs":74126,"cost":0.0013},{"modelName":"Grok 4","score":88.4,"functional":84,"quality":81,"passed":true,"tokens":2168,"timeMs":69377,"cost":0.0316},{"modelName":"Grok 4.1 Fast","score":85.7,"functional":81.4,"quality":79.3,"passed":true,"tokens":3614,"timeMs":112730,"cost":0.0012},{"modelName":"Qwen3 Max","score":88.5,"functional":85,"quality":80,"passed":true,"tokens":4758,"timeMs":87371,"cost":0.027}]},{"taskId":"code-evolution/testing/task-4","category":"code-evolution","subcategory":"testing","results":[{"modelName":"GLM 4-Plus","score":81.8,"functional":77.7,"quality":79,"passed":true,"tokens":4500,"timeMs":68153,"cost":0.0044},{"modelName":"MiniMax 
M2.1","score":88.3,"functional":83.9,"quality":81.2,"passed":true,"tokens":18413,"timeMs":182098,"cost":0.0121},{"modelName":"GLM-4.7","score":76.4,"functional":72.6,"quality":78.2,"passed":true,"tokens":2844,"timeMs":54770,"cost":0.0044},{"modelName":"Gemini 3 Flash","score":76.2,"functional":72.3,"quality":73.8,"passed":true,"tokens":2527,"timeMs":34061,"cost":0.0042},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":3829,"timeMs":35733,"cost":0.0536},{"modelName":"Claude Sonnet 4.5","score":80.6,"functional":76.6,"quality":78.5,"passed":true,"tokens":2722,"timeMs":50047,"cost":0.0335},{"modelName":"Claude Opus 4.5","score":88.7,"functional":84.3,"quality":80.8,"passed":true,"tokens":3615,"timeMs":37209,"cost":0.0611},{"modelName":"Claude Haiku 4.5","score":91,"functional":86.4,"quality":81.1,"passed":true,"tokens":5203,"timeMs":22652,"cost":0.0139},{"modelName":"DeepSeek v3.2","score":86.4,"functional":82.1,"quality":80.4,"passed":true,"tokens":2680,"timeMs":93067,"cost":0.0029},{"modelName":"OpenAI GPT-5.2","score":83.8,"functional":79.6,"quality":79,"passed":true,"tokens":2530,"timeMs":20393,"cost":0.0291},{"modelName":"GLM 4.7 Flash","score":84.6,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":88.6,"functional":84.2,"quality":80.9,"passed":true,"tokens":2890,"timeMs":76154,"cost":0.0014},{"modelName":"Grok 4","score":89.7,"functional":85.3,"quality":81.4,"passed":true,"tokens":2257,"timeMs":87791,"cost":0.0295},{"modelName":"Grok 4.1 Fast","score":83.2,"functional":79.1,"quality":78.5,"passed":true,"tokens":3080,"timeMs":85635,"cost":0.0016},{"modelName":"Qwen3 Max","score":88.5,"functional":85,"quality":80,"passed":true,"tokens":4758,"timeMs":87371,"cost":0.027}]},{"taskId":"code-evolution/testing/task-5","category":"code-evolution","subcategory":"testing","results":[{"modelName":"GLM 
4-Plus","score":84,"functional":79.8,"quality":79.6,"passed":true,"tokens":4104,"timeMs":74748,"cost":0.0061},{"modelName":"MiniMax M2.1","score":86.3,"functional":82,"quality":80.6,"passed":true,"tokens":16574,"timeMs":176142,"cost":0.0154},{"modelName":"GLM-4.7","score":75.9,"functional":72.1,"quality":78.1,"passed":true,"tokens":2783,"timeMs":53945,"cost":0.0036},{"modelName":"Gemini 3 Flash","score":77.9,"functional":74,"quality":74.4,"passed":true,"tokens":2179,"timeMs":36091,"cost":0.0039},{"modelName":"Gemini 3 Pro Preview","score":89.3,"functional":85,"quality":80,"passed":true,"tokens":3829,"timeMs":35733,"cost":0.0536},{"modelName":"Claude Sonnet 4.5","score":80.8,"functional":76.8,"quality":78.6,"passed":true,"tokens":2749,"timeMs":46664,"cost":0.0354},{"modelName":"Claude Opus 4.5","score":90.5,"functional":85.9,"quality":81.3,"passed":true,"tokens":3225,"timeMs":43384,"cost":0.0746},{"modelName":"Claude Haiku 4.5","score":90.4,"functional":85.9,"quality":80.9,"passed":true,"tokens":5098,"timeMs":17433,"cost":0.0154},{"modelName":"DeepSeek v3.2","score":88.6,"functional":84.2,"quality":81,"passed":true,"tokens":3281,"timeMs":69346,"cost":0.0033},{"modelName":"OpenAI GPT-5.2","score":81.9,"functional":77.8,"quality":78.4,"passed":true,"tokens":2668,"timeMs":30193,"cost":0.028},{"modelName":"GLM 4.7 Flash","score":84.6,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":86.3,"functional":82,"quality":80.2,"passed":true,"tokens":2411,"timeMs":74818,"cost":0.0012},{"modelName":"Grok 4","score":89.9,"functional":85.4,"quality":81.5,"passed":true,"tokens":2410,"timeMs":82336,"cost":0.0253},{"modelName":"Grok 4.1 Fast","score":80.9,"functional":76.9,"quality":77.8,"passed":true,"tokens":3786,"timeMs":73514,"cost":0.0016},{"modelName":"Qwen3 
Max","score":88.5,"functional":85,"quality":80,"passed":true,"tokens":4758,"timeMs":87371,"cost":0.027}]},{"taskId":"code-evolution/advanced/task-1","category":"code-evolution","subcategory":"advanced","results":[{"modelName":"GLM 4-Plus","score":86.4,"functional":82.1,"quality":80.4,"passed":true,"tokens":3787,"timeMs":83974,"cost":0.0059},{"modelName":"MiniMax M2.1","score":83.9,"functional":79.7,"quality":79.8,"passed":true,"tokens":12520,"timeMs":124129,"cost":0.0116},{"modelName":"GLM-4.7","score":76.6,"functional":72.8,"quality":78.3,"passed":true,"tokens":2809,"timeMs":66827,"cost":0.0043},{"modelName":"Gemini 3 Flash","score":80.3,"functional":76.3,"quality":75.1,"passed":true,"tokens":2114,"timeMs":28156,"cost":0.0047},{"modelName":"Gemini 3 Pro Preview","score":89.6,"functional":85,"quality":80,"passed":true,"tokens":4225,"timeMs":41258,"cost":0.0592},{"modelName":"Claude Sonnet 4.5","score":82.2,"functional":78.1,"quality":79,"passed":true,"tokens":3378,"timeMs":44887,"cost":0.0457},{"modelName":"Claude Opus 4.5","score":91.1,"functional":86.6,"quality":81.5,"passed":true,"tokens":4109,"timeMs":42419,"cost":0.0733},{"modelName":"Claude Haiku 4.5","score":88.8,"functional":84.4,"quality":80.4,"passed":true,"tokens":3812,"timeMs":23509,"cost":0.018},{"modelName":"DeepSeek v3.2","score":89.9,"functional":85.4,"quality":81.4,"passed":true,"tokens":3466,"timeMs":93860,"cost":0.0028},{"modelName":"OpenAI GPT-5.2","score":80.8,"functional":76.8,"quality":78.1,"passed":true,"tokens":2241,"timeMs":22808,"cost":0.0249},{"modelName":"GLM 4.7 Flash","score":88.6,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":83.9,"functional":79.7,"quality":79.4,"passed":true,"tokens":2395,"timeMs":54997,"cost":0.0012},{"modelName":"Grok 4","score":88.9,"functional":84.4,"quality":81.2,"passed":true,"tokens":2907,"timeMs":72282,"cost":0.0304},{"modelName":"Grok 4.1 
Fast","score":79.3,"functional":75.4,"quality":77.4,"passed":true,"tokens":2887,"timeMs":62334,"cost":0.0015},{"modelName":"Qwen3 Max","score":89,"functional":85,"quality":80,"passed":true,"tokens":5151,"timeMs":114550,"cost":0.0302}]},{"taskId":"code-evolution/advanced/task-2","category":"code-evolution","subcategory":"advanced","results":[{"modelName":"GLM 4-Plus","score":88.6,"functional":84.2,"quality":81,"passed":true,"tokens":5223,"timeMs":117811,"cost":0.0052},{"modelName":"MiniMax M2.1","score":81.6,"functional":77.5,"quality":79.1,"passed":true,"tokens":13129,"timeMs":131346,"cost":0.0159},{"modelName":"GLM-4.7","score":78.4,"functional":74.4,"quality":78.8,"passed":true,"tokens":3911,"timeMs":46421,"cost":0.0048},{"modelName":"Gemini 3 Flash","score":82.7,"functional":78.6,"quality":75.8,"passed":true,"tokens":1737,"timeMs":31806,"cost":0.0042},{"modelName":"Gemini 3 Pro Preview","score":89.6,"functional":85,"quality":80,"passed":true,"tokens":4225,"timeMs":41258,"cost":0.0592},{"modelName":"Claude Sonnet 4.5","score":84.3,"functional":80.1,"quality":79.6,"passed":true,"tokens":3980,"timeMs":41749,"cost":0.0367},{"modelName":"Claude Opus 4.5","score":90.6,"functional":86.1,"quality":81.3,"passed":true,"tokens":4318,"timeMs":43112,"cost":0.0562},{"modelName":"Claude Haiku 4.5","score":86.5,"functional":82.2,"quality":79.7,"passed":true,"tokens":4645,"timeMs":17536,"cost":0.017},{"modelName":"DeepSeek v3.2","score":90.1,"functional":85.6,"quality":81.5,"passed":true,"tokens":3340,"timeMs":64954,"cost":0.0029},{"modelName":"OpenAI GPT-5.2","score":81,"functional":77,"quality":78.1,"passed":true,"tokens":2742,"timeMs":25463,"cost":0.0273},{"modelName":"GLM 4.7 Flash","score":88.6,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 Fast","score":81.9,"functional":77.8,"quality":78.8,"passed":true,"tokens":2465,"timeMs":65500,"cost":0.0011},{"modelName":"Grok 
4","score":86.9,"functional":82.6,"quality":80.6,"passed":true,"tokens":2867,"timeMs":90368,"cost":0.0261},{"modelName":"Grok 4.1 Fast","score":78.8,"functional":74.9,"quality":77.2,"passed":true,"tokens":3048,"timeMs":91787,"cost":0.0013},{"modelName":"Qwen3 Max","score":89,"functional":85,"quality":80,"passed":true,"tokens":5151,"timeMs":114550,"cost":0.0302}]},{"taskId":"code-evolution/advanced/task-3","category":"code-evolution","subcategory":"advanced","results":[{"modelName":"GLM 4-Plus","score":89.9,"functional":85.5,"quality":81.4,"passed":true,"tokens":3847,"timeMs":76145,"cost":0.0051},{"modelName":"MiniMax M2.1","score":79.9,"functional":75.9,"quality":78.7,"passed":true,"tokens":12493,"timeMs":155834,"cost":0.0131},{"modelName":"GLM-4.7","score":80.7,"functional":76.7,"quality":79.5,"passed":true,"tokens":2779,"timeMs":50485,"cost":0.0043},{"modelName":"Gemini 3 Flash","score":84.6,"functional":80.3,"quality":76.3,"passed":true,"tokens":2084,"timeMs":24919,"cost":0.0054},{"modelName":"Gemini 3 Pro Preview","score":89.6,"functional":85,"quality":80,"passed":true,"tokens":4225,"timeMs":41258,"cost":0.0592},{"modelName":"Claude Sonnet 4.5","score":86.8,"functional":82.5,"quality":80.4,"passed":true,"tokens":3943,"timeMs":46230,"cost":0.0397},{"modelName":"Claude Opus 4.5","score":89,"functional":84.5,"quality":80.9,"passed":true,"tokens":4140,"timeMs":56961,"cost":0.0817},{"modelName":"Claude Haiku 4.5","score":84.1,"functional":79.8,"quality":79,"passed":true,"tokens":3884,"timeMs":22619,"cost":0.0194},{"modelName":"DeepSeek v3.2","score":89.1,"functional":84.6,"quality":81.2,"passed":true,"tokens":3164,"timeMs":116364,"cost":0.0023},{"modelName":"OpenAI GPT-5.2","score":82.3,"functional":78.2,"quality":78.5,"passed":true,"tokens":2532,"timeMs":19812,"cost":0.0306},{"modelName":"GLM 4.7 Flash","score":88.6,"functional":85,"quality":80,"passed":true,"tokens":3600,"timeMs":45000,"cost":0.006},{"modelName":"Grok 4 
Fast","score":80.9,"functional":76.8,"quality":78.5,"passed":true,"tokens":2554,"timeMs":69754,"cost":0.0013},{"modelName":"Grok 4","score":84.4,"functional":80.2,"quality":79.8,"passed":true,"tokens":2801,"timeMs":95499,"cost":0.0302},{"modelName":"Grok 4.1 Fast","score":79.5,"functional":75.5,"quality":77.4,"passed":true,"tokens":3003,"timeMs":77613,"cost":0.0015},{"modelName":"Qwen3 Max","score":89,"functional":85,"quality":80,"passed":true,"tokens":5151,"timeMs":114550,"cost":0.0302}]}],"totalTasks":180,"models":["GLM 4-Plus","MiniMax M2.1","GLM-4.7","Gemini 3 Flash","Gemini 3 Pro Preview","Claude Sonnet 4.5","Claude Opus 4.5","Claude Haiku 4.5","DeepSeek v3.2","OpenAI GPT-5.2","GLM 4.7 Flash","Grok 4 Fast","Grok 4","Grok 4.1 Fast","Qwen3 Max"],"categories":["saas-core","glue-code","ai-integration","frontend","api-integrations","code-evolution"]}