codex-multi-auth 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (327) hide show
  1. package/LICENSE +1 -0
  2. package/README.md +162 -0
  3. package/assets/opencode-logo-ornate-dark.svg +18 -0
  4. package/assets/readme-hero.svg +31 -0
  5. package/config/README.md +87 -0
  6. package/config/minimal-opencode.json +13 -0
  7. package/config/opencode-legacy.json +571 -0
  8. package/config/opencode-modern.json +239 -0
  9. package/dist/index.d.ts +45 -0
  10. package/dist/index.d.ts.map +1 -0
  11. package/dist/index.js +3160 -0
  12. package/dist/index.js.map +1 -0
  13. package/dist/lib/accounts/rate-limits.d.ts +22 -0
  14. package/dist/lib/accounts/rate-limits.d.ts.map +1 -0
  15. package/dist/lib/accounts/rate-limits.js +63 -0
  16. package/dist/lib/accounts/rate-limits.js.map +1 -0
  17. package/dist/lib/accounts.d.ts +95 -0
  18. package/dist/lib/accounts.d.ts.map +1 -0
  19. package/dist/lib/accounts.js +668 -0
  20. package/dist/lib/accounts.js.map +1 -0
  21. package/dist/lib/audit.d.ts +45 -0
  22. package/dist/lib/audit.d.ts.map +1 -0
  23. package/dist/lib/audit.js +131 -0
  24. package/dist/lib/audit.js.map +1 -0
  25. package/dist/lib/auth/auth.d.ts +56 -0
  26. package/dist/lib/auth/auth.d.ts.map +1 -0
  27. package/dist/lib/auth/auth.js +214 -0
  28. package/dist/lib/auth/auth.js.map +1 -0
  29. package/dist/lib/auth/browser.d.ts +34 -0
  30. package/dist/lib/auth/browser.d.ts.map +1 -0
  31. package/dist/lib/auth/browser.js +185 -0
  32. package/dist/lib/auth/browser.js.map +1 -0
  33. package/dist/lib/auth/server.d.ts +24 -0
  34. package/dist/lib/auth/server.d.ts.map +1 -0
  35. package/dist/lib/auth/server.js +116 -0
  36. package/dist/lib/auth/server.js.map +1 -0
  37. package/dist/lib/auth/token-utils.d.ts +59 -0
  38. package/dist/lib/auth/token-utils.d.ts.map +1 -0
  39. package/dist/lib/auth/token-utils.js +331 -0
  40. package/dist/lib/auth/token-utils.js.map +1 -0
  41. package/dist/lib/auth-rate-limit.d.ts +20 -0
  42. package/dist/lib/auth-rate-limit.d.ts.map +1 -0
  43. package/dist/lib/auth-rate-limit.js +91 -0
  44. package/dist/lib/auth-rate-limit.js.map +1 -0
  45. package/dist/lib/auto-update-checker.d.ts +10 -0
  46. package/dist/lib/auto-update-checker.d.ts.map +1 -0
  47. package/dist/lib/auto-update-checker.js +216 -0
  48. package/dist/lib/auto-update-checker.js.map +1 -0
  49. package/dist/lib/capability-policy.d.ts +18 -0
  50. package/dist/lib/capability-policy.d.ts.map +1 -0
  51. package/dist/lib/capability-policy.js +150 -0
  52. package/dist/lib/capability-policy.js.map +1 -0
  53. package/dist/lib/circuit-breaker.d.ts +34 -0
  54. package/dist/lib/circuit-breaker.d.ts.map +1 -0
  55. package/dist/lib/circuit-breaker.js +124 -0
  56. package/dist/lib/circuit-breaker.js.map +1 -0
  57. package/dist/lib/cli.d.ts +64 -0
  58. package/dist/lib/cli.d.ts.map +1 -0
  59. package/dist/lib/cli.js +274 -0
  60. package/dist/lib/cli.js.map +1 -0
  61. package/dist/lib/codex-cli/observability.d.ts +22 -0
  62. package/dist/lib/codex-cli/observability.d.ts.map +1 -0
  63. package/dist/lib/codex-cli/observability.js +36 -0
  64. package/dist/lib/codex-cli/observability.js.map +1 -0
  65. package/dist/lib/codex-cli/state.d.ts +86 -0
  66. package/dist/lib/codex-cli/state.d.ts.map +1 -0
  67. package/dist/lib/codex-cli/state.js +470 -0
  68. package/dist/lib/codex-cli/state.js.map +1 -0
  69. package/dist/lib/codex-cli/sync.d.ts +27 -0
  70. package/dist/lib/codex-cli/sync.d.ts.map +1 -0
  71. package/dist/lib/codex-cli/sync.js +325 -0
  72. package/dist/lib/codex-cli/sync.js.map +1 -0
  73. package/dist/lib/codex-cli/writer.d.ts +12 -0
  74. package/dist/lib/codex-cli/writer.d.ts.map +1 -0
  75. package/dist/lib/codex-cli/writer.js +388 -0
  76. package/dist/lib/codex-cli/writer.js.map +1 -0
  77. package/dist/lib/codex-manager.d.ts +2 -0
  78. package/dist/lib/codex-manager.d.ts.map +1 -0
  79. package/dist/lib/codex-manager.js +4841 -0
  80. package/dist/lib/codex-manager.js.map +1 -0
  81. package/dist/lib/config.d.ts +269 -0
  82. package/dist/lib/config.d.ts.map +1 -0
  83. package/dist/lib/config.js +789 -0
  84. package/dist/lib/config.js.map +1 -0
  85. package/dist/lib/constants.d.ts +78 -0
  86. package/dist/lib/constants.d.ts.map +1 -0
  87. package/dist/lib/constants.js +78 -0
  88. package/dist/lib/constants.js.map +1 -0
  89. package/dist/lib/context-overflow.d.ts +27 -0
  90. package/dist/lib/context-overflow.d.ts.map +1 -0
  91. package/dist/lib/context-overflow.js +124 -0
  92. package/dist/lib/context-overflow.js.map +1 -0
  93. package/dist/lib/dashboard-settings.d.ts +90 -0
  94. package/dist/lib/dashboard-settings.d.ts.map +1 -0
  95. package/dist/lib/dashboard-settings.js +327 -0
  96. package/dist/lib/dashboard-settings.js.map +1 -0
  97. package/dist/lib/entitlement-cache.d.ts +41 -0
  98. package/dist/lib/entitlement-cache.d.ts.map +1 -0
  99. package/dist/lib/entitlement-cache.js +137 -0
  100. package/dist/lib/entitlement-cache.js.map +1 -0
  101. package/dist/lib/errors.d.ts +113 -0
  102. package/dist/lib/errors.d.ts.map +1 -0
  103. package/dist/lib/errors.js +103 -0
  104. package/dist/lib/errors.js.map +1 -0
  105. package/dist/lib/forecast.d.ts +42 -0
  106. package/dist/lib/forecast.d.ts.map +1 -0
  107. package/dist/lib/forecast.js +256 -0
  108. package/dist/lib/forecast.js.map +1 -0
  109. package/dist/lib/health.d.ts +33 -0
  110. package/dist/lib/health.d.ts.map +1 -0
  111. package/dist/lib/health.js +70 -0
  112. package/dist/lib/health.js.map +1 -0
  113. package/dist/lib/index.d.ts +32 -0
  114. package/dist/lib/index.d.ts.map +1 -0
  115. package/dist/lib/index.js +32 -0
  116. package/dist/lib/index.js.map +1 -0
  117. package/dist/lib/live-account-sync.d.ts +39 -0
  118. package/dist/lib/live-account-sync.d.ts.map +1 -0
  119. package/dist/lib/live-account-sync.js +196 -0
  120. package/dist/lib/live-account-sync.js.map +1 -0
  121. package/dist/lib/logger.d.ts +40 -0
  122. package/dist/lib/logger.d.ts.map +1 -0
  123. package/dist/lib/logger.js +364 -0
  124. package/dist/lib/logger.js.map +1 -0
  125. package/dist/lib/oauth-success.html +338 -0
  126. package/dist/lib/parallel-probe.d.ts +28 -0
  127. package/dist/lib/parallel-probe.d.ts.map +1 -0
  128. package/dist/lib/parallel-probe.js +97 -0
  129. package/dist/lib/parallel-probe.js.map +1 -0
  130. package/dist/lib/preemptive-quota-scheduler.d.ts +53 -0
  131. package/dist/lib/preemptive-quota-scheduler.d.ts.map +1 -0
  132. package/dist/lib/preemptive-quota-scheduler.js +220 -0
  133. package/dist/lib/preemptive-quota-scheduler.js.map +1 -0
  134. package/dist/lib/proactive-refresh.d.ts +66 -0
  135. package/dist/lib/proactive-refresh.d.ts.map +1 -0
  136. package/dist/lib/proactive-refresh.js +143 -0
  137. package/dist/lib/proactive-refresh.js.map +1 -0
  138. package/dist/lib/prompts/codex-opencode-bridge.d.ts +19 -0
  139. package/dist/lib/prompts/codex-opencode-bridge.d.ts.map +1 -0
  140. package/dist/lib/prompts/codex-opencode-bridge.js +169 -0
  141. package/dist/lib/prompts/codex-opencode-bridge.js.map +1 -0
  142. package/dist/lib/prompts/codex.d.ts +41 -0
  143. package/dist/lib/prompts/codex.d.ts.map +1 -0
  144. package/dist/lib/prompts/codex.js +383 -0
  145. package/dist/lib/prompts/codex.js.map +1 -0
  146. package/dist/lib/prompts/opencode-codex.d.ts +25 -0
  147. package/dist/lib/prompts/opencode-codex.d.ts.map +1 -0
  148. package/dist/lib/prompts/opencode-codex.js +270 -0
  149. package/dist/lib/prompts/opencode-codex.js.map +1 -0
  150. package/dist/lib/quota-cache.d.ts +68 -0
  151. package/dist/lib/quota-cache.d.ts.map +1 -0
  152. package/dist/lib/quota-cache.js +224 -0
  153. package/dist/lib/quota-cache.js.map +1 -0
  154. package/dist/lib/quota-probe.d.ts +49 -0
  155. package/dist/lib/quota-probe.d.ts.map +1 -0
  156. package/dist/lib/quota-probe.js +368 -0
  157. package/dist/lib/quota-probe.js.map +1 -0
  158. package/dist/lib/recovery/constants.d.ts +12 -0
  159. package/dist/lib/recovery/constants.d.ts.map +1 -0
  160. package/dist/lib/recovery/constants.js +31 -0
  161. package/dist/lib/recovery/constants.js.map +1 -0
  162. package/dist/lib/recovery/index.d.ts +12 -0
  163. package/dist/lib/recovery/index.d.ts.map +1 -0
  164. package/dist/lib/recovery/index.js +12 -0
  165. package/dist/lib/recovery/index.js.map +1 -0
  166. package/dist/lib/recovery/storage.d.ts +24 -0
  167. package/dist/lib/recovery/storage.d.ts.map +1 -0
  168. package/dist/lib/recovery/storage.js +362 -0
  169. package/dist/lib/recovery/storage.js.map +1 -0
  170. package/dist/lib/recovery/types.d.ts +116 -0
  171. package/dist/lib/recovery/types.d.ts.map +1 -0
  172. package/dist/lib/recovery/types.js +7 -0
  173. package/dist/lib/recovery/types.js.map +1 -0
  174. package/dist/lib/recovery.d.ts +31 -0
  175. package/dist/lib/recovery.d.ts.map +1 -0
  176. package/dist/lib/recovery.js +313 -0
  177. package/dist/lib/recovery.js.map +1 -0
  178. package/dist/lib/refresh-guardian.d.ts +31 -0
  179. package/dist/lib/refresh-guardian.d.ts.map +1 -0
  180. package/dist/lib/refresh-guardian.js +151 -0
  181. package/dist/lib/refresh-guardian.js.map +1 -0
  182. package/dist/lib/refresh-lease.d.ts +37 -0
  183. package/dist/lib/refresh-lease.d.ts.map +1 -0
  184. package/dist/lib/refresh-lease.js +335 -0
  185. package/dist/lib/refresh-lease.js.map +1 -0
  186. package/dist/lib/refresh-queue.d.ts +117 -0
  187. package/dist/lib/refresh-queue.d.ts.map +1 -0
  188. package/dist/lib/refresh-queue.js +297 -0
  189. package/dist/lib/refresh-queue.js.map +1 -0
  190. package/dist/lib/request/failure-policy.d.ts +42 -0
  191. package/dist/lib/request/failure-policy.d.ts.map +1 -0
  192. package/dist/lib/request/failure-policy.js +133 -0
  193. package/dist/lib/request/failure-policy.js.map +1 -0
  194. package/dist/lib/request/fetch-helpers.d.ts +152 -0
  195. package/dist/lib/request/fetch-helpers.d.ts.map +1 -0
  196. package/dist/lib/request/fetch-helpers.js +704 -0
  197. package/dist/lib/request/fetch-helpers.js.map +1 -0
  198. package/dist/lib/request/helpers/input-utils.d.ts +7 -0
  199. package/dist/lib/request/helpers/input-utils.d.ts.map +1 -0
  200. package/dist/lib/request/helpers/input-utils.js +214 -0
  201. package/dist/lib/request/helpers/input-utils.js.map +1 -0
  202. package/dist/lib/request/helpers/model-map.d.ts +28 -0
  203. package/dist/lib/request/helpers/model-map.d.ts.map +1 -0
  204. package/dist/lib/request/helpers/model-map.js +133 -0
  205. package/dist/lib/request/helpers/model-map.js.map +1 -0
  206. package/dist/lib/request/helpers/tool-utils.d.ts +29 -0
  207. package/dist/lib/request/helpers/tool-utils.d.ts.map +1 -0
  208. package/dist/lib/request/helpers/tool-utils.js +117 -0
  209. package/dist/lib/request/helpers/tool-utils.js.map +1 -0
  210. package/dist/lib/request/rate-limit-backoff.d.ts +17 -0
  211. package/dist/lib/request/rate-limit-backoff.d.ts.map +1 -0
  212. package/dist/lib/request/rate-limit-backoff.js +83 -0
  213. package/dist/lib/request/rate-limit-backoff.js.map +1 -0
  214. package/dist/lib/request/request-transformer.d.ts +107 -0
  215. package/dist/lib/request/request-transformer.d.ts.map +1 -0
  216. package/dist/lib/request/request-transformer.js +814 -0
  217. package/dist/lib/request/request-transformer.js.map +1 -0
  218. package/dist/lib/request/response-handler.d.ts +23 -0
  219. package/dist/lib/request/response-handler.d.ts.map +1 -0
  220. package/dist/lib/request/response-handler.js +155 -0
  221. package/dist/lib/request/response-handler.js.map +1 -0
  222. package/dist/lib/request/stream-failover.d.ts +21 -0
  223. package/dist/lib/request/stream-failover.d.ts.map +1 -0
  224. package/dist/lib/request/stream-failover.js +204 -0
  225. package/dist/lib/request/stream-failover.js.map +1 -0
  226. package/dist/lib/rotation.d.ts +146 -0
  227. package/dist/lib/rotation.d.ts.map +1 -0
  228. package/dist/lib/rotation.js +321 -0
  229. package/dist/lib/rotation.js.map +1 -0
  230. package/dist/lib/runtime-paths.d.ts +58 -0
  231. package/dist/lib/runtime-paths.d.ts.map +1 -0
  232. package/dist/lib/runtime-paths.js +164 -0
  233. package/dist/lib/runtime-paths.js.map +1 -0
  234. package/dist/lib/schemas.d.ts +435 -0
  235. package/dist/lib/schemas.d.ts.map +1 -0
  236. package/dist/lib/schemas.js +268 -0
  237. package/dist/lib/schemas.js.map +1 -0
  238. package/dist/lib/session-affinity.d.ts +23 -0
  239. package/dist/lib/session-affinity.d.ts.map +1 -0
  240. package/dist/lib/session-affinity.js +127 -0
  241. package/dist/lib/session-affinity.js.map +1 -0
  242. package/dist/lib/shutdown.d.ts +7 -0
  243. package/dist/lib/shutdown.d.ts.map +1 -0
  244. package/dist/lib/shutdown.js +43 -0
  245. package/dist/lib/shutdown.js.map +1 -0
  246. package/dist/lib/storage/migrations.d.ts +59 -0
  247. package/dist/lib/storage/migrations.d.ts.map +1 -0
  248. package/dist/lib/storage/migrations.js +41 -0
  249. package/dist/lib/storage/migrations.js.map +1 -0
  250. package/dist/lib/storage/paths.d.ts +51 -0
  251. package/dist/lib/storage/paths.d.ts.map +1 -0
  252. package/dist/lib/storage/paths.js +152 -0
  253. package/dist/lib/storage/paths.js.map +1 -0
  254. package/dist/lib/storage.d.ts +106 -0
  255. package/dist/lib/storage.d.ts.map +1 -0
  256. package/dist/lib/storage.js +896 -0
  257. package/dist/lib/storage.js.map +1 -0
  258. package/dist/lib/table-formatter.d.ts +32 -0
  259. package/dist/lib/table-formatter.d.ts.map +1 -0
  260. package/dist/lib/table-formatter.js +44 -0
  261. package/dist/lib/table-formatter.js.map +1 -0
  262. package/dist/lib/tools/hashline-tools.d.ts +51 -0
  263. package/dist/lib/tools/hashline-tools.d.ts.map +1 -0
  264. package/dist/lib/tools/hashline-tools.js +456 -0
  265. package/dist/lib/tools/hashline-tools.js.map +1 -0
  266. package/dist/lib/types.d.ts +130 -0
  267. package/dist/lib/types.d.ts.map +1 -0
  268. package/dist/lib/types.js +2 -0
  269. package/dist/lib/types.js.map +1 -0
  270. package/dist/lib/ui/ansi.d.ts +40 -0
  271. package/dist/lib/ui/ansi.d.ts.map +1 -0
  272. package/dist/lib/ui/ansi.js +68 -0
  273. package/dist/lib/ui/ansi.js.map +1 -0
  274. package/dist/lib/ui/auth-menu.d.ts +76 -0
  275. package/dist/lib/ui/auth-menu.d.ts.map +1 -0
  276. package/dist/lib/ui/auth-menu.js +590 -0
  277. package/dist/lib/ui/auth-menu.js.map +1 -0
  278. package/dist/lib/ui/confirm.d.ts +11 -0
  279. package/dist/lib/ui/confirm.d.ts.map +1 -0
  280. package/dist/lib/ui/confirm.js +29 -0
  281. package/dist/lib/ui/confirm.js.map +1 -0
  282. package/dist/lib/ui/copy.d.ts +123 -0
  283. package/dist/lib/ui/copy.d.ts.map +1 -0
  284. package/dist/lib/ui/copy.js +127 -0
  285. package/dist/lib/ui/copy.js.map +1 -0
  286. package/dist/lib/ui/format.d.ts +62 -0
  287. package/dist/lib/ui/format.d.ts.map +1 -0
  288. package/dist/lib/ui/format.js +205 -0
  289. package/dist/lib/ui/format.js.map +1 -0
  290. package/dist/lib/ui/runtime.d.ts +43 -0
  291. package/dist/lib/ui/runtime.d.ts.map +1 -0
  292. package/dist/lib/ui/runtime.js +69 -0
  293. package/dist/lib/ui/runtime.js.map +1 -0
  294. package/dist/lib/ui/select.d.ts +60 -0
  295. package/dist/lib/ui/select.d.ts.map +1 -0
  296. package/dist/lib/ui/select.js +467 -0
  297. package/dist/lib/ui/select.js.map +1 -0
  298. package/dist/lib/ui/theme.d.ts +56 -0
  299. package/dist/lib/ui/theme.d.ts.map +1 -0
  300. package/dist/lib/ui/theme.js +186 -0
  301. package/dist/lib/ui/theme.js.map +1 -0
  302. package/dist/lib/unified-settings.d.ts +71 -0
  303. package/dist/lib/unified-settings.d.ts.map +1 -0
  304. package/dist/lib/unified-settings.js +299 -0
  305. package/dist/lib/unified-settings.js.map +1 -0
  306. package/dist/lib/utils.d.ts +29 -0
  307. package/dist/lib/utils.d.ts.map +1 -0
  308. package/dist/lib/utils.js +54 -0
  309. package/dist/lib/utils.js.map +1 -0
  310. package/package.json +115 -0
  311. package/scripts/audit-dev-allowlist.js +128 -0
  312. package/scripts/bench-format/hashline-v2.mjs +642 -0
  313. package/scripts/bench-format/models.mjs +105 -0
  314. package/scripts/bench-format/opencode.mjs +205 -0
  315. package/scripts/bench-format/render.mjs +496 -0
  316. package/scripts/bench-format/stats.mjs +54 -0
  317. package/scripts/bench-format/tasks.mjs +151 -0
  318. package/scripts/benchmark-edit-formats.mjs +1161 -0
  319. package/scripts/benchmark-render-dashboard.mjs +49 -0
  320. package/scripts/codex-multi-auth.js +6 -0
  321. package/scripts/codex-routing.js +34 -0
  322. package/scripts/codex.js +122 -0
  323. package/scripts/copy-oauth-success.js +37 -0
  324. package/scripts/install-opencode-codex-auth.js +193 -0
  325. package/scripts/test-all-models.sh +7 -0
  326. package/scripts/test-model-matrix.js +424 -0
  327. package/scripts/validate-model-map.sh +7 -0
@@ -0,0 +1,496 @@
1
+ import { pctDelta, round1 } from "./stats.mjs";
2
+
3
/**
 * Render a number with a fixed count of decimals.
 *
 * @param {unknown} value - Candidate value to format.
 * @param {number} [digits=1] - Number of decimals passed to `toFixed`.
 * @returns {string} Fixed-point text, or "-" when `value` is not a finite number.
 */
function formatNumber(value, digits = 1) {
  const isUsable = typeof value === "number" && Number.isFinite(value);
  return isUsable ? value.toFixed(digits) : "-";
}
9
+
10
/**
 * Format a duration given in milliseconds.
 *
 * @param {unknown} value - Duration in milliseconds.
 * @returns {string} Seconds with one decimal ("1.5s") from 1000 ms upward,
 *   whole milliseconds ("250ms") below that, or "-" for non-finite input.
 */
function formatMs(value) {
  if (typeof value === "number" && Number.isFinite(value)) {
    return value < 1000
      ? `${Math.round(value)}ms`
      : `${(value / 1000).toFixed(1)}s`;
  }
  return "-";
}
19
+
20
/**
 * Format a percentage value with a trailing "%".
 *
 * @param {unknown} value - Percentage (already scaled, e.g. 42.5 for 42.5%).
 * @param {number} [digits=1] - Number of decimals passed to `toFixed`.
 * @returns {string} Text such as "42.5%", or "-" when `value` is not a finite number.
 */
function formatPct(value, digits = 1) {
  const isUsable = typeof value === "number" && Number.isFinite(value);
  if (isUsable) {
    return `${value.toFixed(digits)}%`;
  }
  return "-";
}
26
+
27
/**
 * Format a delta with an explicit "+" for positive values.
 *
 * @param {unknown} value - Delta to format.
 * @param {number} [digits=1] - Number of decimals passed to `toFixed`.
 * @param {string} [suffix=""] - Text appended after the number (e.g. "%").
 * @returns {string} Signed text such as "+1.5%", or "-" when `value` is not a finite number.
 */
function formatSigned(value, digits = 1, suffix = "") {
  const isUsable = typeof value === "number" && Number.isFinite(value);
  if (!isUsable) return "-";
  // Negative numbers already carry their own "-" from toFixed; zero gets no sign.
  const prefix = value > 0 ? "+" : "";
  const magnitude = value.toFixed(digits);
  return `${prefix}${magnitude}${suffix}`;
}
34
+
35
/**
 * Look up the metrics recorded for one mode on a leaderboard row.
 *
 * @param {{ modes?: Record<string, unknown> }} row - Row whose `modes` map is read.
 * @param {string} mode - Key into `row.modes`.
 * @returns {unknown} The stored entry, or `null` when `modes` or the entry is null/undefined.
 */
function modeMetric(row, mode) {
  const entry = row.modes?.[mode];
  return entry ?? null;
}
38
+
39
/**
 * Return a sorted copy of `summary.rows` for leaderboard display.
 *
 * Ordering: hashline_v2 accuracy descending, then hashline accuracy descending,
 * then label (displayName, falling back to modelId) via `localeCompare`.
 * A missing accuracy value is treated as -1.
 *
 * @param {{ rows?: object[] }} summary - Benchmark summary; `rows` defaults to an empty list.
 * @returns {object[]} New array; the input array is not reordered.
 */
function buildLeaderboardRows(summary) {
  const accuracyOf = (row, mode) => modeMetric(row, mode)?.accuracyPct ?? -1;
  const labelOf = (row) => row.displayName ?? row.modelId;

  const ranked = [...(summary.rows ?? [])];
  ranked.sort((left, right) => {
    // Primary key first, then the secondary key; higher accuracy ranks earlier.
    for (const mode of ["hashline_v2", "hashline"]) {
      const leftScore = accuracyOf(left, mode);
      const rightScore = accuracyOf(right, mode);
      if (rightScore !== leftScore) return rightScore - leftScore;
    }
    return labelOf(left).localeCompare(labelOf(right));
  });
  return ranked;
}
51
+
52
/**
 * Build the Markdown benchmark report for a summary object.
 *
 * Sections emitted, in order: header metadata, an optional "Failures" list
 * (capped at 20 entries plus a "... N more" line), the accuracy leaderboard,
 * the per-mode p50 wall-time table, and fixed notes.
 *
 * @param {object} summary - Benchmark summary. Reads `meta`, `failures` and `rows`;
 *   missing `meta` fields and a missing `failures` list fall back to placeholders.
 * @returns {string} Markdown text with lines joined by "\n".
 */
export function buildMarkdownReport(summary) {
  const MAX_LISTED_FAILURES = 20;
  const rows = buildLeaderboardRows(summary);
  const meta = summary.meta ?? {};
  const failures = summary.failures ?? [];
  const lines = [];
  const write = (line = "") => lines.push(line);
  // Same fallback the leaderboard sort uses, so a row that only carries a
  // modelId is not rendered as the literal text "undefined".
  const labelOf = (row) => row.displayName ?? row.modelId;

  write("# Code Edit Format Benchmark");
  write("");
  write(`Generated: ${meta.generatedAt ?? "-"}`);
  write(`Preset: ${meta.preset ?? "-"}`);
  write(`Models: ${(meta.models ?? []).length}`);
  write(`Tasks: ${(meta.tasks ?? []).length}`);
  write(`Modes: ${(meta.modes ?? []).join(", ")}`);
  write(`Runs: ${meta.runCount ?? 0} measured + ${meta.warmupCount ?? 0} warmup`);
  write("");

  if (failures.length > 0) {
    write("## Failures");
    write("");
    for (const failure of failures.slice(0, MAX_LISTED_FAILURES)) {
      write(`- ${failure.modelId} / ${failure.mode} / ${failure.taskId} / ${failure.phase}: ${failure.reason}`);
    }
    if (failures.length > MAX_LISTED_FAILURES) {
      write(`- ... ${failures.length - MAX_LISTED_FAILURES} more`);
    }
    write("");
  }

  write("## Leaderboard (Accuracy First)");
  write("");
  write("| # | Model | Patch | Replace | Hashline | Hashline v2 | Delta v2 vs Patch | Delta v2 vs Replace | Tokens v2 vs Replace |");
  write("|---|-------|------:|--------:|---------:|------------:|--------------:|----------------:|--------------------:|");
  rows.forEach((row, index) => {
    const patch = modeMetric(row, "patch");
    const replace = modeMetric(row, "replace");
    const hashline = modeMetric(row, "hashline");
    const hashlineV2 = modeMetric(row, "hashline_v2");
    // Deltas are only computed when both modes exist; a missing accuracy counts as 0.
    const v2VsPatch = (hashlineV2 && patch)
      ? round1((hashlineV2.accuracyPct ?? 0) - (patch.accuracyPct ?? 0))
      : null;
    const v2VsReplace = (hashlineV2 && replace)
      ? round1((hashlineV2.accuracyPct ?? 0) - (replace.accuracyPct ?? 0))
      : null;
    // NaN stands in for a missing token count so pctDelta yields null and the cell shows "-".
    const tokenDelta = (hashlineV2 && replace)
      ? round1(pctDelta(hashlineV2.tokensTotalP50 ?? NaN, replace.tokensTotalP50 ?? NaN))
      : null;
    write(
      `| ${index + 1} | ${labelOf(row)} | ${formatPct(patch?.accuracyPct)} | ${formatPct(replace?.accuracyPct)} | ${formatPct(hashline?.accuracyPct)} | ${formatPct(hashlineV2?.accuracyPct)} | ${formatSigned(v2VsPatch)} | ${formatSigned(v2VsReplace)} | ${formatSigned(tokenDelta, 1, "%")} |`,
    );
  });
  write("");

  write("## Per-Mode Timing (p50 wall)");
  write("");
  write("| Model | Patch | Replace | Hashline | Hashline v2 |");
  write("|-------|------:|--------:|---------:|------------:|");
  rows.forEach((row) => {
    write(
      `| ${labelOf(row)} | ${formatMs(modeMetric(row, "patch")?.wallMsP50)} | ${formatMs(modeMetric(row, "replace")?.wallMsP50)} | ${formatMs(modeMetric(row, "hashline")?.wallMsP50)} | ${formatMs(modeMetric(row, "hashline_v2")?.wallMsP50)} |`,
    );
  });
  write("");

  write("## Notes");
  write("");
  write("- Accuracy is pass rate across the exact same task IDs for each model/mode.");
  write("- Tokens column in dashboard defaults to selected mode vs Replace baseline.");
  write("- Tool names are normalized for analysis (`edit` and `apply_patch` are treated as edit-call family).\n");

  return lines.join("\n");
}
118
+
119
/**
 * Escape a value for safe insertion into HTML text or a quoted attribute.
 *
 * Encodes `&`, `<`, `>`, `"` and `'`. All five are replaced in a single pass,
 * so the `&` introduced by one entity is never re-encoded by a later step.
 *
 * @param {unknown} value - Value to escape; coerced with `String()`.
 * @returns {string} The escaped text.
 */
function escapeHtml(value) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };
  return String(value).replace(/[&<>"']/g, (ch) => entities[ch]);
}
127
+
128
/**
 * Render the benchmark dashboard as a single self-contained HTML document.
 *
 * The page embeds `{ meta, rows }` as JSON inside an inline script, which sorts
 * and renders the rows client-side when a sort button is clicked.
 *
 * Hardening relative to a naive embed:
 * - every `<` in the embedded JSON is written as `\u003c`, so a string value
 *   containing `</script>` or `<!--` cannot terminate or corrupt the script element;
 * - the client script HTML-escapes `displayName` / `modelId` before assigning
 *   them through `innerHTML`.
 *
 * @param {object} summary - Benchmark summary; reads `meta` and `rows`.
 * @returns {string} Complete HTML document text.
 */
export function renderDashboardHtml(summary) {
  const rows = buildLeaderboardRows(summary);
  const payload = {
    meta: summary.meta,
    rows,
  };
  // In JSON, "<" can only occur inside string literals, so the \u003c escape
  // keeps the data identical once the browser parses the script.
  const payloadJson = JSON.stringify(payload).replace(/</g, "\\u003c");

  return `<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Code Edit Format Benchmark</title>
<style>
:root {
  --bg: #070b18;
  --panel: #0b1123;
  --panel-2: #0f172b;
  --line: rgba(149, 167, 255, 0.16);
  --text: #ecf2ff;
  --muted: #9cb0d9;
  --green: #4cf6a0;
  --green-2: #28d98d;
  --blue: #8fb6ff;
  --chip: rgba(255, 255, 255, 0.05);
  --danger: #ff6b86;
}
* { box-sizing: border-box; }
body {
  margin: 0;
  color: var(--text);
  background:
    radial-gradient(1000px 500px at 20% -10%, rgba(92, 128, 255, 0.16), transparent 60%),
    radial-gradient(900px 500px at 100% 0%, rgba(76, 246, 160, 0.10), transparent 55%),
    var(--bg);
  font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial, sans-serif;
}
.container {
  max-width: 1200px;
  margin: 22px auto;
  padding: 18px;
  border: 1px solid var(--line);
  border-radius: 18px;
  background: linear-gradient(180deg, rgba(255,255,255,0.01), rgba(255,255,255,0));
  box-shadow: 0 20px 80px rgba(0, 0, 0, 0.35);
}
.header h1 {
  margin: 0;
  font-size: 2rem;
  letter-spacing: -0.03em;
}
.header .subtitle {
  margin-top: 8px;
  color: var(--muted);
  font-size: 0.95rem;
}
.header .legend {
  color: var(--muted);
}
.header .legend b { color: var(--green); font-weight: 700; }
.meta {
  margin-top: 6px;
  font-size: 0.8rem;
  color: var(--muted);
}
.controls {
  margin-top: 18px;
  display: flex;
  align-items: center;
  gap: 10px;
  flex-wrap: wrap;
}
.controls .label {
  color: var(--muted);
  font-size: 0.9rem;
}
.button {
  border: 1px solid var(--line);
  background: rgba(255,255,255,0.02);
  color: var(--text);
  border-radius: 999px;
  padding: 8px 14px;
  font-size: 0.85rem;
  cursor: pointer;
}
.button.active {
  border-color: rgba(76, 246, 160, 0.7);
  box-shadow: inset 0 0 0 1px rgba(76, 246, 160, 0.18);
  background: rgba(76, 246, 160, 0.08);
}
.table {
  margin-top: 18px;
  border-top: 1px solid var(--line);
}
.header-row, .row {
  display: grid;
  grid-template-columns: 58px minmax(170px, 220px) minmax(380px, 1fr) 85px 85px 90px;
  gap: 12px;
  align-items: center;
}
.header-row {
  color: var(--muted);
  font-size: 0.78rem;
  padding: 12px 14px;
}
.row {
  margin-top: 8px;
  padding: 12px 14px;
  border: 1px solid var(--line);
  border-radius: 12px;
  background: linear-gradient(180deg, rgba(255,255,255,0.02), rgba(255,255,255,0.01));
}
.rank {
  color: var(--blue);
  font-weight: 700;
  text-align: right;
}
.model {
  font-weight: 600;
  line-height: 1.15;
}
.model small {
  display: block;
  margin-top: 4px;
  color: var(--muted);
  font-weight: 500;
}
.bars { display: grid; gap: 7px; }
.bar-line {
  display: grid;
  grid-template-columns: 84px 1fr 74px;
  gap: 8px;
  align-items: center;
}
.bar-label {
  color: var(--muted);
  font-size: 0.75rem;
  text-transform: uppercase;
  letter-spacing: 0.06em;
}
.bar-track {
  position: relative;
  height: 22px;
  border-radius: 6px;
  border: 1px solid rgba(255,255,255,0.04);
  background: rgba(255,255,255,0.03);
  overflow: hidden;
}
.bar-fill {
  position: absolute;
  inset: 0 auto 0 0;
  width: 0%;
  background: linear-gradient(90deg, rgba(255,255,255,0.08), rgba(255,255,255,0.10));
}
.bar-fill.hashline, .bar-fill.hashline_v2 {
  background: linear-gradient(90deg, rgba(40, 217, 141, 0.50), rgba(76, 246, 160, 0.78));
}
.bar-fill.selected {
  box-shadow: 0 0 20px rgba(76, 246, 160, 0.22);
}
.bar-value {
  text-align: right;
  font-weight: 700;
  color: var(--text);
  font-variant-numeric: tabular-nums;
}
.badge {
  display: inline-flex;
  justify-content: center;
  align-items: center;
  min-width: 56px;
  padding: 4px 10px;
  border-radius: 999px;
  font-weight: 700;
  font-size: 0.85rem;
  border: 1px solid rgba(255,255,255,0.08);
  background: rgba(255,255,255,0.03);
  color: var(--muted);
}
.badge.pos {
  color: var(--green);
  border-color: rgba(76,246,160,0.24);
  background: rgba(76,246,160,0.08);
}
.badge.neg {
  color: var(--danger);
  border-color: rgba(255,107,134,0.22);
  background: rgba(255,107,134,0.07);
}
.badge.neutral { color: var(--muted); }
.details {
  margin-top: 6px;
  color: var(--muted);
  font-size: 0.75rem;
}
@media (max-width: 980px) {
  .header-row { display: none; }
  .row {
    grid-template-columns: 42px 1fr;
    gap: 8px;
  }
  .row > .metric, .row > .token { grid-column: 2 / -1; }
}
</style>
</head>
<body>
<div class="container">
<div class="header">
<h1>Code Edit Format Benchmark</h1>
<div class="subtitle">
<span class="legend">Patch · Replace · <b>Hashline</b> · <b>Hashline v2</b></span>
<span> - accuracy across ${(summary.meta?.models ?? []).length} models</span>
</div>
<div class="meta">Preset: ${escapeHtml(summary.meta?.preset ?? "-")} | Tasks: ${(summary.meta?.tasks ?? []).length} | Modes: ${escapeHtml((summary.meta?.modes ?? []).join(", "))} | Generated: ${escapeHtml(summary.meta?.generatedAt ?? "-")}</div>
</div>
<div class="controls">
<span class="label">Sort</span>
<button class="button active" data-sort="deltaVsReplaceHashline">Delta vs Replace</button>
<button class="button" data-sort="deltaVsPatchHashline">Delta vs Patch</button>
<button class="button" data-sort="hashlinePct">Hashline %</button>
<button class="button" data-sort="hashlineV2Pct">Hashline v2 %</button>
</div>
<div class="table">
<div class="header-row">
<div></div>
<div>MODEL</div>
<div></div>
<div>Delta Patch</div>
<div>Delta Repl.</div>
<div>TOKENS</div>
</div>
<div id="rows"></div>
</div>
</div>
<script>
const data = ${payloadJson};

// Escapes text before it is concatenated into markup assigned via innerHTML.
function esc(value) {
  return String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

function round1(v) {
  return typeof v === 'number' && Number.isFinite(v) ? Math.round(v * 10) / 10 : null;
}

function pct(v) {
  return typeof v === 'number' && Number.isFinite(v) ? v.toFixed(1) + '%' : '-';
}

function badge(v, suffix = '') {
  if (typeof v !== 'number' || !Number.isFinite(v)) {
    return '<span class="badge neutral">-</span>';
  }
  const cls = v > 0 ? 'pos' : v < 0 ? 'neg' : 'neutral';
  const sign = v > 0 ? '+' : '';
  return '<span class="badge ' + cls + '">' + sign + v.toFixed(1) + suffix + '</span>';
}

function mode(row, name) {
  return (row.modes && row.modes[name]) || {};
}

function tokenDeltaPct(selected, replace) {
  if (!selected || !replace) return null;
  const a = selected.tokensTotalP50;
  const b = replace.tokensTotalP50;
  if (!Number.isFinite(a) || !Number.isFinite(b) || b === 0) return null;
  return ((a - b) / b) * 100;
}

function toRankData(rows) {
  return rows.map((row) => {
    const patch = mode(row, 'patch');
    const replace = mode(row, 'replace');
    const hashline = mode(row, 'hashline');
    const hashlineV2 = mode(row, 'hashline_v2');
    return {
      ...row,
      _metrics: {
        patchPct: patch.accuracyPct ?? -1,
        replacePct: replace.accuracyPct ?? -1,
        hashlinePct: hashline.accuracyPct ?? -1,
        hashlineV2Pct: hashlineV2.accuracyPct ?? -1,
        deltaVsReplaceHashline: Number.isFinite(hashline.accuracyPct) && Number.isFinite(replace.accuracyPct)
          ? hashline.accuracyPct - replace.accuracyPct
          : -999,
        deltaVsPatchHashline: Number.isFinite(hashline.accuracyPct) && Number.isFinite(patch.accuracyPct)
          ? hashline.accuracyPct - patch.accuracyPct
          : -999,
      },
    };
  });
}

const rankedRows = toRankData(data.rows || []);
let activeSort = 'deltaVsReplaceHashline';

function sortRows(rows, key) {
  return [...rows].sort((a, b) => {
    const diff = (b._metrics[key] ?? -999) - (a._metrics[key] ?? -999);
    if (diff !== 0) return diff;
    const av2 = b._metrics.hashlineV2Pct - a._metrics.hashlineV2Pct;
    if (av2 !== 0) return av2;
    return (a.displayName || a.modelId).localeCompare(b.displayName || b.modelId);
  });
}

function createBarLine(label, metric, selectedKey) {
  const pctValue = metric && Number.isFinite(metric.accuracyPct) ? metric.accuracyPct : 0;
  const modeName = (label === 'PATCH' ? 'patch' : label === 'REPLACE' ? 'replace' : label === 'HASHLINE' ? 'hashline' : 'hashline_v2');
  const selected = selectedKey === 'hashlinePct' ? modeName === 'hashline' : selectedKey === 'hashlineV2Pct' ? modeName === 'hashline_v2' : modeName === 'hashline';
  return ''
    + '<div class="bar-line">'
    + '<div class="bar-label">' + label + '</div>'
    + '<div class="bar-track"><div class="bar-fill ' + modeName + ' ' + (selected ? 'selected' : '') + '" style="width:' + Math.max(0, Math.min(100, pctValue)) + '%"></div></div>'
    + '<div class="bar-value">' + pct(metric ? metric.accuracyPct : null) + '</div>'
    + '</div>';
}

function render() {
  const rowsEl = document.getElementById('rows');
  const rows = sortRows(rankedRows, activeSort);
  rowsEl.innerHTML = rows.map((row, index) => {
    const patch = mode(row, 'patch');
    const replace = mode(row, 'replace');
    const hashline = mode(row, 'hashline');
    const hashlineV2 = mode(row, 'hashline_v2');

    const selectedMode = activeSort === 'hashlineV2Pct' ? hashlineV2 : hashline;
    const deltaPatch = selectedMode && patch && Number.isFinite(selectedMode.accuracyPct) && Number.isFinite(patch.accuracyPct)
      ? round1(selectedMode.accuracyPct - patch.accuracyPct)
      : null;
    const deltaReplace = selectedMode && replace && Number.isFinite(selectedMode.accuracyPct) && Number.isFinite(replace.accuracyPct)
      ? round1(selectedMode.accuracyPct - replace.accuracyPct)
      : null;
    const tokensDelta = tokenDeltaPct(selectedMode, replace);

    const p50V2 = patch && hashlineV2 ? (Number.isFinite(hashlineV2.wallMsP50) ? (hashlineV2.wallMsP50 / 1000).toFixed(1) + 's' : '-') : '-';
    return ''
      + '<div class="row">'
      + '<div class="rank">' + (index + 1) + '</div>'
      + '<div class="model">' + esc(row.displayName || row.modelId) + '<small>' + esc(row.modelId) + '</small></div>'
      + '<div>'
      + '<div class="bars">'
      + createBarLine('PATCH', patch, activeSort)
      + createBarLine('REPLACE', replace, activeSort)
      + createBarLine('HASHLINE', hashline, activeSort)
      + createBarLine('HASHLINE v2', hashlineV2, activeSort)
      + '</div>'
      + '<div class="details">Task pass rate across identical task IDs and measured runs. p50 wall (hashline v2): ' + p50V2 + '</div>'
      + '</div>'
      + '<div class="metric">' + badge(deltaPatch) + '</div>'
      + '<div class="metric">' + badge(deltaReplace) + '</div>'
      + '<div class="token">' + badge(tokensDelta, '%') + '</div>'
      + '</div>';
  }).join('');
}

for (const button of document.querySelectorAll('.button[data-sort]')) {
  button.addEventListener('click', () => {
    activeSort = button.dataset.sort;
    for (const other of document.querySelectorAll('.button[data-sort]')) {
      other.classList.toggle('active', other === button);
    }
    render();
  });
}

render();
</script>
</body>
</html>`;
}
@@ -0,0 +1,54 @@
1
/**
 * Linearly interpolated percentile of a list of numbers.
 *
 * The input is copied before sorting, so the caller's array is not reordered.
 *
 * @param {number[]} values - Samples to rank.
 * @param {number} p - Fraction in [0, 1] (0.5 is the median). Values outside
 *   that range are clamped so the lookup never indexes past the sorted array.
 * @returns {number|null} The interpolated value, or null when `values` is not
 *   a non-empty array or `p` is not a finite number.
 */
export function percentile(values, p) {
  if (!Array.isArray(values) || values.length === 0) {
    return null;
  }
  // A NaN/Infinity/non-number fraction has no meaningful rank; report "no data"
  // instead of leaking undefined/NaN to callers.
  if (!Number.isFinite(p)) {
    return null;
  }
  const sorted = [...values].sort((a, b) => a - b);
  if (sorted.length === 1) {
    return sorted[0];
  }
  const fraction = Math.min(1, Math.max(0, p));
  const index = (sorted.length - 1) * fraction;
  const low = Math.floor(index);
  const high = Math.ceil(index);
  if (low === high) {
    return sorted[low];
  }
  // Interpolate between the two neighbouring ranks.
  const weight = index - low;
  return sorted[low] + (sorted[high] - sorted[low]) * weight;
}
18
+
19
/**
 * Summary statistics over the finite numbers in `values`.
 *
 * Entries that are not finite numbers (strings, null, NaN, Infinity, ...) are
 * ignored.
 *
 * @param {Array<*>} values - Raw samples; must provide `filter`.
 * @returns {{n: number, min: number, p50: (number|null), p95: (number|null), max: number, mean: number}|null}
 *   The summary, or null when no finite number is present.
 */
export function stats(values) {
  const nums = values.filter((value) => typeof value === "number" && Number.isFinite(value));
  if (nums.length === 0) {
    return null;
  }
  // Fold min/max/sum in a single pass. Spreading the whole array into
  // Math.min/Math.max passes one argument per sample and can exceed the
  // engine's argument limit (RangeError) on very large sample sets.
  let sum = 0;
  let min = Infinity;
  let max = -Infinity;
  for (const value of nums) {
    sum += value;
    min = Math.min(min, value);
    max = Math.max(max, value);
  }
  return {
    n: nums.length,
    min,
    p50: percentile(nums, 0.5),
    p95: percentile(nums, 0.95),
    max,
    mean: sum / nums.length,
  };
}
34
+
35
/**
 * Express `numerator / denominator` as a percentage.
 *
 * @param {number} numerator
 * @param {number} denominator
 * @returns {number|null} The percentage, or null when either argument is not a
 *   finite number or the denominator is zero or negative.
 */
export function safePercent(numerator, denominator) {
  const usable =
    Number.isFinite(numerator) && Number.isFinite(denominator) && denominator > 0;
  return usable ? (numerator / denominator) * 100 : null;
}
41
+
42
/**
 * Round a number to one decimal place.
 *
 * @param {*} value - Candidate value.
 * @returns {number|null} The rounded number, or null when `value` is not a
 *   finite number.
 */
export function round1(value) {
  const isFiniteNumber = typeof value === "number" && Number.isFinite(value);
  if (!isFiniteNumber) {
    return null;
  }
  const tenths = Math.round(value * 10);
  return tenths / 10;
}
48
+
49
/**
 * Relative change of `current` against `baseline`, as a percentage of the
 * baseline.
 *
 * @param {number} current
 * @param {number} baseline
 * @returns {number|null} The percentage change, or null when either argument
 *   is not a finite number or the baseline is zero.
 */
export function pctDelta(current, baseline) {
  const comparable =
    Number.isFinite(current) && Number.isFinite(baseline) && baseline !== 0;
  if (!comparable) {
    return null;
  }
  const change = current - baseline;
  return (change / baseline) * 100;
}
@@ -0,0 +1,151 @@
1
/**
 * Paths describing the benchmark fixture file.
 *
 * - `relativePath`: "src/TodoApp.tsx"
 * - `sourcePath`: "bench/format-benchmark/fixtures/TodoApp.tsx"
 *
 * NOTE(review): presumably `sourcePath` is the fixture in this repository and
 * `relativePath` is where it is placed in the benchmark workspace; confirm
 * against the benchmark runner.
 */
export const BENCHMARK_FIXTURE = { relativePath: "src/TodoApp.tsx", sourcePath: "bench/format-benchmark/fixtures/TodoApp.tsx" };
5
+
6
/**
 * True when every snippet occurs somewhere in `content`.
 * An empty snippet list is trivially satisfied.
 *
 * @param {string} content - Text to search.
 * @param {string[]} snippets - Substrings that must all be present.
 * @returns {boolean}
 */
function hasAll(content, snippets) {
  const isMissing = (needle) => !content.includes(needle);
  return !snippets.some(isMissing);
}
9
+
10
/**
 * True when none of the snippets occurs in `content`.
 * An empty snippet list is trivially satisfied.
 *
 * @param {string} content - Text to search.
 * @param {string[]} snippets - Substrings that must all be absent.
 * @returns {boolean}
 */
function lacksAll(content, snippets) {
  const isPresent = (needle) => content.includes(needle);
  return !snippets.some(isPresent);
}
13
+
14
/**
 * Test `content` against `pattern`, always searching from the start.
 *
 * @param {string} content - Text to test.
 * @param {RegExp} pattern - Pattern to apply.
 * @returns {boolean} Whether the pattern matches anywhere in `content`.
 */
function regex(content, pattern) {
  // `test` on a global/sticky pattern resumes from `lastIndex` left by the
  // previous call, so reusing one pattern object across validations could
  // report a false negative. Rewind it to keep this helper stateless.
  if (pattern.global || pattern.sticky) {
    pattern.lastIndex = 0;
  }
  return pattern.test(content);
}
17
+
18
/**
 * Benchmark task catalogue.
 *
 * Each entry has:
 * - `id`: task identifier ("T01" ... "T16").
 * - `name`: short human-readable title.
 * - `difficulty`: "trivial" | "easy" | "medium" | "hard".
 * - `timeoutMs`: time budget for the task in milliseconds.
 * - `prompt`: instruction text for the edit to perform.
 * - `validate(content)`: heuristic check on the resulting file text, built
 *   from substring presence/absence and regular expressions; returns a boolean.
 */
export const TASKS = [
  {
    id: "T01",
    name: "Rename heading text",
    difficulty: "trivial",
    timeoutMs: 120000,
    prompt: "Change the heading text from \"My Todos\" to \"Task Board\". Only make this text change.",
    validate: (content) => hasAll(content, ["Task Board"]) && lacksAll(content, [">My Todos<"]),
  },
  {
    id: "T02",
    name: "Rename input placeholder",
    difficulty: "trivial",
    timeoutMs: 120000,
    prompt: "Change the input placeholder from \"Add a todo...\" to \"What needs to be done?\" and make no other changes.",
    validate: (content) => hasAll(content, ["What needs to be done?"]) && lacksAll(content, ["Add a todo..."]),
  },
  {
    id: "T03",
    name: "Add clearCompleted action",
    difficulty: "easy",
    timeoutMs: 180000,
    prompt: "Add a clearCompleted function that removes completed todos using filter, and add a \"Clear Completed\" button after the closing </ul> that calls it.",
    validate: (content) => hasAll(content, ["clearCompleted", "Clear Completed", "filter"]) && regex(content, /clearCompleted\s*=|function\s+clearCompleted/),
  },
  {
    id: "T04",
    name: "Add footer count display",
    difficulty: "easy",
    timeoutMs: 180000,
    prompt: "Add a paragraph right after the closing </ul> that displays the remaining todo count using the text \"items left\".",
    // `.` does not match line terminators, so a paragraph formatted across
    // several lines used to fail. Scan any character (newlines included) up
    // to "items left", but stop at the paragraph's closing tag.
    validate: (content) => hasAll(content, ["items left"]) && regex(content, /<p[^>]*>(?:(?!<\/p>)[\s\S])*items left/i),
  },
  {
    id: "T05",
    name: "Trim input before add",
    difficulty: "easy",
    timeoutMs: 180000,
    prompt: "Update addTodo to trim the input before checking emptiness and before creating the new todo item. Keep behavior otherwise the same.",
    validate: (content) => hasAll(content, [".trim()"]),
  },
  {
    id: "T06",
    name: "Duplicate guard in addTodo",
    difficulty: "medium",
    timeoutMs: 180000,
    prompt: "Add a duplicate guard in addTodo so case-insensitive duplicate todo text is not added. Use toLowerCase and a todos.some check.",
    validate: (content) => hasAll(content, ["toLowerCase()", ".some("]) && regex(content, /duplicate|already exists|some\(/i),
  },
  {
    id: "T07",
    name: "Add max todo limit",
    difficulty: "medium",
    timeoutMs: 180000,
    prompt: "Add a MAX_TODOS constant with value 100 and prevent adding a todo when todos.length is already at the limit.",
    // Accept `>=`, `==` and `===` comparisons. The earlier `>?=` form matched
    // a bare assignment (`=`) yet rejected the strict-equality guard.
    validate: (content) => hasAll(content, ["MAX_TODOS", "100"]) && regex(content, /todos\.length\s*(?:>=|===?)\s*MAX_TODOS/),
  },
  {
    id: "T08",
    name: "Aria label for checkbox",
    difficulty: "easy",
    timeoutMs: 180000,
    prompt: "Add an aria-label to the checkbox input that includes the todo text and starts with \"Toggle todo\".",
    validate: (content) => hasAll(content, ["aria-label", "Toggle todo"]),
  },
  {
    id: "T09",
    name: "Aria label for delete button",
    difficulty: "easy",
    timeoutMs: 180000,
    prompt: "Add an aria-label to the Delete button that includes the todo text and starts with \"Delete todo\".",
    validate: (content) => hasAll(content, ["aria-label", "Delete todo"]),
  },
  {
    id: "T10",
    name: "Rename interface TodoItem",
    difficulty: "medium",
    timeoutMs: 240000,
    prompt: "Rename the TodoItem interface to TodoRecord and update all references in the file accordingly without changing behavior.",
    validate: (content) => hasAll(content, ["interface TodoRecord"]) && lacksAll(content, ["interface TodoItem {"]),
  },
  {
    id: "T11",
    name: "Extract TodoListItem component",
    difficulty: "hard",
    timeoutMs: 300000,
    prompt: "Extract the list item rendering into a new component named TodoListItem in the same file and use <TodoListItem> inside the todos.map call. Pass todo, onToggle, and onDelete props.",
    validate: (content) => regex(content, /function\s+TodoListItem|const\s+TodoListItem\s*=/) && hasAll(content, ["<TodoListItem", "onToggle", "onDelete"]),
  },
  {
    id: "T12",
    name: "Add empty state conditional",
    difficulty: "hard",
    timeoutMs: 300000,
    prompt: "Render an empty state paragraph with className \"empty-state\" and text \"No todos yet\" when todos.length is 0, otherwise render the existing <ul> list.",
    validate: (content) => hasAll(content, ["empty-state", "No todos yet"]) && regex(content, /todos\.length/),
  },
  {
    id: "T13",
    name: "Disable clear completed when none",
    difficulty: "medium",
    timeoutMs: 240000,
    prompt: "If you add a Clear Completed button, make it disabled when there are no completed todos. If the button does not exist yet, add it and wire the disabled state.",
    validate: (content) => hasAll(content, ["Clear Completed", "disabled"]) && regex(content, /some\(|filter\(/),
  },
  {
    id: "T14",
    name: "Memoize remaining count",
    difficulty: "medium",
    timeoutMs: 240000,
    prompt: "Refactor remainingCount to use React useMemo. Update imports if needed and keep the rendered output unchanged.",
    validate: (content) => hasAll(content, ["useMemo", "remainingCount"]) && regex(content, /useMemo\(/),
  },
  {
    id: "T15",
    name: "Sort completed last in render",
    difficulty: "hard",
    timeoutMs: 300000,
    prompt: "Before rendering the list, sort todos so incomplete items appear before completed items while preserving the rest of the UI. Do not mutate state directly.",
    validate: (content) => regex(content, /\.slice\(\)\.sort\(|const\s+sortedTodos/) && regex(content, /completed/),
  },
  {
    id: "T16",
    name: "Add footer summary stats",
    difficulty: "medium",
    timeoutMs: 240000,
    prompt: "Add a footer element after the list that shows total todos and completed todos using labels \"Total:\" and \"Completed:\".",
    validate: (content) => hasAll(content, ["<footer", "Total:", "Completed:"]),
  },
];
148
+
149
/**
 * Build a fresh lookup of benchmark tasks keyed by task id.
 *
 * @returns {Map<string, object>} A new Map from each task's `id` to the task
 *   object in `TASKS`; a later entry with the same id replaces an earlier one.
 */
export function getTaskMap() {
  const tasksById = new Map();
  for (const task of TASKS) {
    tasksById.set(task.id, task);
  }
  return tasksById;
}