aiforcecli-chat 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. package/License.MD +49 -0
  2. package/README.md +642 -0
  3. package/aiforcecli.config.example.json +66 -0
  4. package/assets/README.md +14 -0
  5. package/dist/cli.js +2 -0
  6. package/dist/index.js +2 -0
  7. package/package.json +62 -0
  8. package/tools/scorecard/README.md +92 -0
  9. package/tools/scorecard/config.json +134 -0
  10. package/tools/scorecard/fetch.mjs +335 -0
  11. package/tools/scorecard/generate.mjs +289 -0
  12. package/tools/scorecard/generated/example/invalid-rows.json +1 -0
  13. package/tools/scorecard/generated/example/scorecard-report.md +147 -0
  14. package/tools/scorecard/generated/example/scorecard.compact.json +61 -0
  15. package/tools/scorecard/generated/example/scorecard.json +1492 -0
  16. package/tools/scorecard/generated/example/unmapped-models.json +1492 -0
  17. package/tools/scorecard/generated/raw/aider_polyglot.html +21071 -0
  18. package/tools/scorecard/generated/raw/terminal_bench_2_1.html +2 -0
  19. package/tools/scorecard/generated/scorecard/invalid-rows.json +1 -0
  20. package/tools/scorecard/generated/scorecard/scorecard-report.md +133 -0
  21. package/tools/scorecard/generated/scorecard/scorecard.compact.json +51 -0
  22. package/tools/scorecard/generated/scorecard/scorecard.json +1181 -0
  23. package/tools/scorecard/generated/scorecard/unmapped-models.json +1492 -0
  24. package/tools/scorecard/generated/scorecard-example/invalid-rows.json +1 -0
  25. package/tools/scorecard/generated/scorecard-example/scorecard-report.md +40 -0
  26. package/tools/scorecard/generated/scorecard-example/scorecard.compact.json +22 -0
  27. package/tools/scorecard/generated/scorecard-example/scorecard.json +389 -0
  28. package/tools/scorecard/generated/scorecard-example/unmapped-models.json +1 -0
  29. package/tools/scorecard/generated/scorecard-fetch/raw/aider_polyglot.html +21071 -0
  30. package/tools/scorecard/generated/scorecard-fetch/raw/terminal_bench_2_1.html +2 -0
  31. package/tools/scorecard/snapshots/example.normalized.example.json +38 -0
  32. package/tools/scorecard/snapshots/live.aider_polyglot.json +1318 -0
  33. package/tools/scorecard/snapshots/live.terminal_bench_2_1.json +294 -0
@@ -0,0 +1,1318 @@
1
+ {
2
+ "source": "aider_polyglot",
3
+ "fetchedAt": "2026-06-16T21:10:46.950Z",
4
+ "url": "https://aider.chat/docs/leaderboards/",
5
+ "parser": "aiderPolyglot",
6
+ "rows": [
7
+ {
8
+ "source": "aider_polyglot",
9
+ "benchmark": "aider_polyglot",
10
+ "url": "https://aider.chat/docs/leaderboards/",
11
+ "modelRaw": "gpt-5 high",
12
+ "metric": "pass_rate_2",
13
+ "score": 88,
14
+ "scoreScale": "percent",
15
+ "sampleSize": 225,
16
+ "date": "2025-08-23",
17
+ "extra": {
18
+ "passRate1": 52,
19
+ "passNum1": 117,
20
+ "passNum2": 198,
21
+ "totalCostUsd": 29.0829,
22
+ "secondsPerCase": 194,
23
+ "editFormat": "diff"
24
+ }
25
+ },
26
+ {
27
+ "source": "aider_polyglot",
28
+ "benchmark": "aider_polyglot",
29
+ "url": "https://aider.chat/docs/leaderboards/",
30
+ "modelRaw": "gpt-5 medium",
31
+ "metric": "pass_rate_2",
32
+ "score": 86.7,
33
+ "scoreScale": "percent",
34
+ "sampleSize": 225,
35
+ "date": "2025-08-25",
36
+ "extra": {
37
+ "passRate1": 49.8,
38
+ "passNum1": 112,
39
+ "passNum2": 195,
40
+ "totalCostUsd": 17.693,
41
+ "secondsPerCase": 118.7,
42
+ "editFormat": "diff"
43
+ }
44
+ },
45
+ {
46
+ "source": "aider_polyglot",
47
+ "benchmark": "aider_polyglot",
48
+ "url": "https://aider.chat/docs/leaderboards/",
49
+ "modelRaw": "o3-pro high",
50
+ "metric": "pass_rate_2",
51
+ "score": 84.9,
52
+ "scoreScale": "percent",
53
+ "sampleSize": 225,
54
+ "date": "2025-06-28",
55
+ "extra": {
56
+ "passRate1": 43.6,
57
+ "passNum1": 98,
58
+ "passNum2": 191,
59
+ "totalCostUsd": 146.3249,
60
+ "secondsPerCase": 449,
61
+ "editFormat": "diff"
62
+ }
63
+ },
64
+ {
65
+ "source": "aider_polyglot",
66
+ "benchmark": "aider_polyglot",
67
+ "url": "https://aider.chat/docs/leaderboards/",
68
+ "modelRaw": "gemini-2.5-pro-preview-06-05 32k think",
69
+ "metric": "pass_rate_2",
70
+ "score": 83.1,
71
+ "scoreScale": "percent",
72
+ "sampleSize": 225,
73
+ "date": "2025-06-06",
74
+ "extra": {
75
+ "passRate1": 46.2,
76
+ "passNum1": 104,
77
+ "passNum2": 187,
78
+ "totalCostUsd": 49.8822,
79
+ "secondsPerCase": 200.3,
80
+ "editFormat": "diff-fenced"
81
+ }
82
+ },
83
+ {
84
+ "source": "aider_polyglot",
85
+ "benchmark": "aider_polyglot",
86
+ "url": "https://aider.chat/docs/leaderboards/",
87
+ "modelRaw": "gpt-5 low",
88
+ "metric": "pass_rate_2",
89
+ "score": 81.3,
90
+ "scoreScale": "percent",
91
+ "sampleSize": 225,
92
+ "date": "2025-08-25",
93
+ "extra": {
94
+ "passRate1": 43.1,
95
+ "passNum1": 97,
96
+ "passNum2": 183,
97
+ "totalCostUsd": 10.3713,
98
+ "secondsPerCase": 62.4,
99
+ "editFormat": "diff"
100
+ }
101
+ },
102
+ {
103
+ "source": "aider_polyglot",
104
+ "benchmark": "aider_polyglot",
105
+ "url": "https://aider.chat/docs/leaderboards/",
106
+ "modelRaw": "o3 high",
107
+ "metric": "pass_rate_2",
108
+ "score": 81.3,
109
+ "scoreScale": "percent",
110
+ "sampleSize": 225,
111
+ "date": "2025-06-25",
112
+ "extra": {
113
+ "passRate1": 40,
114
+ "passNum1": 90,
115
+ "passNum2": 183,
116
+ "totalCostUsd": 21.2259,
117
+ "secondsPerCase": 197.3,
118
+ "editFormat": "diff"
119
+ }
120
+ },
121
+ {
122
+ "source": "aider_polyglot",
123
+ "benchmark": "aider_polyglot",
124
+ "url": "https://aider.chat/docs/leaderboards/",
125
+ "modelRaw": "grok-4 high",
126
+ "metric": "pass_rate_2",
127
+ "score": 79.6,
128
+ "scoreScale": "percent",
129
+ "sampleSize": 225,
130
+ "date": "2025-07-11",
131
+ "extra": {
132
+ "passRate1": 40.9,
133
+ "passNum1": 92,
134
+ "passNum2": 179,
135
+ "totalCostUsd": 59.6182,
136
+ "secondsPerCase": 403.2,
137
+ "editFormat": "diff"
138
+ }
139
+ },
140
+ {
141
+ "source": "aider_polyglot",
142
+ "benchmark": "aider_polyglot",
143
+ "url": "https://aider.chat/docs/leaderboards/",
144
+ "modelRaw": "gemini-2.5-pro-preview-06-05 default think",
145
+ "metric": "pass_rate_2",
146
+ "score": 79.1,
147
+ "scoreScale": "percent",
148
+ "sampleSize": 225,
149
+ "date": "2025-06-06",
150
+ "extra": {
151
+ "passRate1": 44.9,
152
+ "passNum1": 101,
153
+ "passNum2": 178,
154
+ "totalCostUsd": 45.5961,
155
+ "secondsPerCase": 175.2,
156
+ "editFormat": "diff-fenced"
157
+ }
158
+ },
159
+ {
160
+ "source": "aider_polyglot",
161
+ "benchmark": "aider_polyglot",
162
+ "url": "https://aider.chat/docs/leaderboards/",
163
+ "modelRaw": "o3 high + gpt-4.1",
164
+ "metric": "pass_rate_2",
165
+ "score": 78.2,
166
+ "scoreScale": "percent",
167
+ "sampleSize": 224,
168
+ "date": "2025-06-27",
169
+ "extra": {
170
+ "passRate1": 34.8,
171
+ "passNum1": 78,
172
+ "passNum2": 176,
173
+ "totalCostUsd": 17.5518,
174
+ "secondsPerCase": 121.8,
175
+ "editFormat": "architect"
176
+ }
177
+ },
178
+ {
179
+ "source": "aider_polyglot",
180
+ "benchmark": "aider_polyglot",
181
+ "url": "https://aider.chat/docs/leaderboards/",
182
+ "modelRaw": "o3",
183
+ "metric": "pass_rate_2",
184
+ "score": 76.9,
185
+ "scoreScale": "percent",
186
+ "sampleSize": 225,
187
+ "date": "2025-06-25",
188
+ "extra": {
189
+ "passRate1": 40.9,
190
+ "passNum1": 92,
191
+ "passNum2": 173,
192
+ "totalCostUsd": 13.7517,
193
+ "secondsPerCase": 101.7,
194
+ "editFormat": "diff"
195
+ }
196
+ },
197
+ {
198
+ "source": "aider_polyglot",
199
+ "benchmark": "aider_polyglot",
200
+ "url": "https://aider.chat/docs/leaderboards/",
201
+ "modelRaw": "Gemini 2.5 Pro Preview 05-06",
202
+ "metric": "pass_rate_2",
203
+ "score": 76.9,
204
+ "scoreScale": "percent",
205
+ "sampleSize": 225,
206
+ "date": "2025-05-07",
207
+ "extra": {
208
+ "passRate1": 36.4,
209
+ "passNum1": 82,
210
+ "passNum2": 173,
211
+ "totalCostUsd": 37.4104,
212
+ "secondsPerCase": 165.3,
213
+ "editFormat": "diff-fenced"
214
+ }
215
+ },
216
+ {
217
+ "source": "aider_polyglot",
218
+ "benchmark": "aider_polyglot",
219
+ "url": "https://aider.chat/docs/leaderboards/",
220
+ "modelRaw": "DeepSeek-V3.2-Exp Reasoner",
221
+ "metric": "pass_rate_2",
222
+ "score": 74.2,
223
+ "scoreScale": "percent",
224
+ "sampleSize": 225,
225
+ "date": "2025-10-03",
226
+ "extra": {
227
+ "passRate1": 39.6,
228
+ "passNum1": 89,
229
+ "passNum2": 167,
230
+ "totalCostUsd": 1.3045,
231
+ "secondsPerCase": 291.2,
232
+ "editFormat": "diff"
233
+ }
234
+ },
235
+ {
236
+ "source": "aider_polyglot",
237
+ "benchmark": "aider_polyglot",
238
+ "url": "https://aider.chat/docs/leaderboards/",
239
+ "modelRaw": "Gemini 2.5 Pro Preview 03-25",
240
+ "metric": "pass_rate_2",
241
+ "score": 72.9,
242
+ "scoreScale": "percent",
243
+ "sampleSize": 225,
244
+ "date": "2025-04-12",
245
+ "extra": {
246
+ "passRate1": 40.9,
247
+ "passNum1": 92,
248
+ "passNum2": 164,
249
+ "totalCostUsd": 0,
250
+ "secondsPerCase": 45.3,
251
+ "editFormat": "diff-fenced"
252
+ }
253
+ },
254
+ {
255
+ "source": "aider_polyglot",
256
+ "benchmark": "aider_polyglot",
257
+ "url": "https://aider.chat/docs/leaderboards/",
258
+ "modelRaw": "claude-opus-4-20250514 32k thinking",
259
+ "metric": "pass_rate_2",
260
+ "score": 72,
261
+ "scoreScale": "percent",
262
+ "sampleSize": 225,
263
+ "date": "2025-05-25",
264
+ "extra": {
265
+ "passRate1": 37.3,
266
+ "passNum1": 84,
267
+ "passNum2": 162,
268
+ "totalCostUsd": 65.7484,
269
+ "secondsPerCase": 44.1,
270
+ "editFormat": "diff"
271
+ }
272
+ },
273
+ {
274
+ "source": "aider_polyglot",
275
+ "benchmark": "aider_polyglot",
276
+ "url": "https://aider.chat/docs/leaderboards/",
277
+ "modelRaw": "o4-mini high",
278
+ "metric": "pass_rate_2",
279
+ "score": 72,
280
+ "scoreScale": "percent",
281
+ "sampleSize": 225,
282
+ "date": "2025-04-16",
283
+ "extra": {
284
+ "passRate1": 19.6,
285
+ "passNum1": 44,
286
+ "passNum2": 162,
287
+ "totalCostUsd": 19.6399,
288
+ "secondsPerCase": 176.5,
289
+ "editFormat": "diff"
290
+ }
291
+ },
292
+ {
293
+ "source": "aider_polyglot",
294
+ "benchmark": "aider_polyglot",
295
+ "url": "https://aider.chat/docs/leaderboards/",
296
+ "modelRaw": "DeepSeek R1 0528",
297
+ "metric": "pass_rate_2",
298
+ "score": 71.4,
299
+ "scoreScale": "percent",
300
+ "sampleSize": 224,
301
+ "date": "2025-06-06",
302
+ "extra": {
303
+ "passRate1": 34.4,
304
+ "passNum1": 77,
305
+ "passNum2": 160,
306
+ "totalCostUsd": 4.8016,
307
+ "secondsPerCase": 716.6,
308
+ "editFormat": "diff"
309
+ }
310
+ },
311
+ {
312
+ "source": "aider_polyglot",
313
+ "benchmark": "aider_polyglot",
314
+ "url": "https://aider.chat/docs/leaderboards/",
315
+ "modelRaw": "claude-opus-4-20250514 no think",
316
+ "metric": "pass_rate_2",
317
+ "score": 70.7,
318
+ "scoreScale": "percent",
319
+ "sampleSize": 225,
320
+ "date": "2025-05-25",
321
+ "extra": {
322
+ "passRate1": 32.9,
323
+ "passNum1": 74,
324
+ "passNum2": 159,
325
+ "totalCostUsd": 68.6253,
326
+ "secondsPerCase": 42.5,
327
+ "editFormat": "diff"
328
+ }
329
+ },
330
+ {
331
+ "source": "aider_polyglot",
332
+ "benchmark": "aider_polyglot",
333
+ "url": "https://aider.chat/docs/leaderboards/",
334
+ "modelRaw": "DeepSeek-V3.2-Exp Chat",
335
+ "metric": "pass_rate_2",
336
+ "score": 70.2,
337
+ "scoreScale": "percent",
338
+ "sampleSize": 225,
339
+ "date": "2025-10-03",
340
+ "extra": {
341
+ "passRate1": 38.7,
342
+ "passNum1": 87,
343
+ "passNum2": 158,
344
+ "totalCostUsd": 0.8756,
345
+ "secondsPerCase": 104,
346
+ "editFormat": "diff"
347
+ }
348
+ },
349
+ {
350
+ "source": "aider_polyglot",
351
+ "benchmark": "aider_polyglot",
352
+ "url": "https://aider.chat/docs/leaderboards/",
353
+ "modelRaw": "claude-3-7-sonnet-20250219 32k thinking tokens",
354
+ "metric": "pass_rate_2",
355
+ "score": 64.9,
356
+ "scoreScale": "percent",
357
+ "sampleSize": 225,
358
+ "date": "2025-02-24",
359
+ "extra": {
360
+ "passRate1": 29.3,
361
+ "passNum1": 66,
362
+ "passNum2": 146,
363
+ "totalCostUsd": 36.8343,
364
+ "secondsPerCase": 105.2,
365
+ "editFormat": "diff"
366
+ }
367
+ },
368
+ {
369
+ "source": "aider_polyglot",
370
+ "benchmark": "aider_polyglot",
371
+ "url": "https://aider.chat/docs/leaderboards/",
372
+ "modelRaw": "DeepSeek R1 + claude-3-5-sonnet-20241022",
373
+ "metric": "pass_rate_2",
374
+ "score": 64,
375
+ "scoreScale": "percent",
376
+ "sampleSize": 225,
377
+ "date": "2025-01-23",
378
+ "extra": {
379
+ "passRate1": 27.1,
380
+ "passNum1": 61,
381
+ "passNum2": 144,
382
+ "totalCostUsd": 13.2933,
383
+ "secondsPerCase": 251.6,
384
+ "editFormat": "architect"
385
+ }
386
+ },
387
+ {
388
+ "source": "aider_polyglot",
389
+ "benchmark": "aider_polyglot",
390
+ "url": "https://aider.chat/docs/leaderboards/",
391
+ "modelRaw": "o1-2024-12-17 high",
392
+ "metric": "pass_rate_2",
393
+ "score": 61.7,
394
+ "scoreScale": "percent",
395
+ "sampleSize": 224,
396
+ "date": "2024-12-21",
397
+ "extra": {
398
+ "passRate1": 23.7,
399
+ "passNum1": 53,
400
+ "passNum2": 139,
401
+ "totalCostUsd": 186.4958,
402
+ "secondsPerCase": 133.2,
403
+ "editFormat": "diff"
404
+ }
405
+ },
406
+ {
407
+ "source": "aider_polyglot",
408
+ "benchmark": "aider_polyglot",
409
+ "url": "https://aider.chat/docs/leaderboards/",
410
+ "modelRaw": "claude-sonnet-4-20250514 32k thinking",
411
+ "metric": "pass_rate_2",
412
+ "score": 61.3,
413
+ "scoreScale": "percent",
414
+ "sampleSize": 225,
415
+ "date": "2025-05-24",
416
+ "extra": {
417
+ "passRate1": 25.8,
418
+ "passNum1": 58,
419
+ "passNum2": 138,
420
+ "totalCostUsd": 26.5755,
421
+ "secondsPerCase": 79.9,
422
+ "editFormat": "diff"
423
+ }
424
+ },
425
+ {
426
+ "source": "aider_polyglot",
427
+ "benchmark": "aider_polyglot",
428
+ "url": "https://aider.chat/docs/leaderboards/",
429
+ "modelRaw": "claude-3-7-sonnet-20250219 no thinking",
430
+ "metric": "pass_rate_2",
431
+ "score": 60.4,
432
+ "scoreScale": "percent",
433
+ "sampleSize": 225,
434
+ "date": "2025-02-24",
435
+ "extra": {
436
+ "passRate1": 24.4,
437
+ "passNum1": 55,
438
+ "passNum2": 136,
439
+ "totalCostUsd": 17.7191,
440
+ "secondsPerCase": 28.3,
441
+ "editFormat": "diff"
442
+ }
443
+ },
444
+ {
445
+ "source": "aider_polyglot",
446
+ "benchmark": "aider_polyglot",
447
+ "url": "https://aider.chat/docs/leaderboards/",
448
+ "modelRaw": "o3-mini high",
449
+ "metric": "pass_rate_2",
450
+ "score": 60.4,
451
+ "scoreScale": "percent",
452
+ "sampleSize": 224,
453
+ "date": "2025-01-31",
454
+ "extra": {
455
+ "passRate1": 21,
456
+ "passNum1": 47,
457
+ "passNum2": 136,
458
+ "totalCostUsd": 18.1584,
459
+ "secondsPerCase": 124.6,
460
+ "editFormat": "diff"
461
+ }
462
+ },
463
+ {
464
+ "source": "aider_polyglot",
465
+ "benchmark": "aider_polyglot",
466
+ "url": "https://aider.chat/docs/leaderboards/",
467
+ "modelRaw": "Qwen3 235B A22B diff, no think, Alibaba API",
468
+ "metric": "pass_rate_2",
469
+ "score": 59.6,
470
+ "scoreScale": "percent",
471
+ "sampleSize": 225,
472
+ "date": "2025-05-09",
473
+ "extra": {
474
+ "passRate1": 28.9,
475
+ "passNum1": 65,
476
+ "passNum2": 134,
477
+ "totalCostUsd": 0,
478
+ "secondsPerCase": 45.4,
479
+ "editFormat": "diff"
480
+ }
481
+ },
482
+ {
483
+ "source": "aider_polyglot",
484
+ "benchmark": "aider_polyglot",
485
+ "url": "https://aider.chat/docs/leaderboards/",
486
+ "modelRaw": "Kimi K2",
487
+ "metric": "pass_rate_2",
488
+ "score": 59.1,
489
+ "scoreScale": "percent",
490
+ "sampleSize": 225,
491
+ "date": "2025-07-17",
492
+ "extra": {
493
+ "passRate1": 20.4,
494
+ "passNum1": 46,
495
+ "passNum2": 133,
496
+ "totalCostUsd": 1.2357,
497
+ "secondsPerCase": 67.6,
498
+ "editFormat": "diff"
499
+ }
500
+ },
501
+ {
502
+ "source": "aider_polyglot",
503
+ "benchmark": "aider_polyglot",
504
+ "url": "https://aider.chat/docs/leaderboards/",
505
+ "modelRaw": "DeepSeek R1",
506
+ "metric": "pass_rate_2",
507
+ "score": 56.9,
508
+ "scoreScale": "percent",
509
+ "sampleSize": 225,
510
+ "date": "2025-01-20",
511
+ "extra": {
512
+ "passRate1": 26.7,
513
+ "passNum1": 60,
514
+ "passNum2": 128,
515
+ "totalCostUsd": 5.4193,
516
+ "secondsPerCase": 113.7,
517
+ "editFormat": "diff"
518
+ }
519
+ },
520
+ {
521
+ "source": "aider_polyglot",
522
+ "benchmark": "aider_polyglot",
523
+ "url": "https://aider.chat/docs/leaderboards/",
524
+ "modelRaw": "claude-sonnet-4-20250514 no thinking",
525
+ "metric": "pass_rate_2",
526
+ "score": 56.4,
527
+ "scoreScale": "percent",
528
+ "sampleSize": 225,
529
+ "date": "2025-05-24",
530
+ "extra": {
531
+ "passRate1": 20.4,
532
+ "passNum1": 46,
533
+ "passNum2": 127,
534
+ "totalCostUsd": 15.8155,
535
+ "secondsPerCase": 29.8,
536
+ "editFormat": "diff"
537
+ }
538
+ },
539
+ {
540
+ "source": "aider_polyglot",
541
+ "benchmark": "aider_polyglot",
542
+ "url": "https://aider.chat/docs/leaderboards/",
543
+ "modelRaw": "gemini-2.5-flash-preview-05-20 24k think",
544
+ "metric": "pass_rate_2",
545
+ "score": 55.1,
546
+ "scoreScale": "percent",
547
+ "sampleSize": 225,
548
+ "date": "2025-05-25",
549
+ "extra": {
550
+ "passRate1": 26.2,
551
+ "passNum1": 59,
552
+ "passNum2": 124,
553
+ "totalCostUsd": 8.5625,
554
+ "secondsPerCase": 53.9,
555
+ "editFormat": "diff"
556
+ }
557
+ },
558
+ {
559
+ "source": "aider_polyglot",
560
+ "benchmark": "aider_polyglot",
561
+ "url": "https://aider.chat/docs/leaderboards/",
562
+ "modelRaw": "DeepSeek V3 0324",
563
+ "metric": "pass_rate_2",
564
+ "score": 55.1,
565
+ "scoreScale": "percent",
566
+ "sampleSize": 225,
567
+ "date": "2025-03-24",
568
+ "extra": {
569
+ "passRate1": 28,
570
+ "passNum1": 63,
571
+ "passNum2": 124,
572
+ "totalCostUsd": 1.1164,
573
+ "secondsPerCase": 290,
574
+ "editFormat": "diff"
575
+ }
576
+ },
577
+ {
578
+ "source": "aider_polyglot",
579
+ "benchmark": "aider_polyglot",
580
+ "url": "https://aider.chat/docs/leaderboards/",
581
+ "modelRaw": "Quasar Alpha",
582
+ "metric": "pass_rate_2",
583
+ "score": 54.7,
584
+ "scoreScale": "percent",
585
+ "sampleSize": 225,
586
+ "date": "2025-04-04",
587
+ "extra": {
588
+ "passRate1": 21.8,
589
+ "passNum1": 49,
590
+ "passNum2": 123,
591
+ "totalCostUsd": 0,
592
+ "secondsPerCase": 14.8,
593
+ "editFormat": "diff"
594
+ }
595
+ },
596
+ {
597
+ "source": "aider_polyglot",
598
+ "benchmark": "aider_polyglot",
599
+ "url": "https://aider.chat/docs/leaderboards/",
600
+ "modelRaw": "o3-mini medium",
601
+ "metric": "pass_rate_2",
602
+ "score": 53.8,
603
+ "scoreScale": "percent",
604
+ "sampleSize": 225,
605
+ "date": "2025-01-31",
606
+ "extra": {
607
+ "passRate1": 19.1,
608
+ "passNum1": 43,
609
+ "passNum2": 121,
610
+ "totalCostUsd": 8.8599,
611
+ "secondsPerCase": 47.2,
612
+ "editFormat": "diff"
613
+ }
614
+ },
615
+ {
616
+ "source": "aider_polyglot",
617
+ "benchmark": "aider_polyglot",
618
+ "url": "https://aider.chat/docs/leaderboards/",
619
+ "modelRaw": "Grok 3 Beta",
620
+ "metric": "pass_rate_2",
621
+ "score": 53.3,
622
+ "scoreScale": "percent",
623
+ "sampleSize": 225,
624
+ "date": "2025-04-10",
625
+ "extra": {
626
+ "passRate1": 22.2,
627
+ "passNum1": 50,
628
+ "passNum2": 120,
629
+ "totalCostUsd": 11.0338,
630
+ "secondsPerCase": 15.3,
631
+ "editFormat": "diff"
632
+ }
633
+ },
634
+ {
635
+ "source": "aider_polyglot",
636
+ "benchmark": "aider_polyglot",
637
+ "url": "https://aider.chat/docs/leaderboards/",
638
+ "modelRaw": "Optimus Alpha",
639
+ "metric": "pass_rate_2",
640
+ "score": 52.9,
641
+ "scoreScale": "percent",
642
+ "sampleSize": 225,
643
+ "date": "2025-04-10",
644
+ "extra": {
645
+ "passRate1": 21.3,
646
+ "passNum1": 48,
647
+ "passNum2": 119,
648
+ "totalCostUsd": 0,
649
+ "secondsPerCase": 18.4,
650
+ "editFormat": "diff"
651
+ }
652
+ },
653
+ {
654
+ "source": "aider_polyglot",
655
+ "benchmark": "aider_polyglot",
656
+ "url": "https://aider.chat/docs/leaderboards/",
657
+ "modelRaw": "gpt-4.1",
658
+ "metric": "pass_rate_2",
659
+ "score": 52.4,
660
+ "scoreScale": "percent",
661
+ "sampleSize": 225,
662
+ "date": "2025-04-14",
663
+ "extra": {
664
+ "passRate1": 20,
665
+ "passNum1": 45,
666
+ "passNum2": 118,
667
+ "totalCostUsd": 9.8556,
668
+ "secondsPerCase": 20.5,
669
+ "editFormat": "diff"
670
+ }
671
+ },
672
+ {
673
+ "source": "aider_polyglot",
674
+ "benchmark": "aider_polyglot",
675
+ "url": "https://aider.chat/docs/leaderboards/",
676
+ "modelRaw": "claude-3-5-sonnet-20241022",
677
+ "metric": "pass_rate_2",
678
+ "score": 51.6,
679
+ "scoreScale": "percent",
680
+ "sampleSize": 225,
681
+ "date": "2025-01-17",
682
+ "extra": {
683
+ "passRate1": 22.2,
684
+ "passNum1": 50,
685
+ "passNum2": 116,
686
+ "totalCostUsd": 14.4063,
687
+ "secondsPerCase": 21.4,
688
+ "editFormat": "diff"
689
+ }
690
+ },
691
+ {
692
+ "source": "aider_polyglot",
693
+ "benchmark": "aider_polyglot",
694
+ "url": "https://aider.chat/docs/leaderboards/",
695
+ "modelRaw": "Grok 3 Mini Beta high",
696
+ "metric": "pass_rate_2",
697
+ "score": 49.3,
698
+ "scoreScale": "percent",
699
+ "sampleSize": 225,
700
+ "date": "2025-04-10",
701
+ "extra": {
702
+ "passRate1": 17.3,
703
+ "passNum1": 39,
704
+ "passNum2": 111,
705
+ "totalCostUsd": 0.7346,
706
+ "secondsPerCase": 79.1,
707
+ "editFormat": "whole"
708
+ }
709
+ },
710
+ {
711
+ "source": "aider_polyglot",
712
+ "benchmark": "aider_polyglot",
713
+ "url": "https://aider.chat/docs/leaderboards/",
714
+ "modelRaw": "DeepSeek Chat V3 prev",
715
+ "metric": "pass_rate_2",
716
+ "score": 48.4,
717
+ "scoreScale": "percent",
718
+ "sampleSize": 225,
719
+ "date": "2024-12-25",
720
+ "extra": {
721
+ "passRate1": 22.7,
722
+ "passNum1": 51,
723
+ "passNum2": 109,
724
+ "totalCostUsd": 0.3369,
725
+ "secondsPerCase": 34.8,
726
+ "editFormat": "diff"
727
+ }
728
+ },
729
+ {
730
+ "source": "aider_polyglot",
731
+ "benchmark": "aider_polyglot",
732
+ "url": "https://aider.chat/docs/leaderboards/",
733
+ "modelRaw": "gemini-2.5-flash-preview-04-17 default",
734
+ "metric": "pass_rate_2",
735
+ "score": 47.1,
736
+ "scoreScale": "percent",
737
+ "sampleSize": 225,
738
+ "date": "2025-04-20",
739
+ "extra": {
740
+ "passRate1": 21.8,
741
+ "passNum1": 49,
742
+ "passNum2": 106,
743
+ "totalCostUsd": 1.8451,
744
+ "secondsPerCase": 50.1,
745
+ "editFormat": "diff"
746
+ }
747
+ },
748
+ {
749
+ "source": "aider_polyglot",
750
+ "benchmark": "aider_polyglot",
751
+ "url": "https://aider.chat/docs/leaderboards/",
752
+ "modelRaw": "chatgpt-4o-latest 2025-03-29",
753
+ "metric": "pass_rate_2",
754
+ "score": 45.3,
755
+ "scoreScale": "percent",
756
+ "sampleSize": 225,
757
+ "date": "2025-03-29",
758
+ "extra": {
759
+ "passRate1": 16.4,
760
+ "passNum1": 37,
761
+ "passNum2": 102,
762
+ "totalCostUsd": 19.7416,
763
+ "secondsPerCase": 10.3,
764
+ "editFormat": "diff"
765
+ }
766
+ },
767
+ {
768
+ "source": "aider_polyglot",
769
+ "benchmark": "aider_polyglot",
770
+ "url": "https://aider.chat/docs/leaderboards/",
771
+ "modelRaw": "gpt-4.5-preview",
772
+ "metric": "pass_rate_2",
773
+ "score": 44.9,
774
+ "scoreScale": "percent",
775
+ "sampleSize": 224,
776
+ "date": "2025-02-27",
777
+ "extra": {
778
+ "passRate1": 22.3,
779
+ "passNum1": 50,
780
+ "passNum2": 101,
781
+ "totalCostUsd": 183.1802,
782
+ "secondsPerCase": 113.5,
783
+ "editFormat": "diff"
784
+ }
785
+ },
786
+ {
787
+ "source": "aider_polyglot",
788
+ "benchmark": "aider_polyglot",
789
+ "url": "https://aider.chat/docs/leaderboards/",
790
+ "modelRaw": "gemini-2.5-flash-preview-05-20 no think",
791
+ "metric": "pass_rate_2",
792
+ "score": 44,
793
+ "scoreScale": "percent",
794
+ "sampleSize": 225,
795
+ "date": "2025-05-26",
796
+ "extra": {
797
+ "passRate1": 20.9,
798
+ "passNum1": 47,
799
+ "passNum2": 99,
800
+ "totalCostUsd": 1.1354,
801
+ "secondsPerCase": 12.2,
802
+ "editFormat": "diff"
803
+ }
804
+ },
805
+ {
806
+ "source": "aider_polyglot",
807
+ "benchmark": "aider_polyglot",
808
+ "url": "https://aider.chat/docs/leaderboards/",
809
+ "modelRaw": "gpt-oss-120b high",
810
+ "metric": "pass_rate_2",
811
+ "score": 41.8,
812
+ "scoreScale": "percent",
813
+ "sampleSize": 225,
814
+ "date": "2025-08-06",
815
+ "extra": {
816
+ "passRate1": 13.8,
817
+ "passNum1": 31,
818
+ "passNum2": 94,
819
+ "totalCostUsd": 0.7406,
820
+ "secondsPerCase": 35.5,
821
+ "editFormat": "diff"
822
+ }
823
+ },
824
+ {
825
+ "source": "aider_polyglot",
826
+ "benchmark": "aider_polyglot",
827
+ "url": "https://aider.chat/docs/leaderboards/",
828
+ "modelRaw": "Qwen3 32B",
829
+ "metric": "pass_rate_2",
830
+ "score": 40,
831
+ "scoreScale": "percent",
832
+ "sampleSize": 225,
833
+ "date": "2025-05-08",
834
+ "extra": {
835
+ "passRate1": 14.2,
836
+ "passNum1": 32,
837
+ "passNum2": 90,
838
+ "totalCostUsd": 0.7603,
839
+ "secondsPerCase": 372.2,
840
+ "editFormat": "diff"
841
+ }
842
+ },
843
+ {
844
+ "source": "aider_polyglot",
845
+ "benchmark": "aider_polyglot",
846
+ "url": "https://aider.chat/docs/leaderboards/",
847
+ "modelRaw": "gemini-exp-1206",
848
+ "metric": "pass_rate_2",
849
+ "score": 38.2,
850
+ "scoreScale": "percent",
851
+ "sampleSize": 225,
852
+ "date": "2024-12-22",
853
+ "extra": {
854
+ "passRate1": 19.6,
855
+ "passNum1": 44,
856
+ "passNum2": 86,
857
+ "totalCostUsd": 0,
858
+ "secondsPerCase": 45.5,
859
+ "editFormat": "whole"
860
+ }
861
+ },
862
+ {
863
+ "source": "aider_polyglot",
864
+ "benchmark": "aider_polyglot",
865
+ "url": "https://aider.chat/docs/leaderboards/",
866
+ "modelRaw": "Gemini 2.0 Pro exp-02-05",
867
+ "metric": "pass_rate_2",
868
+ "score": 35.6,
869
+ "scoreScale": "percent",
870
+ "sampleSize": 225,
871
+ "date": "2025-02-25",
872
+ "extra": {
873
+ "passRate1": 20.4,
874
+ "passNum1": 46,
875
+ "passNum2": 80,
876
+ "totalCostUsd": 0,
877
+ "secondsPerCase": 34.8,
878
+ "editFormat": "whole"
879
+ }
880
+ },
881
+ {
882
+ "source": "aider_polyglot",
883
+ "benchmark": "aider_polyglot",
884
+ "url": "https://aider.chat/docs/leaderboards/",
885
+ "modelRaw": "Grok 3 Mini Beta low",
886
+ "metric": "pass_rate_2",
887
+ "score": 34.7,
888
+ "scoreScale": "percent",
889
+ "sampleSize": 225,
890
+ "date": "2025-04-10",
891
+ "extra": {
892
+ "passRate1": 11.1,
893
+ "passNum1": 25,
894
+ "passNum2": 78,
895
+ "totalCostUsd": 0.7856,
896
+ "secondsPerCase": 35.1,
897
+ "editFormat": "whole"
898
+ }
899
+ },
900
+ {
901
+ "source": "aider_polyglot",
902
+ "benchmark": "aider_polyglot",
903
+ "url": "https://aider.chat/docs/leaderboards/",
904
+ "modelRaw": "o1-mini-2024-09-12",
905
+ "metric": "pass_rate_2",
906
+ "score": 32.9,
907
+ "scoreScale": "percent",
908
+ "sampleSize": 225,
909
+ "date": "2024-12-22",
910
+ "extra": {
911
+ "passRate1": 5.8,
912
+ "passNum1": 13,
913
+ "passNum2": 74,
914
+ "totalCostUsd": 18.577,
915
+ "secondsPerCase": 34.7,
916
+ "editFormat": "whole"
917
+ }
918
+ },
919
+ {
920
+ "source": "aider_polyglot",
921
+ "benchmark": "aider_polyglot",
922
+ "url": "https://aider.chat/docs/leaderboards/",
923
+ "modelRaw": "gpt-4.1-mini",
924
+ "metric": "pass_rate_2",
925
+ "score": 32.4,
926
+ "scoreScale": "percent",
927
+ "sampleSize": 225,
928
+ "date": "2025-04-14",
929
+ "extra": {
930
+ "passRate1": 11.1,
931
+ "passNum1": 25,
932
+ "passNum2": 73,
933
+ "totalCostUsd": 1.9918,
934
+ "secondsPerCase": 19.5,
935
+ "editFormat": "diff"
936
+ }
937
+ },
938
+ {
939
+ "source": "aider_polyglot",
940
+ "benchmark": "aider_polyglot",
941
+ "url": "https://aider.chat/docs/leaderboards/",
942
+ "modelRaw": "claude-3-5-haiku-20241022",
943
+ "metric": "pass_rate_2",
944
+ "score": 28,
945
+ "scoreScale": "percent",
946
+ "sampleSize": 225,
947
+ "date": "2024-12-21",
948
+ "extra": {
949
+ "passRate1": 7.1,
950
+ "passNum1": 16,
951
+ "passNum2": 63,
952
+ "totalCostUsd": 6.0583,
953
+ "secondsPerCase": 31.8,
954
+ "editFormat": "diff"
955
+ }
956
+ },
957
+ {
958
+ "source": "aider_polyglot",
959
+ "benchmark": "aider_polyglot",
960
+ "url": "https://aider.chat/docs/leaderboards/",
961
+ "modelRaw": "chatgpt-4o-latest 2025-02-15",
962
+ "metric": "pass_rate_2",
963
+ "score": 27.1,
964
+ "scoreScale": "percent",
965
+ "sampleSize": 223,
966
+ "date": "2025-02-15",
967
+ "extra": {
968
+ "passRate1": 9,
969
+ "passNum1": 20,
970
+ "passNum2": 61,
971
+ "totalCostUsd": 14.3703,
972
+ "secondsPerCase": 12.4,
973
+ "editFormat": "diff"
974
+ }
975
+ },
976
+ {
977
+ "source": "aider_polyglot",
978
+ "benchmark": "aider_polyglot",
979
+ "url": "https://aider.chat/docs/leaderboards/",
980
+ "modelRaw": "QwQ-32B + Qwen 2.5 Coder Instruct",
981
+ "metric": "pass_rate_2",
982
+ "score": 26.2,
983
+ "scoreScale": "percent",
984
+ "sampleSize": 225,
985
+ "date": "2025-03-07",
986
+ "extra": {
987
+ "passRate1": 9.8,
988
+ "passNum1": 22,
989
+ "passNum2": 59,
990
+ "totalCostUsd": 0,
991
+ "secondsPerCase": 137.4,
992
+ "editFormat": "architect"
993
+ }
994
+ },
995
+ {
996
+ "source": "aider_polyglot",
997
+ "benchmark": "aider_polyglot",
998
+ "url": "https://aider.chat/docs/leaderboards/",
999
+ "modelRaw": "gpt-4o-2024-08-06",
1000
+ "metric": "pass_rate_2",
1001
+ "score": 23.1,
1002
+ "scoreScale": "percent",
1003
+ "sampleSize": 225,
1004
+ "date": "2024-12-30",
1005
+ "extra": {
1006
+ "passRate1": 4.9,
1007
+ "passNum1": 11,
1008
+ "passNum2": 52,
1009
+ "totalCostUsd": 7.0286,
1010
+ "secondsPerCase": 16,
1011
+ "editFormat": "diff"
1012
+ }
1013
+ },
1014
+ {
1015
+ "source": "aider_polyglot",
1016
+ "benchmark": "aider_polyglot",
1017
+ "url": "https://aider.chat/docs/leaderboards/",
1018
+ "modelRaw": "gemini-2.0-flash-exp",
1019
+ "metric": "pass_rate_2",
1020
+ "score": 22.2,
1021
+ "scoreScale": "percent",
1022
+ "sampleSize": 225,
1023
+ "date": "2024-12-22",
1024
+ "extra": {
1025
+ "passRate1": 11.6,
1026
+ "passNum1": 26,
1027
+ "passNum2": 50,
1028
+ "totalCostUsd": 0,
1029
+ "secondsPerCase": 12.2,
1030
+ "editFormat": "whole"
1031
+ }
1032
+ },
1033
+ {
1034
+ "source": "aider_polyglot",
1035
+ "benchmark": "aider_polyglot",
1036
+ "url": "https://aider.chat/docs/leaderboards/",
1037
+ "modelRaw": "qwen-max-2025-01-25",
1038
+ "metric": "pass_rate_2",
1039
+ "score": 21.8,
1040
+ "scoreScale": "percent",
1041
+ "sampleSize": 225,
1042
+ "date": "2025-01-28",
1043
+ "extra": {
1044
+ "passRate1": 9.3,
1045
+ "passNum1": 21,
1046
+ "passNum2": 49,
1047
+ "secondsPerCase": 39.5,
1048
+ "editFormat": "diff"
1049
+ }
1050
+ },
1051
+ {
1052
+ "source": "aider_polyglot",
1053
+ "benchmark": "aider_polyglot",
1054
+ "url": "https://aider.chat/docs/leaderboards/",
1055
+ "modelRaw": "QwQ-32B",
1056
+ "metric": "pass_rate_2",
1057
+ "score": 20.9,
1058
+ "scoreScale": "percent",
1059
+ "sampleSize": 225,
1060
+ "date": "2025-03-06",
1061
+ "extra": {
1062
+ "passRate1": 8,
1063
+ "passNum1": 18,
1064
+ "passNum2": 47,
1065
+ "totalCostUsd": 0,
1066
+ "secondsPerCase": 228.6,
1067
+ "editFormat": "diff"
1068
+ }
1069
+ },
1070
+ {
1071
+ "source": "aider_polyglot",
1072
+ "benchmark": "aider_polyglot",
1073
+ "url": "https://aider.chat/docs/leaderboards/",
1074
+ "modelRaw": "gemini-2.0-flash-thinking-exp-01-21",
1075
+ "metric": "pass_rate_2",
1076
+ "score": 18.2,
1077
+ "scoreScale": "percent",
1078
+ "sampleSize": 225,
1079
+ "date": "2025-01-21",
1080
+ "extra": {
1081
+ "passRate1": 5.8,
1082
+ "passNum1": 13,
1083
+ "passNum2": 41,
1084
+ "totalCostUsd": 0,
1085
+ "secondsPerCase": 24.2,
1086
+ "editFormat": "diff"
1087
+ }
1088
+ },
1089
+ {
1090
+ "source": "aider_polyglot",
1091
+ "benchmark": "aider_polyglot",
1092
+ "url": "https://aider.chat/docs/leaderboards/",
1093
+ "modelRaw": "gpt-4o-2024-11-20",
1094
+ "metric": "pass_rate_2",
1095
+ "score": 18.2,
1096
+ "scoreScale": "percent",
1097
+ "sampleSize": 225,
1098
+ "date": "2024-12-30",
1099
+ "extra": {
1100
+ "passRate1": 4.9,
1101
+ "passNum1": 11,
1102
+ "passNum2": 41,
1103
+ "totalCostUsd": 6.7351,
1104
+ "secondsPerCase": 12.1,
1105
+ "editFormat": "diff"
1106
+ }
1107
+ },
1108
+ {
1109
+ "source": "aider_polyglot",
1110
+ "benchmark": "aider_polyglot",
1111
+ "url": "https://aider.chat/docs/leaderboards/",
1112
+ "modelRaw": "DeepSeek Chat V2.5",
1113
+ "metric": "pass_rate_2",
1114
+ "score": 17.8,
1115
+ "scoreScale": "percent",
1116
+ "sampleSize": 225,
1117
+ "date": "2024-12-21",
1118
+ "extra": {
1119
+ "passRate1": 5.3,
1120
+ "passNum1": 12,
1121
+ "passNum2": 40,
1122
+ "totalCostUsd": 0.5101,
1123
+ "secondsPerCase": 184,
1124
+ "editFormat": "diff"
1125
+ }
1126
+ },
1127
+ {
1128
+ "source": "aider_polyglot",
1129
+ "benchmark": "aider_polyglot",
1130
+ "url": "https://aider.chat/docs/leaderboards/",
1131
+ "modelRaw": "Qwen2.5-Coder-32B-Instruct",
1132
+ "metric": "pass_rate_2",
1133
+ "score": 16.4,
1134
+ "scoreScale": "percent",
1135
+ "sampleSize": 225,
1136
+ "date": "2024-12-26",
1137
+ "extra": {
1138
+ "passRate1": 4.9,
1139
+ "passNum1": 11,
1140
+ "passNum2": 37,
1141
+ "totalCostUsd": 0,
1142
+ "secondsPerCase": 42,
1143
+ "editFormat": "whole"
1144
+ }
1145
+ },
1146
+ {
1147
+ "source": "aider_polyglot",
1148
+ "benchmark": "aider_polyglot",
1149
+ "url": "https://aider.chat/docs/leaderboards/",
1150
+ "modelRaw": "Llama 4 Maverick",
1151
+ "metric": "pass_rate_2",
1152
+ "score": 15.6,
1153
+ "scoreScale": "percent",
1154
+ "sampleSize": 225,
1155
+ "date": "2025-04-06",
1156
+ "extra": {
1157
+ "passRate1": 4.4,
1158
+ "passNum1": 10,
1159
+ "passNum2": 35,
1160
+ "totalCostUsd": 0,
1161
+ "secondsPerCase": 20.5,
1162
+ "editFormat": "whole"
1163
+ }
1164
+ },
1165
+ {
1166
+ "source": "aider_polyglot",
1167
+ "benchmark": "aider_polyglot",
1168
+ "url": "https://aider.chat/docs/leaderboards/",
1169
+ "modelRaw": "yi-lightning",
1170
+ "metric": "pass_rate_2",
1171
+ "score": 12.9,
1172
+ "scoreScale": "percent",
1173
+ "sampleSize": 225,
1174
+ "date": "2024-12-23",
1175
+ "extra": {
1176
+ "passRate1": 5.8,
1177
+ "passNum1": 13,
1178
+ "passNum2": 29,
1179
+ "totalCostUsd": 0,
1180
+ "secondsPerCase": 146.7,
1181
+ "editFormat": "whole"
1182
+ }
1183
+ },
1184
+ {
1185
+ "source": "aider_polyglot",
1186
+ "benchmark": "aider_polyglot",
1187
+ "url": "https://aider.chat/docs/leaderboards/",
1188
+ "modelRaw": "command-a-03-2025-quality",
1189
+ "metric": "pass_rate_2",
1190
+ "score": 12,
1191
+ "scoreScale": "percent",
1192
+ "sampleSize": 225,
1193
+ "date": "2025-03-14",
1194
+ "extra": {
1195
+ "passRate1": 2.2,
1196
+ "passNum1": 5,
1197
+ "passNum2": 27,
1198
+ "totalCostUsd": 0,
1199
+ "secondsPerCase": 85.1,
1200
+ "editFormat": "whole"
1201
+ }
1202
+ },
1203
+ {
1204
+ "source": "aider_polyglot",
1205
+ "benchmark": "aider_polyglot",
1206
+ "url": "https://aider.chat/docs/leaderboards/",
1207
+ "modelRaw": "Codestral 25.01",
1208
+ "metric": "pass_rate_2",
1209
+ "score": 11.1,
1210
+ "scoreScale": "percent",
1211
+ "sampleSize": 225,
1212
+ "date": "2025-01-13",
1213
+ "extra": {
1214
+ "passRate1": 4,
1215
+ "passNum1": 9,
1216
+ "passNum2": 25,
1217
+ "totalCostUsd": 1.9834,
1218
+ "secondsPerCase": 9.3,
1219
+ "editFormat": "whole"
1220
+ }
1221
+ },
1222
+ {
1223
+ "source": "aider_polyglot",
1224
+ "benchmark": "aider_polyglot",
1225
+ "url": "https://aider.chat/docs/leaderboards/",
1226
+ "modelRaw": "openhands-lm-32b-v0.1",
1227
+ "metric": "pass_rate_2",
1228
+ "score": 10.2,
1229
+ "scoreScale": "percent",
1230
+ "sampleSize": 225,
1231
+ "date": "2025-04-19",
1232
+ "extra": {
1233
+ "passRate1": 4,
1234
+ "passNum1": 9,
1235
+ "passNum2": 23,
1236
+ "totalCostUsd": 0,
1237
+ "secondsPerCase": 195.6,
1238
+ "editFormat": "whole"
1239
+ }
1240
+ },
1241
+ {
1242
+ "source": "aider_polyglot",
1243
+ "benchmark": "aider_polyglot",
1244
+ "url": "https://aider.chat/docs/leaderboards/",
1245
+ "modelRaw": "gpt-4.1-nano",
1246
+ "metric": "pass_rate_2",
1247
+ "score": 8.9,
1248
+ "scoreScale": "percent",
1249
+ "sampleSize": 225,
1250
+ "date": "2025-04-14",
1251
+ "extra": {
1252
+ "passRate1": 3.1,
1253
+ "passNum1": 7,
1254
+ "passNum2": 20,
1255
+ "totalCostUsd": 0.4281,
1256
+ "secondsPerCase": 12,
1257
+ "editFormat": "whole"
1258
+ }
1259
+ },
1260
+ {
1261
+ "source": "aider_polyglot",
1262
+ "benchmark": "aider_polyglot",
1263
+ "url": "https://aider.chat/docs/leaderboards/",
1264
+ "modelRaw": "Qwen2.5-Coder-32B-Instruct",
1265
+ "metric": "pass_rate_2",
1266
+ "score": 8,
1267
+ "scoreScale": "percent",
1268
+ "sampleSize": 225,
1269
+ "date": "2024-12-22",
1270
+ "extra": {
1271
+ "passRate1": 4.4,
1272
+ "passNum1": 10,
1273
+ "passNum2": 18,
1274
+ "totalCostUsd": 0,
1275
+ "secondsPerCase": 84.4,
1276
+ "editFormat": "diff"
1277
+ }
1278
+ },
1279
+ {
1280
+ "source": "aider_polyglot",
1281
+ "benchmark": "aider_polyglot",
1282
+ "url": "https://aider.chat/docs/leaderboards/",
1283
+ "modelRaw": "gemma-3-27b-it",
1284
+ "metric": "pass_rate_2",
1285
+ "score": 4.9,
1286
+ "scoreScale": "percent",
1287
+ "sampleSize": 225,
1288
+ "date": "2025-03-15",
1289
+ "extra": {
1290
+ "passRate1": 1.8,
1291
+ "passNum1": 4,
1292
+ "passNum2": 11,
1293
+ "totalCostUsd": 0,
1294
+ "secondsPerCase": 79.7,
1295
+ "editFormat": "whole"
1296
+ }
1297
+ },
1298
+ {
1299
+ "source": "aider_polyglot",
1300
+ "benchmark": "aider_polyglot",
1301
+ "url": "https://aider.chat/docs/leaderboards/",
1302
+ "modelRaw": "gpt-4o-mini-2024-07-18",
1303
+ "metric": "pass_rate_2",
1304
+ "score": 3.6,
1305
+ "scoreScale": "percent",
1306
+ "sampleSize": 225,
1307
+ "date": "2024-12-21",
1308
+ "extra": {
1309
+ "passRate1": 0.9,
1310
+ "passNum1": 2,
1311
+ "passNum2": 8,
1312
+ "totalCostUsd": 0.3236,
1313
+ "secondsPerCase": 17.3,
1314
+ "editFormat": "whole"
1315
+ }
1316
+ }
1317
+ ]
1318
+ }