aiforcecli-chat 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. package/License.MD +49 -0
  2. package/README.md +642 -0
  3. package/aiforcecli.config.example.json +66 -0
  4. package/assets/README.md +14 -0
  5. package/dist/cli.js +2 -0
  6. package/dist/index.js +2 -0
  7. package/package.json +62 -0
  8. package/tools/scorecard/README.md +92 -0
  9. package/tools/scorecard/config.json +134 -0
  10. package/tools/scorecard/fetch.mjs +335 -0
  11. package/tools/scorecard/generate.mjs +289 -0
  12. package/tools/scorecard/generated/example/invalid-rows.json +1 -0
  13. package/tools/scorecard/generated/example/scorecard-report.md +147 -0
  14. package/tools/scorecard/generated/example/scorecard.compact.json +61 -0
  15. package/tools/scorecard/generated/example/scorecard.json +1492 -0
  16. package/tools/scorecard/generated/example/unmapped-models.json +1492 -0
  17. package/tools/scorecard/generated/raw/aider_polyglot.html +21071 -0
  18. package/tools/scorecard/generated/raw/terminal_bench_2_1.html +2 -0
  19. package/tools/scorecard/generated/scorecard/invalid-rows.json +1 -0
  20. package/tools/scorecard/generated/scorecard/scorecard-report.md +133 -0
  21. package/tools/scorecard/generated/scorecard/scorecard.compact.json +51 -0
  22. package/tools/scorecard/generated/scorecard/scorecard.json +1181 -0
  23. package/tools/scorecard/generated/scorecard/unmapped-models.json +1492 -0
  24. package/tools/scorecard/generated/scorecard-example/invalid-rows.json +1 -0
  25. package/tools/scorecard/generated/scorecard-example/scorecard-report.md +40 -0
  26. package/tools/scorecard/generated/scorecard-example/scorecard.compact.json +22 -0
  27. package/tools/scorecard/generated/scorecard-example/scorecard.json +389 -0
  28. package/tools/scorecard/generated/scorecard-example/unmapped-models.json +1 -0
  29. package/tools/scorecard/generated/scorecard-fetch/raw/aider_polyglot.html +21071 -0
  30. package/tools/scorecard/generated/scorecard-fetch/raw/terminal_bench_2_1.html +2 -0
  31. package/tools/scorecard/snapshots/example.normalized.example.json +38 -0
  32. package/tools/scorecard/snapshots/live.aider_polyglot.json +1318 -0
  33. package/tools/scorecard/snapshots/live.terminal_bench_2_1.json +294 -0
@@ -0,0 +1,1492 @@
1
+ [
2
+ {
3
+ "source": "aider_polyglot",
4
+ "benchmark": "aider_polyglot",
5
+ "url": "https://aider.chat/docs/leaderboards/",
6
+ "modelRaw": "o3-pro high",
7
+ "metric": "pass_rate_2",
8
+ "score": 84.9,
9
+ "scoreScale": "percent",
10
+ "sampleSize": 225,
11
+ "date": "2025-06-28",
12
+ "extra": {
13
+ "passRate1": 43.6,
14
+ "passNum1": 98,
15
+ "passNum2": 191,
16
+ "totalCostUsd": 146.3249,
17
+ "secondsPerCase": 449,
18
+ "editFormat": "diff"
19
+ },
20
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
21
+ "snapshotIndex": 2
22
+ },
23
+ {
24
+ "source": "aider_polyglot",
25
+ "benchmark": "aider_polyglot",
26
+ "url": "https://aider.chat/docs/leaderboards/",
27
+ "modelRaw": "o3 high",
28
+ "metric": "pass_rate_2",
29
+ "score": 81.3,
30
+ "scoreScale": "percent",
31
+ "sampleSize": 225,
32
+ "date": "2025-06-25",
33
+ "extra": {
34
+ "passRate1": 40,
35
+ "passNum1": 90,
36
+ "passNum2": 183,
37
+ "totalCostUsd": 21.2259,
38
+ "secondsPerCase": 197.3,
39
+ "editFormat": "diff"
40
+ },
41
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
42
+ "snapshotIndex": 5
43
+ },
44
+ {
45
+ "source": "aider_polyglot",
46
+ "benchmark": "aider_polyglot",
47
+ "url": "https://aider.chat/docs/leaderboards/",
48
+ "modelRaw": "grok-4 high",
49
+ "metric": "pass_rate_2",
50
+ "score": 79.6,
51
+ "scoreScale": "percent",
52
+ "sampleSize": 225,
53
+ "date": "2025-07-11",
54
+ "extra": {
55
+ "passRate1": 40.9,
56
+ "passNum1": 92,
57
+ "passNum2": 179,
58
+ "totalCostUsd": 59.6182,
59
+ "secondsPerCase": 403.2,
60
+ "editFormat": "diff"
61
+ },
62
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
63
+ "snapshotIndex": 6
64
+ },
65
+ {
66
+ "source": "aider_polyglot",
67
+ "benchmark": "aider_polyglot",
68
+ "url": "https://aider.chat/docs/leaderboards/",
69
+ "modelRaw": "o3 high + gpt-4.1",
70
+ "metric": "pass_rate_2",
71
+ "score": 78.2,
72
+ "scoreScale": "percent",
73
+ "sampleSize": 224,
74
+ "date": "2025-06-27",
75
+ "extra": {
76
+ "passRate1": 34.8,
77
+ "passNum1": 78,
78
+ "passNum2": 176,
79
+ "totalCostUsd": 17.5518,
80
+ "secondsPerCase": 121.8,
81
+ "editFormat": "architect"
82
+ },
83
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
84
+ "snapshotIndex": 8
85
+ },
86
+ {
87
+ "source": "aider_polyglot",
88
+ "benchmark": "aider_polyglot",
89
+ "url": "https://aider.chat/docs/leaderboards/",
90
+ "modelRaw": "o3",
91
+ "metric": "pass_rate_2",
92
+ "score": 76.9,
93
+ "scoreScale": "percent",
94
+ "sampleSize": 225,
95
+ "date": "2025-06-25",
96
+ "extra": {
97
+ "passRate1": 40.9,
98
+ "passNum1": 92,
99
+ "passNum2": 173,
100
+ "totalCostUsd": 13.7517,
101
+ "secondsPerCase": 101.7,
102
+ "editFormat": "diff"
103
+ },
104
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
105
+ "snapshotIndex": 9
106
+ },
107
+ {
108
+ "source": "aider_polyglot",
109
+ "benchmark": "aider_polyglot",
110
+ "url": "https://aider.chat/docs/leaderboards/",
111
+ "modelRaw": "DeepSeek-V3.2-Exp Reasoner",
112
+ "metric": "pass_rate_2",
113
+ "score": 74.2,
114
+ "scoreScale": "percent",
115
+ "sampleSize": 225,
116
+ "date": "2025-10-03",
117
+ "extra": {
118
+ "passRate1": 39.6,
119
+ "passNum1": 89,
120
+ "passNum2": 167,
121
+ "totalCostUsd": 1.3045,
122
+ "secondsPerCase": 291.2,
123
+ "editFormat": "diff"
124
+ },
125
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
126
+ "snapshotIndex": 11
127
+ },
128
+ {
129
+ "source": "aider_polyglot",
130
+ "benchmark": "aider_polyglot",
131
+ "url": "https://aider.chat/docs/leaderboards/",
132
+ "modelRaw": "Gemini 2.5 Pro Preview 03-25",
133
+ "metric": "pass_rate_2",
134
+ "score": 72.9,
135
+ "scoreScale": "percent",
136
+ "sampleSize": 225,
137
+ "date": "2025-04-12",
138
+ "extra": {
139
+ "passRate1": 40.9,
140
+ "passNum1": 92,
141
+ "passNum2": 164,
142
+ "totalCostUsd": 0,
143
+ "secondsPerCase": 45.3,
144
+ "editFormat": "diff-fenced"
145
+ },
146
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
147
+ "snapshotIndex": 12
148
+ },
149
+ {
150
+ "source": "aider_polyglot",
151
+ "benchmark": "aider_polyglot",
152
+ "url": "https://aider.chat/docs/leaderboards/",
153
+ "modelRaw": "claude-opus-4-20250514 32k thinking",
154
+ "metric": "pass_rate_2",
155
+ "score": 72,
156
+ "scoreScale": "percent",
157
+ "sampleSize": 225,
158
+ "date": "2025-05-25",
159
+ "extra": {
160
+ "passRate1": 37.3,
161
+ "passNum1": 84,
162
+ "passNum2": 162,
163
+ "totalCostUsd": 65.7484,
164
+ "secondsPerCase": 44.1,
165
+ "editFormat": "diff"
166
+ },
167
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
168
+ "snapshotIndex": 13
169
+ },
170
+ {
171
+ "source": "aider_polyglot",
172
+ "benchmark": "aider_polyglot",
173
+ "url": "https://aider.chat/docs/leaderboards/",
174
+ "modelRaw": "o4-mini high",
175
+ "metric": "pass_rate_2",
176
+ "score": 72,
177
+ "scoreScale": "percent",
178
+ "sampleSize": 225,
179
+ "date": "2025-04-16",
180
+ "extra": {
181
+ "passRate1": 19.6,
182
+ "passNum1": 44,
183
+ "passNum2": 162,
184
+ "totalCostUsd": 19.6399,
185
+ "secondsPerCase": 176.5,
186
+ "editFormat": "diff"
187
+ },
188
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
189
+ "snapshotIndex": 14
190
+ },
191
+ {
192
+ "source": "aider_polyglot",
193
+ "benchmark": "aider_polyglot",
194
+ "url": "https://aider.chat/docs/leaderboards/",
195
+ "modelRaw": "DeepSeek R1 0528",
196
+ "metric": "pass_rate_2",
197
+ "score": 71.4,
198
+ "scoreScale": "percent",
199
+ "sampleSize": 224,
200
+ "date": "2025-06-06",
201
+ "extra": {
202
+ "passRate1": 34.4,
203
+ "passNum1": 77,
204
+ "passNum2": 160,
205
+ "totalCostUsd": 4.8016,
206
+ "secondsPerCase": 716.6,
207
+ "editFormat": "diff"
208
+ },
209
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
210
+ "snapshotIndex": 15
211
+ },
212
+ {
213
+ "source": "aider_polyglot",
214
+ "benchmark": "aider_polyglot",
215
+ "url": "https://aider.chat/docs/leaderboards/",
216
+ "modelRaw": "claude-opus-4-20250514 no think",
217
+ "metric": "pass_rate_2",
218
+ "score": 70.7,
219
+ "scoreScale": "percent",
220
+ "sampleSize": 225,
221
+ "date": "2025-05-25",
222
+ "extra": {
223
+ "passRate1": 32.9,
224
+ "passNum1": 74,
225
+ "passNum2": 159,
226
+ "totalCostUsd": 68.6253,
227
+ "secondsPerCase": 42.5,
228
+ "editFormat": "diff"
229
+ },
230
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
231
+ "snapshotIndex": 16
232
+ },
233
+ {
234
+ "source": "aider_polyglot",
235
+ "benchmark": "aider_polyglot",
236
+ "url": "https://aider.chat/docs/leaderboards/",
237
+ "modelRaw": "DeepSeek-V3.2-Exp Chat",
238
+ "metric": "pass_rate_2",
239
+ "score": 70.2,
240
+ "scoreScale": "percent",
241
+ "sampleSize": 225,
242
+ "date": "2025-10-03",
243
+ "extra": {
244
+ "passRate1": 38.7,
245
+ "passNum1": 87,
246
+ "passNum2": 158,
247
+ "totalCostUsd": 0.8756,
248
+ "secondsPerCase": 104,
249
+ "editFormat": "diff"
250
+ },
251
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
252
+ "snapshotIndex": 17
253
+ },
254
+ {
255
+ "source": "aider_polyglot",
256
+ "benchmark": "aider_polyglot",
257
+ "url": "https://aider.chat/docs/leaderboards/",
258
+ "modelRaw": "claude-3-7-sonnet-20250219 32k thinking tokens",
259
+ "metric": "pass_rate_2",
260
+ "score": 64.9,
261
+ "scoreScale": "percent",
262
+ "sampleSize": 225,
263
+ "date": "2025-02-24",
264
+ "extra": {
265
+ "passRate1": 29.3,
266
+ "passNum1": 66,
267
+ "passNum2": 146,
268
+ "totalCostUsd": 36.8343,
269
+ "secondsPerCase": 105.2,
270
+ "editFormat": "diff"
271
+ },
272
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
273
+ "snapshotIndex": 18
274
+ },
275
+ {
276
+ "source": "aider_polyglot",
277
+ "benchmark": "aider_polyglot",
278
+ "url": "https://aider.chat/docs/leaderboards/",
279
+ "modelRaw": "DeepSeek R1 + claude-3-5-sonnet-20241022",
280
+ "metric": "pass_rate_2",
281
+ "score": 64,
282
+ "scoreScale": "percent",
283
+ "sampleSize": 225,
284
+ "date": "2025-01-23",
285
+ "extra": {
286
+ "passRate1": 27.1,
287
+ "passNum1": 61,
288
+ "passNum2": 144,
289
+ "totalCostUsd": 13.2933,
290
+ "secondsPerCase": 251.6,
291
+ "editFormat": "architect"
292
+ },
293
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
294
+ "snapshotIndex": 19
295
+ },
296
+ {
297
+ "source": "aider_polyglot",
298
+ "benchmark": "aider_polyglot",
299
+ "url": "https://aider.chat/docs/leaderboards/",
300
+ "modelRaw": "o1-2024-12-17 high",
301
+ "metric": "pass_rate_2",
302
+ "score": 61.7,
303
+ "scoreScale": "percent",
304
+ "sampleSize": 224,
305
+ "date": "2024-12-21",
306
+ "extra": {
307
+ "passRate1": 23.7,
308
+ "passNum1": 53,
309
+ "passNum2": 139,
310
+ "totalCostUsd": 186.4958,
311
+ "secondsPerCase": 133.2,
312
+ "editFormat": "diff"
313
+ },
314
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
315
+ "snapshotIndex": 20
316
+ },
317
+ {
318
+ "source": "aider_polyglot",
319
+ "benchmark": "aider_polyglot",
320
+ "url": "https://aider.chat/docs/leaderboards/",
321
+ "modelRaw": "claude-sonnet-4-20250514 32k thinking",
322
+ "metric": "pass_rate_2",
323
+ "score": 61.3,
324
+ "scoreScale": "percent",
325
+ "sampleSize": 225,
326
+ "date": "2025-05-24",
327
+ "extra": {
328
+ "passRate1": 25.8,
329
+ "passNum1": 58,
330
+ "passNum2": 138,
331
+ "totalCostUsd": 26.5755,
332
+ "secondsPerCase": 79.9,
333
+ "editFormat": "diff"
334
+ },
335
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
336
+ "snapshotIndex": 21
337
+ },
338
+ {
339
+ "source": "aider_polyglot",
340
+ "benchmark": "aider_polyglot",
341
+ "url": "https://aider.chat/docs/leaderboards/",
342
+ "modelRaw": "claude-3-7-sonnet-20250219 no thinking",
343
+ "metric": "pass_rate_2",
344
+ "score": 60.4,
345
+ "scoreScale": "percent",
346
+ "sampleSize": 225,
347
+ "date": "2025-02-24",
348
+ "extra": {
349
+ "passRate1": 24.4,
350
+ "passNum1": 55,
351
+ "passNum2": 136,
352
+ "totalCostUsd": 17.7191,
353
+ "secondsPerCase": 28.3,
354
+ "editFormat": "diff"
355
+ },
356
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
357
+ "snapshotIndex": 22
358
+ },
359
+ {
360
+ "source": "aider_polyglot",
361
+ "benchmark": "aider_polyglot",
362
+ "url": "https://aider.chat/docs/leaderboards/",
363
+ "modelRaw": "o3-mini high",
364
+ "metric": "pass_rate_2",
365
+ "score": 60.4,
366
+ "scoreScale": "percent",
367
+ "sampleSize": 224,
368
+ "date": "2025-01-31",
369
+ "extra": {
370
+ "passRate1": 21,
371
+ "passNum1": 47,
372
+ "passNum2": 136,
373
+ "totalCostUsd": 18.1584,
374
+ "secondsPerCase": 124.6,
375
+ "editFormat": "diff"
376
+ },
377
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
378
+ "snapshotIndex": 23
379
+ },
380
+ {
381
+ "source": "aider_polyglot",
382
+ "benchmark": "aider_polyglot",
383
+ "url": "https://aider.chat/docs/leaderboards/",
384
+ "modelRaw": "Qwen3 235B A22B diff, no think, Alibaba API",
385
+ "metric": "pass_rate_2",
386
+ "score": 59.6,
387
+ "scoreScale": "percent",
388
+ "sampleSize": 225,
389
+ "date": "2025-05-09",
390
+ "extra": {
391
+ "passRate1": 28.9,
392
+ "passNum1": 65,
393
+ "passNum2": 134,
394
+ "totalCostUsd": 0,
395
+ "secondsPerCase": 45.4,
396
+ "editFormat": "diff"
397
+ },
398
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
399
+ "snapshotIndex": 24
400
+ },
401
+ {
402
+ "source": "aider_polyglot",
403
+ "benchmark": "aider_polyglot",
404
+ "url": "https://aider.chat/docs/leaderboards/",
405
+ "modelRaw": "Kimi K2",
406
+ "metric": "pass_rate_2",
407
+ "score": 59.1,
408
+ "scoreScale": "percent",
409
+ "sampleSize": 225,
410
+ "date": "2025-07-17",
411
+ "extra": {
412
+ "passRate1": 20.4,
413
+ "passNum1": 46,
414
+ "passNum2": 133,
415
+ "totalCostUsd": 1.2357,
416
+ "secondsPerCase": 67.6,
417
+ "editFormat": "diff"
418
+ },
419
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
420
+ "snapshotIndex": 25
421
+ },
422
+ {
423
+ "source": "aider_polyglot",
424
+ "benchmark": "aider_polyglot",
425
+ "url": "https://aider.chat/docs/leaderboards/",
426
+ "modelRaw": "DeepSeek R1",
427
+ "metric": "pass_rate_2",
428
+ "score": 56.9,
429
+ "scoreScale": "percent",
430
+ "sampleSize": 225,
431
+ "date": "2025-01-20",
432
+ "extra": {
433
+ "passRate1": 26.7,
434
+ "passNum1": 60,
435
+ "passNum2": 128,
436
+ "totalCostUsd": 5.4193,
437
+ "secondsPerCase": 113.7,
438
+ "editFormat": "diff"
439
+ },
440
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
441
+ "snapshotIndex": 26
442
+ },
443
+ {
444
+ "source": "aider_polyglot",
445
+ "benchmark": "aider_polyglot",
446
+ "url": "https://aider.chat/docs/leaderboards/",
447
+ "modelRaw": "claude-sonnet-4-20250514 no thinking",
448
+ "metric": "pass_rate_2",
449
+ "score": 56.4,
450
+ "scoreScale": "percent",
451
+ "sampleSize": 225,
452
+ "date": "2025-05-24",
453
+ "extra": {
454
+ "passRate1": 20.4,
455
+ "passNum1": 46,
456
+ "passNum2": 127,
457
+ "totalCostUsd": 15.8155,
458
+ "secondsPerCase": 29.8,
459
+ "editFormat": "diff"
460
+ },
461
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
462
+ "snapshotIndex": 27
463
+ },
464
+ {
465
+ "source": "aider_polyglot",
466
+ "benchmark": "aider_polyglot",
467
+ "url": "https://aider.chat/docs/leaderboards/",
468
+ "modelRaw": "gemini-2.5-flash-preview-05-20 24k think",
469
+ "metric": "pass_rate_2",
470
+ "score": 55.1,
471
+ "scoreScale": "percent",
472
+ "sampleSize": 225,
473
+ "date": "2025-05-25",
474
+ "extra": {
475
+ "passRate1": 26.2,
476
+ "passNum1": 59,
477
+ "passNum2": 124,
478
+ "totalCostUsd": 8.5625,
479
+ "secondsPerCase": 53.9,
480
+ "editFormat": "diff"
481
+ },
482
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
483
+ "snapshotIndex": 28
484
+ },
485
+ {
486
+ "source": "aider_polyglot",
487
+ "benchmark": "aider_polyglot",
488
+ "url": "https://aider.chat/docs/leaderboards/",
489
+ "modelRaw": "DeepSeek V3 0324",
490
+ "metric": "pass_rate_2",
491
+ "score": 55.1,
492
+ "scoreScale": "percent",
493
+ "sampleSize": 225,
494
+ "date": "2025-03-24",
495
+ "extra": {
496
+ "passRate1": 28,
497
+ "passNum1": 63,
498
+ "passNum2": 124,
499
+ "totalCostUsd": 1.1164,
500
+ "secondsPerCase": 290,
501
+ "editFormat": "diff"
502
+ },
503
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
504
+ "snapshotIndex": 29
505
+ },
506
+ {
507
+ "source": "aider_polyglot",
508
+ "benchmark": "aider_polyglot",
509
+ "url": "https://aider.chat/docs/leaderboards/",
510
+ "modelRaw": "Quasar Alpha",
511
+ "metric": "pass_rate_2",
512
+ "score": 54.7,
513
+ "scoreScale": "percent",
514
+ "sampleSize": 225,
515
+ "date": "2025-04-04",
516
+ "extra": {
517
+ "passRate1": 21.8,
518
+ "passNum1": 49,
519
+ "passNum2": 123,
520
+ "totalCostUsd": 0,
521
+ "secondsPerCase": 14.8,
522
+ "editFormat": "diff"
523
+ },
524
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
525
+ "snapshotIndex": 30
526
+ },
527
+ {
528
+ "source": "aider_polyglot",
529
+ "benchmark": "aider_polyglot",
530
+ "url": "https://aider.chat/docs/leaderboards/",
531
+ "modelRaw": "o3-mini medium",
532
+ "metric": "pass_rate_2",
533
+ "score": 53.8,
534
+ "scoreScale": "percent",
535
+ "sampleSize": 225,
536
+ "date": "2025-01-31",
537
+ "extra": {
538
+ "passRate1": 19.1,
539
+ "passNum1": 43,
540
+ "passNum2": 121,
541
+ "totalCostUsd": 8.8599,
542
+ "secondsPerCase": 47.2,
543
+ "editFormat": "diff"
544
+ },
545
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
546
+ "snapshotIndex": 31
547
+ },
548
+ {
549
+ "source": "aider_polyglot",
550
+ "benchmark": "aider_polyglot",
551
+ "url": "https://aider.chat/docs/leaderboards/",
552
+ "modelRaw": "Grok 3 Beta",
553
+ "metric": "pass_rate_2",
554
+ "score": 53.3,
555
+ "scoreScale": "percent",
556
+ "sampleSize": 225,
557
+ "date": "2025-04-10",
558
+ "extra": {
559
+ "passRate1": 22.2,
560
+ "passNum1": 50,
561
+ "passNum2": 120,
562
+ "totalCostUsd": 11.0338,
563
+ "secondsPerCase": 15.3,
564
+ "editFormat": "diff"
565
+ },
566
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
567
+ "snapshotIndex": 32
568
+ },
569
+ {
570
+ "source": "aider_polyglot",
571
+ "benchmark": "aider_polyglot",
572
+ "url": "https://aider.chat/docs/leaderboards/",
573
+ "modelRaw": "Optimus Alpha",
574
+ "metric": "pass_rate_2",
575
+ "score": 52.9,
576
+ "scoreScale": "percent",
577
+ "sampleSize": 225,
578
+ "date": "2025-04-10",
579
+ "extra": {
580
+ "passRate1": 21.3,
581
+ "passNum1": 48,
582
+ "passNum2": 119,
583
+ "totalCostUsd": 0,
584
+ "secondsPerCase": 18.4,
585
+ "editFormat": "diff"
586
+ },
587
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
588
+ "snapshotIndex": 33
589
+ },
590
+ {
591
+ "source": "aider_polyglot",
592
+ "benchmark": "aider_polyglot",
593
+ "url": "https://aider.chat/docs/leaderboards/",
594
+ "modelRaw": "gpt-4.1",
595
+ "metric": "pass_rate_2",
596
+ "score": 52.4,
597
+ "scoreScale": "percent",
598
+ "sampleSize": 225,
599
+ "date": "2025-04-14",
600
+ "extra": {
601
+ "passRate1": 20,
602
+ "passNum1": 45,
603
+ "passNum2": 118,
604
+ "totalCostUsd": 9.8556,
605
+ "secondsPerCase": 20.5,
606
+ "editFormat": "diff"
607
+ },
608
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
609
+ "snapshotIndex": 34
610
+ },
611
+ {
612
+ "source": "aider_polyglot",
613
+ "benchmark": "aider_polyglot",
614
+ "url": "https://aider.chat/docs/leaderboards/",
615
+ "modelRaw": "claude-3-5-sonnet-20241022",
616
+ "metric": "pass_rate_2",
617
+ "score": 51.6,
618
+ "scoreScale": "percent",
619
+ "sampleSize": 225,
620
+ "date": "2025-01-17",
621
+ "extra": {
622
+ "passRate1": 22.2,
623
+ "passNum1": 50,
624
+ "passNum2": 116,
625
+ "totalCostUsd": 14.4063,
626
+ "secondsPerCase": 21.4,
627
+ "editFormat": "diff"
628
+ },
629
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
630
+ "snapshotIndex": 35
631
+ },
632
+ {
633
+ "source": "aider_polyglot",
634
+ "benchmark": "aider_polyglot",
635
+ "url": "https://aider.chat/docs/leaderboards/",
636
+ "modelRaw": "Grok 3 Mini Beta high",
637
+ "metric": "pass_rate_2",
638
+ "score": 49.3,
639
+ "scoreScale": "percent",
640
+ "sampleSize": 225,
641
+ "date": "2025-04-10",
642
+ "extra": {
643
+ "passRate1": 17.3,
644
+ "passNum1": 39,
645
+ "passNum2": 111,
646
+ "totalCostUsd": 0.7346,
647
+ "secondsPerCase": 79.1,
648
+ "editFormat": "whole"
649
+ },
650
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
651
+ "snapshotIndex": 36
652
+ },
653
+ {
654
+ "source": "aider_polyglot",
655
+ "benchmark": "aider_polyglot",
656
+ "url": "https://aider.chat/docs/leaderboards/",
657
+ "modelRaw": "DeepSeek Chat V3 prev",
658
+ "metric": "pass_rate_2",
659
+ "score": 48.4,
660
+ "scoreScale": "percent",
661
+ "sampleSize": 225,
662
+ "date": "2024-12-25",
663
+ "extra": {
664
+ "passRate1": 22.7,
665
+ "passNum1": 51,
666
+ "passNum2": 109,
667
+ "totalCostUsd": 0.3369,
668
+ "secondsPerCase": 34.8,
669
+ "editFormat": "diff"
670
+ },
671
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
672
+ "snapshotIndex": 37
673
+ },
674
+ {
675
+ "source": "aider_polyglot",
676
+ "benchmark": "aider_polyglot",
677
+ "url": "https://aider.chat/docs/leaderboards/",
678
+ "modelRaw": "gemini-2.5-flash-preview-04-17 default",
679
+ "metric": "pass_rate_2",
680
+ "score": 47.1,
681
+ "scoreScale": "percent",
682
+ "sampleSize": 225,
683
+ "date": "2025-04-20",
684
+ "extra": {
685
+ "passRate1": 21.8,
686
+ "passNum1": 49,
687
+ "passNum2": 106,
688
+ "totalCostUsd": 1.8451,
689
+ "secondsPerCase": 50.1,
690
+ "editFormat": "diff"
691
+ },
692
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
693
+ "snapshotIndex": 38
694
+ },
695
+ {
696
+ "source": "aider_polyglot",
697
+ "benchmark": "aider_polyglot",
698
+ "url": "https://aider.chat/docs/leaderboards/",
699
+ "modelRaw": "chatgpt-4o-latest 2025-03-29",
700
+ "metric": "pass_rate_2",
701
+ "score": 45.3,
702
+ "scoreScale": "percent",
703
+ "sampleSize": 225,
704
+ "date": "2025-03-29",
705
+ "extra": {
706
+ "passRate1": 16.4,
707
+ "passNum1": 37,
708
+ "passNum2": 102,
709
+ "totalCostUsd": 19.7416,
710
+ "secondsPerCase": 10.3,
711
+ "editFormat": "diff"
712
+ },
713
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
714
+ "snapshotIndex": 39
715
+ },
716
+ {
717
+ "source": "aider_polyglot",
718
+ "benchmark": "aider_polyglot",
719
+ "url": "https://aider.chat/docs/leaderboards/",
720
+ "modelRaw": "gpt-4.5-preview",
721
+ "metric": "pass_rate_2",
722
+ "score": 44.9,
723
+ "scoreScale": "percent",
724
+ "sampleSize": 224,
725
+ "date": "2025-02-27",
726
+ "extra": {
727
+ "passRate1": 22.3,
728
+ "passNum1": 50,
729
+ "passNum2": 101,
730
+ "totalCostUsd": 183.1802,
731
+ "secondsPerCase": 113.5,
732
+ "editFormat": "diff"
733
+ },
734
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
735
+ "snapshotIndex": 40
736
+ },
737
+ {
738
+ "source": "aider_polyglot",
739
+ "benchmark": "aider_polyglot",
740
+ "url": "https://aider.chat/docs/leaderboards/",
741
+ "modelRaw": "gemini-2.5-flash-preview-05-20 no think",
742
+ "metric": "pass_rate_2",
743
+ "score": 44,
744
+ "scoreScale": "percent",
745
+ "sampleSize": 225,
746
+ "date": "2025-05-26",
747
+ "extra": {
748
+ "passRate1": 20.9,
749
+ "passNum1": 47,
750
+ "passNum2": 99,
751
+ "totalCostUsd": 1.1354,
752
+ "secondsPerCase": 12.2,
753
+ "editFormat": "diff"
754
+ },
755
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
756
+ "snapshotIndex": 41
757
+ },
758
+ {
759
+ "source": "aider_polyglot",
760
+ "benchmark": "aider_polyglot",
761
+ "url": "https://aider.chat/docs/leaderboards/",
762
+ "modelRaw": "gpt-oss-120b high",
763
+ "metric": "pass_rate_2",
764
+ "score": 41.8,
765
+ "scoreScale": "percent",
766
+ "sampleSize": 225,
767
+ "date": "2025-08-06",
768
+ "extra": {
769
+ "passRate1": 13.8,
770
+ "passNum1": 31,
771
+ "passNum2": 94,
772
+ "totalCostUsd": 0.7406,
773
+ "secondsPerCase": 35.5,
774
+ "editFormat": "diff"
775
+ },
776
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
777
+ "snapshotIndex": 42
778
+ },
779
+ {
780
+ "source": "aider_polyglot",
781
+ "benchmark": "aider_polyglot",
782
+ "url": "https://aider.chat/docs/leaderboards/",
783
+ "modelRaw": "Qwen3 32B",
784
+ "metric": "pass_rate_2",
785
+ "score": 40,
786
+ "scoreScale": "percent",
787
+ "sampleSize": 225,
788
+ "date": "2025-05-08",
789
+ "extra": {
790
+ "passRate1": 14.2,
791
+ "passNum1": 32,
792
+ "passNum2": 90,
793
+ "totalCostUsd": 0.7603,
794
+ "secondsPerCase": 372.2,
795
+ "editFormat": "diff"
796
+ },
797
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
798
+ "snapshotIndex": 43
799
+ },
800
+ {
801
+ "source": "aider_polyglot",
802
+ "benchmark": "aider_polyglot",
803
+ "url": "https://aider.chat/docs/leaderboards/",
804
+ "modelRaw": "gemini-exp-1206",
805
+ "metric": "pass_rate_2",
806
+ "score": 38.2,
807
+ "scoreScale": "percent",
808
+ "sampleSize": 225,
809
+ "date": "2024-12-22",
810
+ "extra": {
811
+ "passRate1": 19.6,
812
+ "passNum1": 44,
813
+ "passNum2": 86,
814
+ "totalCostUsd": 0,
815
+ "secondsPerCase": 45.5,
816
+ "editFormat": "whole"
817
+ },
818
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
819
+ "snapshotIndex": 44
820
+ },
821
+ {
822
+ "source": "aider_polyglot",
823
+ "benchmark": "aider_polyglot",
824
+ "url": "https://aider.chat/docs/leaderboards/",
825
+ "modelRaw": "Gemini 2.0 Pro exp-02-05",
826
+ "metric": "pass_rate_2",
827
+ "score": 35.6,
828
+ "scoreScale": "percent",
829
+ "sampleSize": 225,
830
+ "date": "2025-02-25",
831
+ "extra": {
832
+ "passRate1": 20.4,
833
+ "passNum1": 46,
834
+ "passNum2": 80,
835
+ "totalCostUsd": 0,
836
+ "secondsPerCase": 34.8,
837
+ "editFormat": "whole"
838
+ },
839
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
840
+ "snapshotIndex": 45
841
+ },
842
+ {
843
+ "source": "aider_polyglot",
844
+ "benchmark": "aider_polyglot",
845
+ "url": "https://aider.chat/docs/leaderboards/",
846
+ "modelRaw": "Grok 3 Mini Beta low",
847
+ "metric": "pass_rate_2",
848
+ "score": 34.7,
849
+ "scoreScale": "percent",
850
+ "sampleSize": 225,
851
+ "date": "2025-04-10",
852
+ "extra": {
853
+ "passRate1": 11.1,
854
+ "passNum1": 25,
855
+ "passNum2": 78,
856
+ "totalCostUsd": 0.7856,
857
+ "secondsPerCase": 35.1,
858
+ "editFormat": "whole"
859
+ },
860
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
861
+ "snapshotIndex": 46
862
+ },
863
+ {
864
+ "source": "aider_polyglot",
865
+ "benchmark": "aider_polyglot",
866
+ "url": "https://aider.chat/docs/leaderboards/",
867
+ "modelRaw": "o1-mini-2024-09-12",
868
+ "metric": "pass_rate_2",
869
+ "score": 32.9,
870
+ "scoreScale": "percent",
871
+ "sampleSize": 225,
872
+ "date": "2024-12-22",
873
+ "extra": {
874
+ "passRate1": 5.8,
875
+ "passNum1": 13,
876
+ "passNum2": 74,
877
+ "totalCostUsd": 18.577,
878
+ "secondsPerCase": 34.7,
879
+ "editFormat": "whole"
880
+ },
881
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
882
+ "snapshotIndex": 47
883
+ },
884
+ {
885
+ "source": "aider_polyglot",
886
+ "benchmark": "aider_polyglot",
887
+ "url": "https://aider.chat/docs/leaderboards/",
888
+ "modelRaw": "gpt-4.1-mini",
889
+ "metric": "pass_rate_2",
890
+ "score": 32.4,
891
+ "scoreScale": "percent",
892
+ "sampleSize": 225,
893
+ "date": "2025-04-14",
894
+ "extra": {
895
+ "passRate1": 11.1,
896
+ "passNum1": 25,
897
+ "passNum2": 73,
898
+ "totalCostUsd": 1.9918,
899
+ "secondsPerCase": 19.5,
900
+ "editFormat": "diff"
901
+ },
902
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
903
+ "snapshotIndex": 48
904
+ },
905
+ {
906
+ "source": "aider_polyglot",
907
+ "benchmark": "aider_polyglot",
908
+ "url": "https://aider.chat/docs/leaderboards/",
909
+ "modelRaw": "claude-3-5-haiku-20241022",
910
+ "metric": "pass_rate_2",
911
+ "score": 28,
912
+ "scoreScale": "percent",
913
+ "sampleSize": 225,
914
+ "date": "2024-12-21",
915
+ "extra": {
916
+ "passRate1": 7.1,
917
+ "passNum1": 16,
918
+ "passNum2": 63,
919
+ "totalCostUsd": 6.0583,
920
+ "secondsPerCase": 31.8,
921
+ "editFormat": "diff"
922
+ },
923
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
924
+ "snapshotIndex": 49
925
+ },
926
+ {
927
+ "source": "aider_polyglot",
928
+ "benchmark": "aider_polyglot",
929
+ "url": "https://aider.chat/docs/leaderboards/",
930
+ "modelRaw": "chatgpt-4o-latest 2025-02-15",
931
+ "metric": "pass_rate_2",
932
+ "score": 27.1,
933
+ "scoreScale": "percent",
934
+ "sampleSize": 223,
935
+ "date": "2025-02-15",
936
+ "extra": {
937
+ "passRate1": 9,
938
+ "passNum1": 20,
939
+ "passNum2": 61,
940
+ "totalCostUsd": 14.3703,
941
+ "secondsPerCase": 12.4,
942
+ "editFormat": "diff"
943
+ },
944
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
945
+ "snapshotIndex": 50
946
+ },
947
+ {
948
+ "source": "aider_polyglot",
949
+ "benchmark": "aider_polyglot",
950
+ "url": "https://aider.chat/docs/leaderboards/",
951
+ "modelRaw": "QwQ-32B + Qwen 2.5 Coder Instruct",
952
+ "metric": "pass_rate_2",
953
+ "score": 26.2,
954
+ "scoreScale": "percent",
955
+ "sampleSize": 225,
956
+ "date": "2025-03-07",
957
+ "extra": {
958
+ "passRate1": 9.8,
959
+ "passNum1": 22,
960
+ "passNum2": 59,
961
+ "totalCostUsd": 0,
962
+ "secondsPerCase": 137.4,
963
+ "editFormat": "architect"
964
+ },
965
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
966
+ "snapshotIndex": 51
967
+ },
968
+ {
969
+ "source": "aider_polyglot",
970
+ "benchmark": "aider_polyglot",
971
+ "url": "https://aider.chat/docs/leaderboards/",
972
+ "modelRaw": "gpt-4o-2024-08-06",
973
+ "metric": "pass_rate_2",
974
+ "score": 23.1,
975
+ "scoreScale": "percent",
976
+ "sampleSize": 225,
977
+ "date": "2024-12-30",
978
+ "extra": {
979
+ "passRate1": 4.9,
980
+ "passNum1": 11,
981
+ "passNum2": 52,
982
+ "totalCostUsd": 7.0286,
983
+ "secondsPerCase": 16,
984
+ "editFormat": "diff"
985
+ },
986
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
987
+ "snapshotIndex": 52
988
+ },
989
+ {
990
+ "source": "aider_polyglot",
991
+ "benchmark": "aider_polyglot",
992
+ "url": "https://aider.chat/docs/leaderboards/",
993
+ "modelRaw": "gemini-2.0-flash-exp",
994
+ "metric": "pass_rate_2",
995
+ "score": 22.2,
996
+ "scoreScale": "percent",
997
+ "sampleSize": 225,
998
+ "date": "2024-12-22",
999
+ "extra": {
1000
+ "passRate1": 11.6,
1001
+ "passNum1": 26,
1002
+ "passNum2": 50,
1003
+ "totalCostUsd": 0,
1004
+ "secondsPerCase": 12.2,
1005
+ "editFormat": "whole"
1006
+ },
1007
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
1008
+ "snapshotIndex": 53
1009
+ },
1010
+ {
1011
+ "source": "aider_polyglot",
1012
+ "benchmark": "aider_polyglot",
1013
+ "url": "https://aider.chat/docs/leaderboards/",
1014
+ "modelRaw": "qwen-max-2025-01-25",
1015
+ "metric": "pass_rate_2",
1016
+ "score": 21.8,
1017
+ "scoreScale": "percent",
1018
+ "sampleSize": 225,
1019
+ "date": "2025-01-28",
1020
+ "extra": {
1021
+ "passRate1": 9.3,
1022
+ "passNum1": 21,
1023
+ "passNum2": 49,
1024
+ "secondsPerCase": 39.5,
1025
+ "editFormat": "diff"
1026
+ },
1027
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
1028
+ "snapshotIndex": 54
1029
+ },
1030
+ {
1031
+ "source": "aider_polyglot",
1032
+ "benchmark": "aider_polyglot",
1033
+ "url": "https://aider.chat/docs/leaderboards/",
1034
+ "modelRaw": "QwQ-32B",
1035
+ "metric": "pass_rate_2",
1036
+ "score": 20.9,
1037
+ "scoreScale": "percent",
1038
+ "sampleSize": 225,
1039
+ "date": "2025-03-06",
1040
+ "extra": {
1041
+ "passRate1": 8,
1042
+ "passNum1": 18,
1043
+ "passNum2": 47,
1044
+ "totalCostUsd": 0,
1045
+ "secondsPerCase": 228.6,
1046
+ "editFormat": "diff"
1047
+ },
1048
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
1049
+ "snapshotIndex": 55
1050
+ },
1051
+ {
1052
+ "source": "aider_polyglot",
1053
+ "benchmark": "aider_polyglot",
1054
+ "url": "https://aider.chat/docs/leaderboards/",
1055
+ "modelRaw": "gemini-2.0-flash-thinking-exp-01-21",
1056
+ "metric": "pass_rate_2",
1057
+ "score": 18.2,
1058
+ "scoreScale": "percent",
1059
+ "sampleSize": 225,
1060
+ "date": "2025-01-21",
1061
+ "extra": {
1062
+ "passRate1": 5.8,
1063
+ "passNum1": 13,
1064
+ "passNum2": 41,
1065
+ "totalCostUsd": 0,
1066
+ "secondsPerCase": 24.2,
1067
+ "editFormat": "diff"
1068
+ },
1069
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
1070
+ "snapshotIndex": 56
1071
+ },
1072
+ {
1073
+ "source": "aider_polyglot",
1074
+ "benchmark": "aider_polyglot",
1075
+ "url": "https://aider.chat/docs/leaderboards/",
1076
+ "modelRaw": "gpt-4o-2024-11-20",
1077
+ "metric": "pass_rate_2",
1078
+ "score": 18.2,
1079
+ "scoreScale": "percent",
1080
+ "sampleSize": 225,
1081
+ "date": "2024-12-30",
1082
+ "extra": {
1083
+ "passRate1": 4.9,
1084
+ "passNum1": 11,
1085
+ "passNum2": 41,
1086
+ "totalCostUsd": 6.7351,
1087
+ "secondsPerCase": 12.1,
1088
+ "editFormat": "diff"
1089
+ },
1090
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
1091
+ "snapshotIndex": 57
1092
+ },
1093
+ {
1094
+ "source": "aider_polyglot",
1095
+ "benchmark": "aider_polyglot",
1096
+ "url": "https://aider.chat/docs/leaderboards/",
1097
+ "modelRaw": "DeepSeek Chat V2.5",
1098
+ "metric": "pass_rate_2",
1099
+ "score": 17.8,
1100
+ "scoreScale": "percent",
1101
+ "sampleSize": 225,
1102
+ "date": "2024-12-21",
1103
+ "extra": {
1104
+ "passRate1": 5.3,
1105
+ "passNum1": 12,
1106
+ "passNum2": 40,
1107
+ "totalCostUsd": 0.5101,
1108
+ "secondsPerCase": 184,
1109
+ "editFormat": "diff"
1110
+ },
1111
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
1112
+ "snapshotIndex": 58
1113
+ },
1114
+ {
1115
+ "source": "aider_polyglot",
1116
+ "benchmark": "aider_polyglot",
1117
+ "url": "https://aider.chat/docs/leaderboards/",
1118
+ "modelRaw": "Qwen2.5-Coder-32B-Instruct",
1119
+ "metric": "pass_rate_2",
1120
+ "score": 16.4,
1121
+ "scoreScale": "percent",
1122
+ "sampleSize": 225,
1123
+ "date": "2024-12-26",
1124
+ "extra": {
1125
+ "passRate1": 4.9,
1126
+ "passNum1": 11,
1127
+ "passNum2": 37,
1128
+ "totalCostUsd": 0,
1129
+ "secondsPerCase": 42,
1130
+ "editFormat": "whole"
1131
+ },
1132
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
1133
+ "snapshotIndex": 59
1134
+ },
1135
+ {
1136
+ "source": "aider_polyglot",
1137
+ "benchmark": "aider_polyglot",
1138
+ "url": "https://aider.chat/docs/leaderboards/",
1139
+ "modelRaw": "Llama 4 Maverick",
1140
+ "metric": "pass_rate_2",
1141
+ "score": 15.6,
1142
+ "scoreScale": "percent",
1143
+ "sampleSize": 225,
1144
+ "date": "2025-04-06",
1145
+ "extra": {
1146
+ "passRate1": 4.4,
1147
+ "passNum1": 10,
1148
+ "passNum2": 35,
1149
+ "totalCostUsd": 0,
1150
+ "secondsPerCase": 20.5,
1151
+ "editFormat": "whole"
1152
+ },
1153
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
1154
+ "snapshotIndex": 60
1155
+ },
1156
+ {
1157
+ "source": "aider_polyglot",
1158
+ "benchmark": "aider_polyglot",
1159
+ "url": "https://aider.chat/docs/leaderboards/",
1160
+ "modelRaw": "yi-lightning",
1161
+ "metric": "pass_rate_2",
1162
+ "score": 12.9,
1163
+ "scoreScale": "percent",
1164
+ "sampleSize": 225,
1165
+ "date": "2024-12-23",
1166
+ "extra": {
1167
+ "passRate1": 5.8,
1168
+ "passNum1": 13,
1169
+ "passNum2": 29,
1170
+ "totalCostUsd": 0,
1171
+ "secondsPerCase": 146.7,
1172
+ "editFormat": "whole"
1173
+ },
1174
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
1175
+ "snapshotIndex": 61
1176
+ },
1177
+ {
1178
+ "source": "aider_polyglot",
1179
+ "benchmark": "aider_polyglot",
1180
+ "url": "https://aider.chat/docs/leaderboards/",
1181
+ "modelRaw": "command-a-03-2025-quality",
1182
+ "metric": "pass_rate_2",
1183
+ "score": 12,
1184
+ "scoreScale": "percent",
1185
+ "sampleSize": 225,
1186
+ "date": "2025-03-14",
1187
+ "extra": {
1188
+ "passRate1": 2.2,
1189
+ "passNum1": 5,
1190
+ "passNum2": 27,
1191
+ "totalCostUsd": 0,
1192
+ "secondsPerCase": 85.1,
1193
+ "editFormat": "whole"
1194
+ },
1195
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
1196
+ "snapshotIndex": 62
1197
+ },
1198
+ {
1199
+ "source": "aider_polyglot",
1200
+ "benchmark": "aider_polyglot",
1201
+ "url": "https://aider.chat/docs/leaderboards/",
1202
+ "modelRaw": "Codestral 25.01",
1203
+ "metric": "pass_rate_2",
1204
+ "score": 11.1,
1205
+ "scoreScale": "percent",
1206
+ "sampleSize": 225,
1207
+ "date": "2025-01-13",
1208
+ "extra": {
1209
+ "passRate1": 4,
1210
+ "passNum1": 9,
1211
+ "passNum2": 25,
1212
+ "totalCostUsd": 1.9834,
1213
+ "secondsPerCase": 9.3,
1214
+ "editFormat": "whole"
1215
+ },
1216
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
1217
+ "snapshotIndex": 63
1218
+ },
1219
+ {
1220
+ "source": "aider_polyglot",
1221
+ "benchmark": "aider_polyglot",
1222
+ "url": "https://aider.chat/docs/leaderboards/",
1223
+ "modelRaw": "openhands-lm-32b-v0.1",
1224
+ "metric": "pass_rate_2",
1225
+ "score": 10.2,
1226
+ "scoreScale": "percent",
1227
+ "sampleSize": 225,
1228
+ "date": "2025-04-19",
1229
+ "extra": {
1230
+ "passRate1": 4,
1231
+ "passNum1": 9,
1232
+ "passNum2": 23,
1233
+ "totalCostUsd": 0,
1234
+ "secondsPerCase": 195.6,
1235
+ "editFormat": "whole"
1236
+ },
1237
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
1238
+ "snapshotIndex": 64
1239
+ },
1240
+ {
1241
+ "source": "aider_polyglot",
1242
+ "benchmark": "aider_polyglot",
1243
+ "url": "https://aider.chat/docs/leaderboards/",
1244
+ "modelRaw": "gpt-4.1-nano",
1245
+ "metric": "pass_rate_2",
1246
+ "score": 8.9,
1247
+ "scoreScale": "percent",
1248
+ "sampleSize": 225,
1249
+ "date": "2025-04-14",
1250
+ "extra": {
1251
+ "passRate1": 3.1,
1252
+ "passNum1": 7,
1253
+ "passNum2": 20,
1254
+ "totalCostUsd": 0.4281,
1255
+ "secondsPerCase": 12,
1256
+ "editFormat": "whole"
1257
+ },
1258
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
1259
+ "snapshotIndex": 65
1260
+ },
1261
+ {
1262
+ "source": "aider_polyglot",
1263
+ "benchmark": "aider_polyglot",
1264
+ "url": "https://aider.chat/docs/leaderboards/",
1265
+ "modelRaw": "Qwen2.5-Coder-32B-Instruct",
1266
+ "metric": "pass_rate_2",
1267
+ "score": 8,
1268
+ "scoreScale": "percent",
1269
+ "sampleSize": 225,
1270
+ "date": "2024-12-22",
1271
+ "extra": {
1272
+ "passRate1": 4.4,
1273
+ "passNum1": 10,
1274
+ "passNum2": 18,
1275
+ "totalCostUsd": 0,
1276
+ "secondsPerCase": 84.4,
1277
+ "editFormat": "diff"
1278
+ },
1279
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
1280
+ "snapshotIndex": 66
1281
+ },
1282
+ {
1283
+ "source": "aider_polyglot",
1284
+ "benchmark": "aider_polyglot",
1285
+ "url": "https://aider.chat/docs/leaderboards/",
1286
+ "modelRaw": "gemma-3-27b-it",
1287
+ "metric": "pass_rate_2",
1288
+ "score": 4.9,
1289
+ "scoreScale": "percent",
1290
+ "sampleSize": 225,
1291
+ "date": "2025-03-15",
1292
+ "extra": {
1293
+ "passRate1": 1.8,
1294
+ "passNum1": 4,
1295
+ "passNum2": 11,
1296
+ "totalCostUsd": 0,
1297
+ "secondsPerCase": 79.7,
1298
+ "editFormat": "whole"
1299
+ },
1300
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
1301
+ "snapshotIndex": 67
1302
+ },
1303
+ {
1304
+ "source": "aider_polyglot",
1305
+ "benchmark": "aider_polyglot",
1306
+ "url": "https://aider.chat/docs/leaderboards/",
1307
+ "modelRaw": "gpt-4o-mini-2024-07-18",
1308
+ "metric": "pass_rate_2",
1309
+ "score": 3.6,
1310
+ "scoreScale": "percent",
1311
+ "sampleSize": 225,
1312
+ "date": "2024-12-21",
1313
+ "extra": {
1314
+ "passRate1": 0.9,
1315
+ "passNum1": 2,
1316
+ "passNum2": 8,
1317
+ "totalCostUsd": 0.3236,
1318
+ "secondsPerCase": 17.3,
1319
+ "editFormat": "whole"
1320
+ },
1321
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.aider_polyglot.json",
1322
+ "snapshotIndex": 68
1323
+ },
1324
+ {
1325
+ "source": "terminal_bench",
1326
+ "benchmark": "terminal_bench",
1327
+ "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
1328
+ "modelRaw": "Terminus 2 GPT-5.5",
1329
+ "metric": "accuracy",
1330
+ "score": 0.7820224719101123,
1331
+ "scoreScale": "0-1",
1332
+ "date": "2026-05-01",
1333
+ "extra": {
1334
+ "agent": "Terminus 2",
1335
+ "model": [
1336
+ "GPT-5.5"
1337
+ ],
1338
+ "stderr": 0.011996717137113833,
1339
+ "verified": true,
1340
+ "agentName": "terminus-2",
1341
+ "agentVersion": "2.0.0",
1342
+ "modelNames": [
1343
+ "gpt-5.5"
1344
+ ],
1345
+ "modelProviders": [
1346
+ "openai"
1347
+ ]
1348
+ },
1349
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.terminal_bench_2_1.json",
1350
+ "snapshotIndex": 2
1351
+ },
1352
+ {
1353
+ "source": "terminal_bench",
1354
+ "benchmark": "terminal_bench",
1355
+ "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
1356
+ "modelRaw": "Terminus 2 Claude Opus 4.8",
1357
+ "metric": "accuracy",
1358
+ "score": 0.7460674157303371,
1359
+ "scoreScale": "0-1",
1360
+ "date": "2026-05-29",
1361
+ "extra": {
1362
+ "agent": "Terminus 2",
1363
+ "model": [
1364
+ "Claude Opus 4.8"
1365
+ ],
1366
+ "stderr": 0.012308372078767778,
1367
+ "verified": true,
1368
+ "agentName": "terminus-2",
1369
+ "agentVersion": "2.0.0",
1370
+ "modelNames": [
1371
+ "claude-opus-4-8"
1372
+ ],
1373
+ "modelProviders": [
1374
+ "anthropic"
1375
+ ]
1376
+ },
1377
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.terminal_bench_2_1.json",
1378
+ "snapshotIndex": 3
1379
+ },
1380
+ {
1381
+ "source": "terminal_bench",
1382
+ "benchmark": "terminal_bench",
1383
+ "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
1384
+ "modelRaw": "Terminus 2 Gemini 3 Pro",
1385
+ "metric": "accuracy",
1386
+ "score": 0.7438202247191011,
1387
+ "scoreScale": "0-1",
1388
+ "date": "2026-05-01",
1389
+ "extra": {
1390
+ "agent": "Terminus 2",
1391
+ "model": [
1392
+ "Gemini 3 Pro"
1393
+ ],
1394
+ "stderr": 0.013199258566821045,
1395
+ "verified": true,
1396
+ "agentName": "terminus-2",
1397
+ "agentVersion": "2.0.0",
1398
+ "modelNames": [
1399
+ "gemini-3-pro-preview"
1400
+ ],
1401
+ "modelProviders": [
1402
+ "gemini"
1403
+ ]
1404
+ },
1405
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.terminal_bench_2_1.json",
1406
+ "snapshotIndex": 4
1407
+ },
1408
+ {
1409
+ "source": "terminal_bench",
1410
+ "benchmark": "terminal_bench",
1411
+ "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
1412
+ "modelRaw": "Terminus 2 Gemini 3.1 Pro",
1413
+ "metric": "accuracy",
1414
+ "score": 0.7031835205992509,
1415
+ "scoreScale": "0-1",
1416
+ "date": "2026-05-05",
1417
+ "extra": {
1418
+ "agent": "Terminus 2",
1419
+ "model": [
1420
+ "Gemini 3.1 Pro"
1421
+ ],
1422
+ "stderr": 0.014791636846043224,
1423
+ "verified": true,
1424
+ "agentName": "terminus-2",
1425
+ "agentVersion": "2.0.0",
1426
+ "modelNames": [
1427
+ "gemini-3.1-pro-preview"
1428
+ ],
1429
+ "modelProviders": [
1430
+ "gemini"
1431
+ ]
1432
+ },
1433
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.terminal_bench_2_1.json",
1434
+ "snapshotIndex": 6
1435
+ },
1436
+ {
1437
+ "source": "terminal_bench",
1438
+ "benchmark": "terminal_bench",
1439
+ "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
1440
+ "modelRaw": "Terminus 2 Claude Opus 4.7",
1441
+ "metric": "accuracy",
1442
+ "score": 0.6606741573033708,
1443
+ "scoreScale": "0-1",
1444
+ "date": "2026-05-01",
1445
+ "extra": {
1446
+ "agent": "Terminus 2",
1447
+ "model": [
1448
+ "Claude Opus 4.7"
1449
+ ],
1450
+ "stderr": 0.013669129281569032,
1451
+ "verified": true,
1452
+ "agentName": "terminus-2",
1453
+ "agentVersion": "2.0.0",
1454
+ "modelNames": [
1455
+ "claude-opus-4-7"
1456
+ ],
1457
+ "modelProviders": [
1458
+ "anthropic"
1459
+ ]
1460
+ },
1461
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.terminal_bench_2_1.json",
1462
+ "snapshotIndex": 9
1463
+ },
1464
+ {
1465
+ "source": "terminal_bench",
1466
+ "benchmark": "terminal_bench",
1467
+ "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
1468
+ "modelRaw": "Claude Code GLM 5.1",
1469
+ "metric": "accuracy",
1470
+ "score": 0.5865168539325842,
1471
+ "scoreScale": "0-1",
1472
+ "date": "2026-05-02",
1473
+ "extra": {
1474
+ "agent": "Claude Code",
1475
+ "model": [
1476
+ "GLM 5.1"
1477
+ ],
1478
+ "stderr": 0.012410517996839619,
1479
+ "verified": true,
1480
+ "agentName": "claude-code",
1481
+ "agentVersion": "2.1.123",
1482
+ "modelNames": [
1483
+ "glm-5.1"
1484
+ ],
1485
+ "modelProviders": [
1486
+ "z-ai"
1487
+ ]
1488
+ },
1489
+ "snapshotFile": "tools\\scorecard\\snapshots\\live.terminal_bench_2_1.json",
1490
+ "snapshotIndex": 10
1491
+ }
1492
+ ]