npm - aiforcecli-chat - Versions diffs - 0.1.0 - Mend

aiforcecli-chat 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

package/License.MD +49 -0
package/README.md +642 -0
package/aiforcecli.config.example.json +66 -0
package/assets/README.md +14 -0
package/dist/cli.js +2 -0
package/dist/index.js +2 -0
package/package.json +62 -0
package/tools/scorecard/README.md +92 -0
package/tools/scorecard/config.json +134 -0
package/tools/scorecard/fetch.mjs +335 -0
package/tools/scorecard/generate.mjs +289 -0
package/tools/scorecard/generated/example/invalid-rows.json +1 -0
package/tools/scorecard/generated/example/scorecard-report.md +147 -0
package/tools/scorecard/generated/example/scorecard.compact.json +61 -0
package/tools/scorecard/generated/example/scorecard.json +1492 -0
package/tools/scorecard/generated/example/unmapped-models.json +1492 -0
package/tools/scorecard/generated/raw/aider_polyglot.html +21071 -0
package/tools/scorecard/generated/raw/terminal_bench_2_1.html +2 -0
package/tools/scorecard/generated/scorecard/invalid-rows.json +1 -0
package/tools/scorecard/generated/scorecard/scorecard-report.md +133 -0
package/tools/scorecard/generated/scorecard/scorecard.compact.json +51 -0
package/tools/scorecard/generated/scorecard/scorecard.json +1181 -0
package/tools/scorecard/generated/scorecard/unmapped-models.json +1492 -0
package/tools/scorecard/generated/scorecard-example/invalid-rows.json +1 -0
package/tools/scorecard/generated/scorecard-example/scorecard-report.md +40 -0
package/tools/scorecard/generated/scorecard-example/scorecard.compact.json +22 -0
package/tools/scorecard/generated/scorecard-example/scorecard.json +389 -0
package/tools/scorecard/generated/scorecard-example/unmapped-models.json +1 -0
package/tools/scorecard/generated/scorecard-fetch/raw/aider_polyglot.html +21071 -0
package/tools/scorecard/generated/scorecard-fetch/raw/terminal_bench_2_1.html +2 -0
package/tools/scorecard/snapshots/example.normalized.example.json +38 -0
package/tools/scorecard/snapshots/live.aider_polyglot.json +1318 -0
package/tools/scorecard/snapshots/live.terminal_bench_2_1.json +294 -0

package/tools/scorecard/generated/example/scorecard.json ADDED Viewed

@@ -0,0 +1,1492 @@
+{
+  "version": "manual.2026.06.16",
+  "generatedAt": "2026-06-16T20:38:21.186Z",
+  "taskTypes": [
+    "bugfix",
+    "feature",
+    "refactor",
+    "test",
+    "docs",
+    "security",
+    "perf",
+    "general"
+  ],
+  "notes": [
+    "Generated scorecard artifact. It is not used by the application unless explicitly wired in later.",
+    "Scores are normalized public benchmark priors, not private repo outcomes."
+  ],
+  "scores": {
+    "antigravity:gemini-3.1-pro": {
+      "bugfix": {
+        "score": 0.7331,
+        "confidence": 0.2143,
+        "evidenceWeight": 0.5456,
+        "sources": [
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.7066,
+            "weight": 0.1574,
+            "date": "2026-05-05",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Gemini CLI Gemini 3.1 Pro"
+          },
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.6629,
+            "weight": 0.1556,
+            "date": "2026-05-02",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Gemini CLI Gemini 3 Pro"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.831,
+            "weight": 0.0805,
+            "sampleSize": 225,
+            "date": "2025-06-06",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gemini-2.5-pro-preview-06-05 32k think"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.791,
+            "weight": 0.0805,
+            "sampleSize": 225,
+            "date": "2025-06-06",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gemini-2.5-pro-preview-06-05 default think"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.769,
+            "weight": 0.0717,
+            "sampleSize": 225,
+            "date": "2025-05-07",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "Gemini 2.5 Pro Preview 05-06"
+          }
+        ]
+      },
+      "feature": {
+        "score": 0.7424,
+        "confidence": 0.1985,
+        "evidenceWeight": 0.4954,
+        "sources": [
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.7066,
+            "weight": 0.1224,
+            "date": "2026-05-05",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Gemini CLI Gemini 3.1 Pro"
+          },
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.6629,
+            "weight": 0.121,
+            "date": "2026-05-02",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Gemini CLI Gemini 3 Pro"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.831,
+            "weight": 0.0872,
+            "sampleSize": 225,
+            "date": "2025-06-06",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gemini-2.5-pro-preview-06-05 32k think"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.791,
+            "weight": 0.0872,
+            "sampleSize": 225,
+            "date": "2025-06-06",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gemini-2.5-pro-preview-06-05 default think"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.769,
+            "weight": 0.0777,
+            "sampleSize": 225,
+            "date": "2025-05-07",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "Gemini 2.5 Pro Preview 05-06"
+          }
+        ]
+      },
+      "refactor": {
+        "score": 0.7574,
+        "confidence": 0.1949,
+        "evidenceWeight": 0.484,
+        "sources": [
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.831,
+            "weight": 0.1073,
+            "sampleSize": 225,
+            "date": "2025-06-06",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gemini-2.5-pro-preview-06-05 32k think"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.791,
+            "weight": 0.1073,
+            "sampleSize": 225,
+            "date": "2025-06-06",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gemini-2.5-pro-preview-06-05 default think"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.769,
+            "weight": 0.0956,
+            "sampleSize": 225,
+            "date": "2025-05-07",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "Gemini 2.5 Pro Preview 05-06"
+          },
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.7066,
+            "weight": 0.0874,
+            "date": "2026-05-05",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Gemini CLI Gemini 3.1 Pro"
+          },
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.6629,
+            "weight": 0.0864,
+            "date": "2026-05-02",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Gemini CLI Gemini 3 Pro"
+          }
+        ]
+      },
+      "test": {
+        "score": 0.7364,
+        "confidence": 0.1608,
+        "evidenceWeight": 0.3831,
+        "sources": [
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.7066,
+            "weight": 0.1049,
+            "date": "2026-05-05",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Gemini CLI Gemini 3.1 Pro"
+          },
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.6629,
+            "weight": 0.1037,
+            "date": "2026-05-02",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Gemini CLI Gemini 3 Pro"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.831,
+            "weight": 0.0603,
+            "sampleSize": 225,
+            "date": "2025-06-06",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gemini-2.5-pro-preview-06-05 32k think"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.791,
+            "weight": 0.0603,
+            "sampleSize": 225,
+            "date": "2025-06-06",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gemini-2.5-pro-preview-06-05 default think"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.769,
+            "weight": 0.0538,
+            "sampleSize": 225,
+            "date": "2025-05-07",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "Gemini 2.5 Pro Preview 05-06"
+          }
+        ]
+      },
+      "docs": {
+        "score": 0.7981,
+        "confidence": 0.0462,
+        "evidenceWeight": 0.0969,
+        "sources": [
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.831,
+            "weight": 0.0335,
+            "sampleSize": 225,
+            "date": "2025-06-06",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gemini-2.5-pro-preview-06-05 32k think"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.791,
+            "weight": 0.0335,
+            "sampleSize": 225,
+            "date": "2025-06-06",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gemini-2.5-pro-preview-06-05 default think"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.769,
+            "weight": 0.0299,
+            "sampleSize": 225,
+            "date": "2025-05-07",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "Gemini 2.5 Pro Preview 05-06"
+          }
+        ]
+      },
+      "security": {
+        "score": 0.7208,
+        "confidence": 0.1325,
+        "evidenceWeight": 0.3056,
+        "sources": [
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.7066,
+            "weight": 0.1049,
+            "date": "2026-05-05",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Gemini CLI Gemini 3.1 Pro"
+          },
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.6629,
+            "weight": 0.1037,
+            "date": "2026-05-02",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Gemini CLI Gemini 3 Pro"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.831,
+            "weight": 0.0335,
+            "sampleSize": 225,
+            "date": "2025-06-06",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gemini-2.5-pro-preview-06-05 32k think"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.791,
+            "weight": 0.0335,
+            "sampleSize": 225,
+            "date": "2025-06-06",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gemini-2.5-pro-preview-06-05 default think"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.769,
+            "weight": 0.0299,
+            "sampleSize": 225,
+            "date": "2025-05-07",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "Gemini 2.5 Pro Preview 05-06"
+          }
+        ]
+      },
+      "perf": {
+        "score": 0.7254,
+        "confidence": 0.1593,
+        "evidenceWeight": 0.3791,
+        "sources": [
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.7066,
+            "weight": 0.1224,
+            "date": "2026-05-05",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Gemini CLI Gemini 3.1 Pro"
+          },
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.6629,
+            "weight": 0.121,
+            "date": "2026-05-02",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Gemini CLI Gemini 3 Pro"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.831,
+            "weight": 0.0469,
+            "sampleSize": 225,
+            "date": "2025-06-06",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gemini-2.5-pro-preview-06-05 32k think"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.791,
+            "weight": 0.0469,
+            "sampleSize": 225,
+            "date": "2025-06-06",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gemini-2.5-pro-preview-06-05 default think"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.769,
+            "weight": 0.0418,
+            "sampleSize": 225,
+            "date": "2025-05-07",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "Gemini 2.5 Pro Preview 05-06"
+          }
+        ]
+      },
+      "general": {
+        "score": 0.7231,
+        "confidence": 0.2397,
+        "evidenceWeight": 0.6305,
+        "sources": [
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.7066,
+            "weight": 0.2098,
+            "date": "2026-05-05",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Gemini CLI Gemini 3.1 Pro"
+          },
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.6629,
+            "weight": 0.2074,
+            "date": "2026-05-02",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Gemini CLI Gemini 3 Pro"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.831,
+            "weight": 0.0738,
+            "sampleSize": 225,
+            "date": "2025-06-06",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gemini-2.5-pro-preview-06-05 32k think"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.791,
+            "weight": 0.0738,
+            "sampleSize": 225,
+            "date": "2025-06-06",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gemini-2.5-pro-preview-06-05 default think"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.769,
+            "weight": 0.0657,
+            "sampleSize": 225,
+            "date": "2025-05-07",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "Gemini 2.5 Pro Preview 05-06"
+          }
+        ]
+      }
+    },
+    "claude-code:opus": {
+      "bugfix": {
+        "score": 0.7454,
+        "confidence": 0.1407,
+        "evidenceWeight": 0.3276,
+        "sources": [
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.7888,
+            "weight": 0.1726,
+            "date": "2026-05-29",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Claude Code Claude Opus 4.8"
+          },
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.6972,
+            "weight": 0.155,
+            "date": "2026-05-01",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Claude Code Claude Opus 4.7"
+          }
+        ]
+      },
+      "feature": {
+        "score": 0.7454,
+        "confidence": 0.113,
+        "evidenceWeight": 0.2548,
+        "sources": [
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.7888,
+            "weight": 0.1343,
+            "date": "2026-05-29",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Claude Code Claude Opus 4.8"
+          },
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.6972,
+            "weight": 0.1205,
+            "date": "2026-05-01",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Claude Code Claude Opus 4.7"
+          }
+        ]
+      },
+      "refactor": {
+        "score": 0.7454,
+        "confidence": 0.0834,
+        "evidenceWeight": 0.182,
+        "sources": [
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.7888,
+            "weight": 0.0959,
+            "date": "2026-05-29",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Claude Code Claude Opus 4.8"
+          },
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.6972,
+            "weight": 0.0861,
+            "date": "2026-05-01",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Claude Code Claude Opus 4.7"
+          }
+        ]
+      },
+      "test": {
+        "score": 0.7454,
+        "confidence": 0.0984,
+        "evidenceWeight": 0.2184,
+        "sources": [
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.7888,
+            "weight": 0.1151,
+            "date": "2026-05-29",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Claude Code Claude Opus 4.8"
+          },
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.6972,
+            "weight": 0.1033,
+            "date": "2026-05-01",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Claude Code Claude Opus 4.7"
+          }
+        ]
+      },
+      "security": {
+        "score": 0.7454,
+        "confidence": 0.0984,
+        "evidenceWeight": 0.2184,
+        "sources": [
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.7888,
+            "weight": 0.1151,
+            "date": "2026-05-29",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Claude Code Claude Opus 4.8"
+          },
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.6972,
+            "weight": 0.1033,
+            "date": "2026-05-01",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Claude Code Claude Opus 4.7"
+          }
+        ]
+      },
+      "perf": {
+        "score": 0.7454,
+        "confidence": 0.113,
+        "evidenceWeight": 0.2548,
+        "sources": [
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.7888,
+            "weight": 0.1343,
+            "date": "2026-05-29",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Claude Code Claude Opus 4.8"
+          },
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.6972,
+            "weight": 0.1205,
+            "date": "2026-05-01",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Claude Code Claude Opus 4.7"
+          }
+        ]
+      },
+      "general": {
+        "score": 0.7454,
+        "confidence": 0.1793,
+        "evidenceWeight": 0.4368,
+        "sources": [
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.7888,
+            "weight": 0.2302,
+            "date": "2026-05-29",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Claude Code Claude Opus 4.8"
+          },
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.6972,
+            "weight": 0.2066,
+            "date": "2026-05-01",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Claude Code Claude Opus 4.7"
+          }
+        ]
+      }
+    },
+    "claude-code:sonnet": {
+      "bugfix": {
+        "score": 0.7098,
+        "confidence": 0.387,
+        "evidenceWeight": 1.2626,
+        "sources": [
+          {
+            "source": "swebench_verified",
+            "benchmark": "swebench_verified",
+            "metric": "resolved",
+            "score": 0.72,
+            "weight": 0.9408,
+            "sampleSize": 500,
+            "date": "2026-06-01",
+            "url": "https://www.swebench.com/",
+            "modelRaw": "Claude Sonnet"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "percent_correct",
+            "score": 0.68,
+            "weight": 0.3218,
+            "sampleSize": 225,
+            "date": "2026-06-01",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "Claude Sonnet"
+          }
+        ]
+      },
+      "feature": {
+        "score": 0.6994,
+        "confidence": 0.2532,
+        "evidenceWeight": 0.6779,
+        "sources": [
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "percent_correct",
+            "score": 0.68,
+            "weight": 0.3487,
+            "sampleSize": 225,
+            "date": "2026-06-01",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "Claude Sonnet"
+          },
+          {
+            "source": "swebench_verified",
+            "benchmark": "swebench_verified",
+            "metric": "resolved",
+            "score": 0.72,
+            "weight": 0.3293,
+            "sampleSize": 500,
+            "date": "2026-06-01",
+            "url": "https://www.swebench.com/",
+            "modelRaw": "Claude Sonnet"
+          }
+        ]
+      },
+      "refactor": {
+        "score": 0.7019,
+        "confidence": 0.3212,
+        "evidenceWeight": 0.9465,
+        "sources": [
+          {
+            "source": "swebench_verified",
+            "benchmark": "swebench_verified",
+            "metric": "resolved",
+            "score": 0.72,
+            "weight": 0.5174,
+            "sampleSize": 500,
+            "date": "2026-06-01",
+            "url": "https://www.swebench.com/",
+            "modelRaw": "Claude Sonnet"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "percent_correct",
+            "score": 0.68,
+            "weight": 0.4291,
+            "sampleSize": 225,
+            "date": "2026-06-01",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "Claude Sonnet"
+          }
+        ]
+      },
+      "test": {
+        "score": 0.6997,
+        "confidence": 0.1924,
+        "evidenceWeight": 0.4766,
+        "sources": [
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "percent_correct",
+            "score": 0.68,
+            "weight": 0.2414,
+            "sampleSize": 225,
+            "date": "2026-06-01",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "Claude Sonnet"
+          },
+          {
+            "source": "swebench_verified",
+            "benchmark": "swebench_verified",
+            "metric": "resolved",
+            "score": 0.72,
+            "weight": 0.2352,
+            "sampleSize": 500,
+            "date": "2026-06-01",
+            "url": "https://www.swebench.com/",
+            "modelRaw": "Claude Sonnet"
+          }
+        ]
+      },
+      "docs": {
+        "score": 0.68,
+        "confidence": 0.0628,
+        "evidenceWeight": 0.1341,
+        "sources": [
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "percent_correct",
+            "score": 0.68,
+            "weight": 0.1341,
+            "sampleSize": 225,
+            "date": "2026-06-01",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "Claude Sonnet"
+          }
+        ]
+      },
+      "security": {
+        "score": 0.7055,
+        "confidence": 0.1559,
+        "evidenceWeight": 0.3693,
+        "sources": [
+          {
+            "source": "swebench_verified",
+            "benchmark": "swebench_verified",
+            "metric": "resolved",
+            "score": 0.72,
+            "weight": 0.2352,
+            "sampleSize": 500,
+            "date": "2026-06-01",
+            "url": "https://www.swebench.com/",
+            "modelRaw": "Claude Sonnet"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "percent_correct",
+            "score": 0.68,
+            "weight": 0.1341,
+            "sampleSize": 225,
+            "date": "2026-06-01",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "Claude Sonnet"
+          }
+        ]
+      },
+      "perf": {
+        "score": 0.7022,
+        "confidence": 0.1746,
+        "evidenceWeight": 0.4229,
+        "sources": [
+          {
+            "source": "swebench_verified",
+            "benchmark": "swebench_verified",
+            "metric": "resolved",
+            "score": 0.72,
+            "weight": 0.2352,
+            "sampleSize": 500,
+            "date": "2026-06-01",
+            "url": "https://www.swebench.com/",
+            "modelRaw": "Claude Sonnet"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "percent_correct",
+            "score": 0.68,
+            "weight": 0.1877,
+            "sampleSize": 225,
+            "date": "2026-06-01",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "Claude Sonnet"
+          }
+        ]
+      },
+      "general": {
+        "score": 0.7036,
+        "confidence": 0.2643,
+        "evidenceWeight": 0.7184,
+        "sources": [
+          {
+            "source": "swebench_verified",
+            "benchmark": "swebench_verified",
+            "metric": "resolved",
+            "score": 0.72,
+            "weight": 0.4233,
+            "sampleSize": 500,
+            "date": "2026-06-01",
+            "url": "https://www.swebench.com/",
+            "modelRaw": "Claude Sonnet"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "percent_correct",
+            "score": 0.68,
+            "weight": 0.295,
+            "sampleSize": 225,
+            "date": "2026-06-01",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "Claude Sonnet"
+          }
+        ]
+      }
+    },
+    "codex:gpt-5.4": {
+      "bugfix": {
+        "score": 0.867,
+        "confidence": 0.0519,
+        "evidenceWeight": 0.1095,
+        "sources": [
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.867,
+            "weight": 0.1095,
+            "sampleSize": 225,
+            "date": "2025-08-25",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gpt-5 medium"
+          }
+        ]
+      },
+      "feature": {
+        "score": 0.867,
+        "confidence": 0.056,
+        "evidenceWeight": 0.1186,
+        "sources": [
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.867,
+            "weight": 0.1186,
+            "sampleSize": 225,
+            "date": "2025-08-25",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gpt-5 medium"
+          }
+        ]
+      },
+      "refactor": {
+        "score": 0.867,
+        "confidence": 0.068,
+        "evidenceWeight": 0.146,
+        "sources": [
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.867,
+            "weight": 0.146,
+            "sampleSize": 225,
+            "date": "2025-08-25",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gpt-5 medium"
+          }
+        ]
+      },
+      "test": {
+        "score": 0.867,
+        "confidence": 0.0394,
+        "evidenceWeight": 0.0821,
+        "sources": [
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.867,
+            "weight": 0.0821,
+            "sampleSize": 225,
+            "date": "2025-08-25",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gpt-5 medium"
+          }
+        ]
+      },
+      "docs": {
+        "score": 0.867,
+        "confidence": 0.0223,
+        "evidenceWeight": 0.0456,
+        "sources": [
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.867,
+            "weight": 0.0456,
+            "sampleSize": 225,
+            "date": "2025-08-25",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gpt-5 medium"
+          }
+        ]
+      },
+      "security": {
+        "score": 0.867,
+        "confidence": 0.0223,
+        "evidenceWeight": 0.0456,
+        "sources": [
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.867,
+            "weight": 0.0456,
+            "sampleSize": 225,
+            "date": "2025-08-25",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gpt-5 medium"
+          }
+        ]
+      },
+      "perf": {
+        "score": 0.867,
+        "confidence": 0.0309,
+        "evidenceWeight": 0.0639,
+        "sources": [
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.867,
+            "weight": 0.0639,
+            "sampleSize": 225,
+            "date": "2025-08-25",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gpt-5 medium"
+          }
+        ]
+      },
+      "general": {
+        "score": 0.867,
+        "confidence": 0.0478,
+        "evidenceWeight": 0.1004,
+        "sources": [
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.867,
+            "weight": 0.1004,
+            "sampleSize": 225,
+            "date": "2025-08-25",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gpt-5 medium"
+          }
+        ]
+      }
+    },
+    "codex:gpt-5.4-mini": {
+      "bugfix": {
+        "score": 0.7738,
+        "confidence": 0.1057,
+        "evidenceWeight": 0.2364,
+        "sources": [
+          {
+            "source": "evalplus",
+            "benchmark": "evalplus",
+            "metric": "pass_at_1",
+            "score": 0.74,
+            "weight": 0.1269,
+            "sampleSize": 378,
+            "date": "2026-05-20",
+            "url": "https://github.com/evalplus/evalplus",
+            "modelRaw": "gpt-5.4-mini"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.813,
+            "weight": 0.1095,
+            "sampleSize": 225,
+            "date": "2025-08-25",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gpt-5 low"
+          }
+        ]
+      },
+      "feature": {
+        "score": 0.7618,
+        "confidence": 0.1659,
+        "evidenceWeight": 0.3978,
+        "sources": [
+          {
+            "source": "evalplus",
+            "benchmark": "evalplus",
+            "metric": "pass_at_1",
+            "score": 0.74,
+            "weight": 0.2792,
+            "sampleSize": 378,
+            "date": "2026-05-20",
+            "url": "https://github.com/evalplus/evalplus",
+            "modelRaw": "gpt-5.4-mini"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.813,
+            "weight": 0.1186,
+            "sampleSize": 225,
+            "date": "2025-08-25",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gpt-5 low"
+          }
+        ]
+      },
+      "refactor": {
+        "score": 0.7791,
+        "confidence": 0.1201,
+        "evidenceWeight": 0.2729,
+        "sources": [
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.813,
+            "weight": 0.146,
+            "sampleSize": 225,
+            "date": "2025-08-25",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gpt-5 low"
+          },
+          {
+            "source": "evalplus",
+            "benchmark": "evalplus",
+            "metric": "pass_at_1",
+            "score": 0.74,
+            "weight": 0.1269,
+            "sampleSize": 378,
+            "date": "2026-05-20",
+            "url": "https://github.com/evalplus/evalplus",
+            "modelRaw": "gpt-5.4-mini"
+          }
+        ]
+      },
+      "test": {
+        "score": 0.7537,
+        "confidence": 0.1795,
+        "evidenceWeight": 0.4375,
+        "sources": [
+          {
+            "source": "evalplus",
+            "benchmark": "evalplus",
+            "metric": "pass_at_1",
+            "score": 0.74,
+            "weight": 0.3554,
+            "sampleSize": 378,
+            "date": "2026-05-20",
+            "url": "https://github.com/evalplus/evalplus",
+            "modelRaw": "gpt-5.4-mini"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.813,
+            "weight": 0.0821,
+            "sampleSize": 225,
+            "date": "2025-08-25",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gpt-5 low"
+          }
+        ]
+      },
+      "docs": {
+        "score": 0.7746,
+        "confidence": 0.046,
+        "evidenceWeight": 0.0964,
+        "sources": [
+          {
+            "source": "evalplus",
+            "benchmark": "evalplus",
+            "metric": "pass_at_1",
+            "score": 0.74,
+            "weight": 0.0508,
+            "sampleSize": 378,
+            "date": "2026-05-20",
+            "url": "https://github.com/evalplus/evalplus",
+            "modelRaw": "gpt-5.4-mini"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.813,
+            "weight": 0.0456,
+            "sampleSize": 225,
+            "date": "2025-08-25",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gpt-5 low"
+          }
+        ]
+      },
+      "security": {
+        "score": 0.7673,
+        "confidence": 0.0574,
+        "evidenceWeight": 0.1218,
+        "sources": [
+          {
+            "source": "evalplus",
+            "benchmark": "evalplus",
+            "metric": "pass_at_1",
+            "score": 0.74,
+            "weight": 0.0762,
+            "sampleSize": 378,
+            "date": "2026-05-20",
+            "url": "https://github.com/evalplus/evalplus",
+            "modelRaw": "gpt-5.4-mini"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.813,
+            "weight": 0.0456,
+            "sampleSize": 225,
+            "date": "2025-08-25",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gpt-5 low"
+          }
+        ]
+      },
+      "perf": {
+        "score": 0.7682,
+        "confidence": 0.0764,
+        "evidenceWeight": 0.1654,
+        "sources": [
+          {
+            "source": "evalplus",
+            "benchmark": "evalplus",
+            "metric": "pass_at_1",
+            "score": 0.74,
+            "weight": 0.1015,
+            "sampleSize": 378,
+            "date": "2026-05-20",
+            "url": "https://github.com/evalplus/evalplus",
+            "modelRaw": "gpt-5.4-mini"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.813,
+            "weight": 0.0639,
+            "sampleSize": 225,
+            "date": "2025-08-25",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gpt-5 low"
+          }
+        ]
+      },
+      "general": {
+        "score": 0.7623,
+        "confidence": 0.1412,
+        "evidenceWeight": 0.3288,
+        "sources": [
+          {
+            "source": "evalplus",
+            "benchmark": "evalplus",
+            "metric": "pass_at_1",
+            "score": 0.74,
+            "weight": 0.2285,
+            "sampleSize": 378,
+            "date": "2026-05-20",
+            "url": "https://github.com/evalplus/evalplus",
+            "modelRaw": "gpt-5.4-mini"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.813,
+            "weight": 0.1004,
+            "sampleSize": 225,
+            "date": "2025-08-25",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gpt-5 low"
+          }
+        ]
+      }
+    },
+    "codex:gpt-5.5": {
+      "bugfix": {
+        "score": 0.8528,
+        "confidence": 0.1165,
+        "evidenceWeight": 0.2636,
+        "sources": [
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.8337,
+            "weight": 0.155,
+            "date": "2026-05-01",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Codex CLI GPT-5.5"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.88,
+            "weight": 0.1087,
+            "sampleSize": 225,
+            "date": "2025-08-23",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gpt-5 high"
+          }
+        ]
+      },
+      "feature": {
+        "score": 0.8566,
+        "confidence": 0.1064,
+        "evidenceWeight": 0.2382,
+        "sources": [
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.8337,
+            "weight": 0.1205,
+            "date": "2026-05-01",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Codex CLI GPT-5.5"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.88,
+            "weight": 0.1177,
+            "sampleSize": 225,
+            "date": "2025-08-23",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gpt-5 high"
+          }
+        ]
+      },
+      "refactor": {
+        "score": 0.8627,
+        "confidence": 0.1035,
+        "evidenceWeight": 0.231,
+        "sources": [
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.88,
+            "weight": 0.1449,
+            "sampleSize": 225,
+            "date": "2025-08-23",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gpt-5 high"
+          },
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.8337,
+            "weight": 0.0861,
+            "date": "2026-05-01",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Codex CLI GPT-5.5"
+          }
+        ]
+      },
+      "test": {
+        "score": 0.8541,
+        "confidence": 0.0846,
+        "evidenceWeight": 0.1848,
+        "sources": [
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.8337,
+            "weight": 0.1033,
+            "date": "2026-05-01",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Codex CLI GPT-5.5"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.88,
+            "weight": 0.0815,
+            "sampleSize": 225,
+            "date": "2025-08-23",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gpt-5 high"
+          }
+        ]
+      },
+      "docs": {
+        "score": 0.88,
+        "confidence": 0.0221,
+        "evidenceWeight": 0.0453,
+        "sources": [
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.88,
+            "weight": 0.0453,
+            "sampleSize": 225,
+            "date": "2025-08-23",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gpt-5 high"
+          }
+        ]
+      },
+      "security": {
+        "score": 0.8478,
+        "confidence": 0.0692,
+        "evidenceWeight": 0.1486,
+        "sources": [
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.8337,
+            "weight": 0.1033,
+            "date": "2026-05-01",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Codex CLI GPT-5.5"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.88,
+            "weight": 0.0453,
+            "sampleSize": 225,
+            "date": "2025-08-23",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gpt-5 high"
+          }
+        ]
+      },
+      "perf": {
+        "score": 0.8497,
+        "confidence": 0.0842,
+        "evidenceWeight": 0.1839,
+        "sources": [
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.8337,
+            "weight": 0.1205,
+            "date": "2026-05-01",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Codex CLI GPT-5.5"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.88,
+            "weight": 0.0634,
+            "sampleSize": 225,
+            "date": "2025-08-23",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gpt-5 high"
+          }
+        ]
+      },
+      "general": {
+        "score": 0.8488,
+        "confidence": 0.1328,
+        "evidenceWeight": 0.3062,
+        "sources": [
+          {
+            "source": "terminal_bench",
+            "benchmark": "terminal_bench",
+            "metric": "accuracy",
+            "score": 0.8337,
+            "weight": 0.2066,
+            "date": "2026-05-01",
+            "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
+            "modelRaw": "Codex CLI GPT-5.5"
+          },
+          {
+            "source": "aider_polyglot",
+            "benchmark": "aider_polyglot",
+            "metric": "pass_rate_2",
+            "score": 0.88,
+            "weight": 0.0996,
+            "sampleSize": 225,
+            "date": "2025-08-23",
+            "url": "https://aider.chat/docs/leaderboards/",
+            "modelRaw": "gpt-5 high"
+          }
+        ]
+      }
+    }
+  }
+}