aiforcecli-chat 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. package/License.MD +49 -0
  2. package/README.md +642 -0
  3. package/aiforcecli.config.example.json +66 -0
  4. package/assets/README.md +14 -0
  5. package/dist/cli.js +2 -0
  6. package/dist/index.js +2 -0
  7. package/package.json +62 -0
  8. package/tools/scorecard/README.md +92 -0
  9. package/tools/scorecard/config.json +134 -0
  10. package/tools/scorecard/fetch.mjs +335 -0
  11. package/tools/scorecard/generate.mjs +289 -0
  12. package/tools/scorecard/generated/example/invalid-rows.json +1 -0
  13. package/tools/scorecard/generated/example/scorecard-report.md +147 -0
  14. package/tools/scorecard/generated/example/scorecard.compact.json +61 -0
  15. package/tools/scorecard/generated/example/scorecard.json +1492 -0
  16. package/tools/scorecard/generated/example/unmapped-models.json +1492 -0
  17. package/tools/scorecard/generated/raw/aider_polyglot.html +21071 -0
  18. package/tools/scorecard/generated/raw/terminal_bench_2_1.html +2 -0
  19. package/tools/scorecard/generated/scorecard/invalid-rows.json +1 -0
  20. package/tools/scorecard/generated/scorecard/scorecard-report.md +133 -0
  21. package/tools/scorecard/generated/scorecard/scorecard.compact.json +51 -0
  22. package/tools/scorecard/generated/scorecard/scorecard.json +1181 -0
  23. package/tools/scorecard/generated/scorecard/unmapped-models.json +1492 -0
  24. package/tools/scorecard/generated/scorecard-example/invalid-rows.json +1 -0
  25. package/tools/scorecard/generated/scorecard-example/scorecard-report.md +40 -0
  26. package/tools/scorecard/generated/scorecard-example/scorecard.compact.json +22 -0
  27. package/tools/scorecard/generated/scorecard-example/scorecard.json +389 -0
  28. package/tools/scorecard/generated/scorecard-example/unmapped-models.json +1 -0
  29. package/tools/scorecard/generated/scorecard-fetch/raw/aider_polyglot.html +21071 -0
  30. package/tools/scorecard/generated/scorecard-fetch/raw/terminal_bench_2_1.html +2 -0
  31. package/tools/scorecard/snapshots/example.normalized.example.json +38 -0
  32. package/tools/scorecard/snapshots/live.aider_polyglot.json +1318 -0
  33. package/tools/scorecard/snapshots/live.terminal_bench_2_1.json +294 -0
@@ -0,0 +1,2 @@
1
+ <!DOCTYPE html><html lang="en" class="geistsans_d5a4f12f-module__Ur3q_a__variable geistmono_157ca88a-module__iaM1Ya__variable google_sans_code_57373b42-module__VZ90Sa__variable"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="stylesheet" href="/_next/static/chunks/7f92b58d201945eb.css" data-precedence="next"/><link rel="stylesheet" href="/_next/static/chunks/50e24f3464fcd113.css" data-precedence="next"/><link rel="preload" as="script" fetchPriority="low" href="/_next/static/chunks/1a15a26643b65c70.js"/><script src="/_next/static/chunks/4a0898171cb5614f.js" async=""></script><script src="/_next/static/chunks/6dad3f0875474073.js" async=""></script><script src="/_next/static/chunks/7b52218c954dfa8e.js" async=""></script><script src="/_next/static/chunks/6a4e60fe0d01bde6.js" async=""></script><script src="/_next/static/chunks/turbopack-f14f2b8dcb41d1fa.js" async=""></script><script src="/_next/static/chunks/772cb4e079383472.js" async=""></script><script src="/_next/static/chunks/cc5200c2c4db5d30.js" async=""></script><script src="/_next/static/chunks/d96012bcfc98706a.js" async=""></script><script src="/_next/static/chunks/963c71eec1d89c3f.js" async=""></script><script src="/_next/static/chunks/e7a87ddff7c3d361.js" async=""></script><script src="/_next/static/chunks/befa77c9ca59daf6.js" async=""></script><script src="/_next/static/chunks/b881789f47bb99ed.js" async=""></script><script src="/_next/static/chunks/d48a38d803cc289b.js" async=""></script><script src="/_next/static/chunks/afcfef2b68fe9eaf.js" async=""></script><script src="/_next/static/chunks/9eef2e45c165ec54.js" async=""></script><script src="/_next/static/chunks/9572667c10e1469b.js" async=""></script><script src="/_next/static/chunks/1d410ec419d845b5.js" async=""></script><script src="/_next/static/chunks/a802edd4d3033547.js" async=""></script><title>Terminal-Bench</title><meta name="description" content="A benchmark for terminal agents"/><meta property="og:title" content="Terminal-Bench"/><meta property="og:description" content="A benchmark for terminal agents"/><meta property="og:url" content="https://www.tbench.ai"/><meta property="og:site_name" content="Terminal-Bench"/><meta property="og:locale" content="en_US"/><meta property="og:image" content="https://www.tbench.ai/og/ascii-logo-dark-1200x630.png"/><meta property="og:type" content="website"/><meta name="twitter:card" content="summary_large_image"/><meta name="twitter:title" content="Terminal-Bench"/><meta name="twitter:description" content="A benchmark for terminal agents"/><meta name="twitter:image" content="https://www.tbench.ai/og/ascii-logo-dark-1200x630.png"/><meta name="twitter:image:width" content="1200"/><meta name="twitter:image:height" content="630"/><link rel="icon" href="/favicon.ico"/><meta name="next-size-adjust" content=""/><script src="/_next/static/chunks/a6dad97d9634a72d.js" noModule=""></script></head><body class="flex min-h-screen flex-col"><div hidden=""><!--$--><!--/$--></div><script>((a,b,c,d,e,f,g,h)=>{let i=document.documentElement,j=["light","dark"];function k(b){var c;(Array.isArray(a)?a:[a]).forEach(a=>{let c="class"===a,d=c&&f?e.map(a=>f[a]||a):e;c?(i.classList.remove(...d),i.classList.add(f&&f[b]?f[b]:b)):i.setAttribute(a,b)}),c=b,h&&j.includes(c)&&(i.style.colorScheme=c)}if(d)k(d);else try{let a=localStorage.getItem(b)||c,d=g&&"system"===a?window.matchMedia("(prefers-color-scheme: dark)").matches?"dark":"light":a;k(d)}catch(a){}})("class","theme","system",null,["light","dark"],null,true,true)</script><div class="bg-fd-secondary/50 p-3 empty:hidden"></div><!--$--><!--/$--><main id="nd-home-layout" class="flex flex-1 flex-col pt-14"><header id="nd-nav" class="fixed top-(--fd-banner-height) z-40 left-0 backdrop-blur-lg border-b transition-colors *:mx-auto *:max-w-fd-container bg-fd-background/80" style="right:var(--removed-body-scroll-bar-size, 0px)" aria-label="Main" data-orientation="horizontal" dir="ltr"><div style="position:relative"><nav data-orientation="horizontal" class="flex h-14 w-full items-center px-4" dir="ltr"><a class="inline-flex items-center gap-2.5 font-semibold" href="/"><div class="flex items-center gap-2"><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-terminal size-4" aria-hidden="true"><polyline points="4 17 10 11 4 5"></polyline><line x1="12" x2="20" y1="19" y2="19"></line></svg><p class="font-mono text-base font-medium tracking-tight">terminal-bench</p></div></a><ul class="flex flex-row items-center gap-2 px-6 max-sm:hidden"><li class="list-none"><a href="https://harborframework.com/docs/running-tbench" rel="noreferrer noopener" target="_blank" class="inline-flex items-center gap-1 p-2 text-fd-muted-foreground transition-colors hover:text-fd-accent-foreground data-[active=true]:text-fd-primary [&amp;_svg]:size-4 text-sm" data-active="false" data-radix-collection-item="">run terminal-bench</a></li><li class="list-none"><a class="inline-flex items-center gap-1 p-2 text-fd-muted-foreground transition-colors hover:text-fd-accent-foreground data-[active=true]:text-fd-primary [&amp;_svg]:size-4 text-sm" data-active="false" data-radix-collection-item="" href="/leaderboard/terminal-bench/2.0">leaderboard</a></li><li class="list-none"><a class="inline-flex items-center gap-1 p-2 text-fd-muted-foreground transition-colors hover:text-fd-accent-foreground data-[active=true]:text-fd-primary [&amp;_svg]:size-4 text-sm" data-active="false" data-radix-collection-item="" href="/benchmarks">benchmarks</a></li><li class="list-none"><a class="inline-flex items-center gap-1 p-2 text-fd-muted-foreground transition-colors hover:text-fd-accent-foreground data-[active=true]:text-fd-primary [&amp;_svg]:size-4 text-sm" data-active="false" data-radix-collection-item="" href="/contributors">contributors</a></li><li class="list-none"><a class="inline-flex items-center gap-1 p-2 text-fd-muted-foreground transition-colors hover:text-fd-accent-foreground data-[active=true]:text-fd-primary [&amp;_svg]:size-4 text-sm" data-active="false" data-radix-collection-item="" href="/news">news</a></li><li class="list-none"><a href="https://discord.gg/2Pe5uWGcV3" rel="noreferrer noopener" target="_blank" class="inline-flex items-center gap-1 p-2 text-fd-muted-foreground transition-colors hover:text-fd-accent-foreground data-[active=true]:text-fd-primary [&amp;_svg]:size-4 text-sm" data-active="false" data-radix-collection-item="">discord</a></li></ul><div class="flex flex-row items-center justify-end gap-1.5 flex-1"><div class="inline-flex items-center rounded-full border p-1 max-lg:hidden" data-theme-toggle=""><button aria-label="light" class="size-6.5 rounded-full p-1.5 text-fd-muted-foreground"><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="currentColor" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide size-full"><circle cx="12" cy="12" r="4"></circle><path d="M12 2v2"></path><path d="M12 20v2"></path><path d="m4.93 4.93 1.41 1.41"></path><path d="m17.66 17.66 1.41 1.41"></path><path d="M2 12h2"></path><path d="M20 12h2"></path><path d="m6.34 17.66-1.41 1.41"></path><path d="m19.07 4.93-1.41 1.41"></path></svg></button><button aria-label="dark" class="size-6.5 rounded-full p-1.5 text-fd-muted-foreground"><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="currentColor" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide size-full"><path d="M12 3a6 6 0 0 0 9 9 9 9 0 1 1-9-9Z"></path></svg></button><button aria-label="system" class="size-6.5 rounded-full p-1.5 text-fd-muted-foreground"><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="currentColor" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide size-full"><path d="M5 17H4a2 2 0 0 1-2-2V5a2 2 0 0 1 2-2h16a2 2 0 0 1 2 2v10a2 2 0 0 1-2 2h-1"></path><path d="m12 15 5 6H7Z"></path></svg></button></div></div><ul class="flex flex-row items-center"><li class="list-none"><a href="https://github.com/laude-institute/terminal-bench" rel="noreferrer noopener" target="_blank" class="inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors duration-100 disabled:pointer-events-none disabled:opacity-50 focus-visible:outline-none hover:bg-fd-accent hover:text-fd-accent-foreground p-1.5 [&amp;_svg]:size-5 max-lg:hidden" aria-label="GitHub" data-active="false" data-radix-collection-item=""><svg role="img" viewBox="0 0 24 24" fill="currentColor"><path d="M12 .297c-6.63 0-12 5.373-12 12 0 5.303 3.438 9.8 8.205 11.385.6.113.82-.258.82-.577 0-.285-.01-1.04-.015-2.04-3.338.724-4.042-1.61-4.042-1.61C4.422 18.07 3.633 17.7 3.633 17.7c-1.087-.744.084-.729.084-.729 1.205.084 1.838 1.236 1.838 1.236 1.07 1.835 2.809 1.305 3.495.998.108-.776.417-1.305.76-1.605-2.665-.3-5.466-1.332-5.466-5.93 0-1.31.465-2.38 1.235-3.22-.135-.303-.54-1.523.105-3.176 0 0 1.005-.322 3.3 1.23.96-.267 1.98-.399 3-.405 1.02.006 2.04.138 3 .405 2.28-1.552 3.285-1.23 3.285-1.23.645 1.653.24 2.873.12 3.176.765.84 1.23 1.91 1.23 3.22 0 4.61-2.805 5.625-5.475 5.92.42.36.81 1.096.81 2.22 0 1.606-.015 2.896-.015 3.286 0 .315.21.69.825.57C20.565 22.092 24 17.592 24 12.297c0-6.627-5.373-12-12-12"></path></svg></a></li><li class="list-none lg:hidden"><button id="radix-_R_cnqjb_-trigger-radix-_R_5bcnqjb_" data-state="closed" aria-expanded="false" aria-controls="radix-_R_cnqjb_-content-radix-_R_5bcnqjb_" class="data-[state=open]:bg-fd-accent/50 inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors duration-100 disabled:pointer-events-none disabled:opacity-50 focus-visible:outline-none hover:bg-fd-accent hover:text-fd-accent-foreground p-1.5 [&amp;_svg]:size-5 group -me-1.5" aria-label="Toggle Menu" data-radix-collection-item=""><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide !size-5.5 transition-transform duration-300 group-data-[state=open]:rotate-180"><path d="m6 9 6 6 6-6"></path></svg></button></li></ul></nav></div><div class="flex w-full justify-center"></div></header><div class="flex flex-1 flex-col items-center px-4 py-6 sm:pt-12"><div class="flex w-full max-w-7xl flex-col"><nav aria-label="breadcrumb" data-slot="breadcrumb" class="mb-6 hidden font-mono sm:block"><ol data-slot="breadcrumb-list" class="text-muted-foreground flex flex-wrap items-center gap-1.5 text-sm break-words sm:gap-2.5"><li data-slot="breadcrumb-item" class="inline-flex items-center gap-1.5"><a data-slot="breadcrumb-link" class="hover:text-foreground transition-colors" href="/">Home</a></li><li data-slot="breadcrumb-separator" role="presentation" aria-hidden="true" class="[&amp;&gt;svg]:size-3.5"><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-chevron-right" aria-hidden="true"><path d="m9 18 6-6-6-6"></path></svg></li><li data-slot="breadcrumb-item" class="inline-flex items-center gap-1.5"><a data-slot="breadcrumb-link" class="hover:text-foreground transition-colors" href="/leaderboard">Leaderboards</a></li><li data-slot="breadcrumb-separator" role="presentation" aria-hidden="true" class="[&amp;&gt;svg]:size-3.5"><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-chevron-right" aria-hidden="true"><path d="m9 18 6-6-6-6"></path></svg></li><li data-slot="breadcrumb-item" class="inline-flex items-center gap-1.5"><span data-slot="breadcrumb-page" role="link" aria-disabled="true" aria-current="page" class="text-foreground font-normal">terminal-bench<!-- -->@<!-- -->2.1</span></li></ol></nav><h2 class="font-mono text-4xl tracking-tighter">terminal-bench<!-- -->@<!-- -->2.1<!-- --> Leaderboard</h2><div dir="ltr" data-orientation="horizontal" class="flex flex-col overflow-hidden rounded-xl border bg-fd-secondary my-6 font-mono"><div role="tablist" aria-orientation="horizontal" class="flex gap-3.5 text-fd-secondary-foreground overflow-x-auto px-4 not-prose" tabindex="-1" data-orientation="horizontal" style="outline:none"><button type="button" role="tab" aria-selected="true" aria-controls="radix-_R_6lubsnpfknqjb_-content-new-model" data-state="active" id="radix-_R_6lubsnpfknqjb_-trigger-new-model" class="inline-flex items-center gap-2 whitespace-nowrap text-fd-muted-foreground border-b border-transparent py-2 text-sm font-medium transition-colors [&amp;_svg]:size-4 hover:text-fd-accent-foreground disabled:pointer-events-none disabled:opacity-50 data-[state=active]:border-fd-primary data-[state=active]:text-fd-primary" tabindex="-1" data-orientation="horizontal" data-radix-collection-item="">New Model</button><button type="button" role="tab" aria-selected="false" aria-controls="radix-_R_6lubsnpfknqjb_-content-custom-agent" data-state="inactive" id="radix-_R_6lubsnpfknqjb_-trigger-custom-agent" class="inline-flex items-center gap-2 whitespace-nowrap text-fd-muted-foreground border-b border-transparent py-2 text-sm font-medium transition-colors [&amp;_svg]:size-4 hover:text-fd-accent-foreground disabled:pointer-events-none disabled:opacity-50 data-[state=active]:border-fd-primary data-[state=active]:text-fd-primary" tabindex="-1" data-orientation="horizontal" data-radix-collection-item="">Custom Agent</button></div><div data-state="active" data-orientation="horizontal" role="tabpanel" aria-labelledby="radix-_R_6lubsnpfknqjb_-trigger-new-model" id="radix-_R_6lubsnpfknqjb_-content-new-model" tabindex="0" class="p-4 text-[15px] bg-fd-background rounded-xl outline-none prose-no-margin data-[state=inactive]:hidden [&amp;&gt;figure:only-child]:-m-4 [&amp;&gt;figure:only-child]:border-none" style="animation-duration:0s"><figure dir="ltr" class="rounded-xl bg-fd-card p-1 shiki relative border outline-none not-prose overflow-hidden text-sm my-0"><div class="flex text-fd-muted-foreground items-center gap-2 ps-3 h-9.5"><figcaption class="flex-1 truncate">Note: submissions may not modify timeouts or resources</figcaption><div class="empty:hidden"><button type="button" class="inline-flex items-center justify-center rounded-md p-2 text-sm font-medium transition-colors duration-100 disabled:pointer-events-none disabled:opacity-50 focus-visible:outline-none hover:bg-fd-accent hover:text-fd-accent-foreground [&amp;_svg]:size-3.5" aria-label="Copy Text"><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide"><rect width="14" height="14" x="8" y="8" rx="2" ry="2"></rect><path d="M4 16c-1.1 0-2-.9-2-2V4c0-1.1.9-2 2-2h10c1.1 0 2 .9 2 2"></path></svg></button></div></div><div class="bg-fd-secondary rounded-lg border text-[13px] py-3.5 overflow-auto max-h-[600px] fd-scroll-container"><pre class="min-w-full w-max *:flex *:flex-col shiki shiki-themes github-light github-dark" style="--shiki-light:#24292e;--shiki-dark:#e1e4e8;--shiki-light-bg:#fff;--shiki-dark-bg:#24292e" tabindex="0"><code><span class="line"><span style="--shiki-light:#6F42C1;--shiki-dark:#B392F0">harbor</span><span style="--shiki-light:#032F62;--shiki-dark:#9ECBFF"> run</span><span style="--shiki-light:#005CC5;--shiki-dark:#79B8FF"> -d</span><span style="--shiki-light:#032F62;--shiki-dark:#9ECBFF"> terminal-bench/terminal-bench-2-1</span><span style="--shiki-light:#005CC5;--shiki-dark:#79B8FF"> -a</span><span style="--shiki-light:#032F62;--shiki-dark:#9ECBFF"> &quot;agent&quot;</span><span style="--shiki-light:#005CC5;--shiki-dark:#79B8FF"> -m</span><span style="--shiki-light:#032F62;--shiki-dark:#9ECBFF"> &quot;model&quot;</span><span style="--shiki-light:#005CC5;--shiki-dark:#79B8FF"> -k</span><span style="--shiki-light:#005CC5;--shiki-dark:#79B8FF"> 5</span></span></code></pre></div></figure></div><div data-state="inactive" data-orientation="horizontal" role="tabpanel" aria-labelledby="radix-_R_6lubsnpfknqjb_-trigger-custom-agent" id="radix-_R_6lubsnpfknqjb_-content-custom-agent" tabindex="0" class="p-4 text-[15px] bg-fd-background rounded-xl outline-none prose-no-margin data-[state=inactive]:hidden [&amp;&gt;figure:only-child]:-m-4 [&amp;&gt;figure:only-child]:border-none"><figure dir="ltr" class="rounded-xl bg-fd-card p-1 shiki relative border outline-none not-prose overflow-hidden text-sm my-0"><div class="flex text-fd-muted-foreground items-center gap-2 ps-3 h-9.5"><figcaption class="flex-1 truncate">Note: submissions may not modify timeouts or resources</figcaption><div class="empty:hidden"><button type="button" class="inline-flex items-center justify-center rounded-md p-2 text-sm font-medium transition-colors duration-100 disabled:pointer-events-none disabled:opacity-50 focus-visible:outline-none hover:bg-fd-accent hover:text-fd-accent-foreground [&amp;_svg]:size-3.5" aria-label="Copy Text"><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide"><rect width="14" height="14" x="8" y="8" rx="2" ry="2"></rect><path d="M4 16c-1.1 0-2-.9-2-2V4c0-1.1.9-2 2-2h10c1.1 0 2 .9 2 2"></path></svg></button></div></div><div class="bg-fd-secondary rounded-lg border text-[13px] py-3.5 overflow-auto max-h-[600px] fd-scroll-container"><pre class="min-w-full w-max *:flex *:flex-col shiki shiki-themes github-light github-dark" style="--shiki-light:#24292e;--shiki-dark:#e1e4e8;--shiki-light-bg:#fff;--shiki-dark-bg:#24292e" tabindex="0"><code><span class="line"><span style="--shiki-light:#6F42C1;--shiki-dark:#B392F0">harbor</span><span style="--shiki-light:#032F62;--shiki-dark:#9ECBFF"> run</span><span style="--shiki-light:#005CC5;--shiki-dark:#79B8FF"> -d</span><span style="--shiki-light:#032F62;--shiki-dark:#9ECBFF"> terminal-bench/terminal-bench-2-1</span><span style="--shiki-light:#005CC5;--shiki-dark:#79B8FF"> --agent-import-path</span><span style="--shiki-light:#032F62;--shiki-dark:#9ECBFF"> &quot;path.to.agent:SomeAgent&quot;</span><span style="--shiki-light:#005CC5;--shiki-dark:#79B8FF"> -k</span><span style="--shiki-light:#005CC5;--shiki-dark:#79B8FF"> 5</span></span></code></pre></div></figure></div></div><div class="-mx-4 flex flex-col md:mx-0"><div class="mb-3 flex items-center justify-between px-4 md:px-0"><p class="text-muted-foreground font-mono text-sm">Showing <!-- -->11<!-- --> entries</p><button class="text-primary disabled:text-muted-foreground font-mono text-sm font-normal underline-offset-4 hover:underline disabled:hover:no-underline" disabled="">Clear filters</button></div><div class="grid grid-cols-1 xl:grid-cols-4"><div class="relative -mb-px flex h-16 border border-x-0 md:border-x xl:-mr-px"><input type="text" placeholder="Search leaderboard" class="placeholder:text-muted-foreground bg-card flex w-full min-w-0 px-6 font-mono text-base outline-none sm:text-sm" value=""/><button class="text-muted-foreground hover:text-foreground absolute inset-y-0 right-0 px-6 hidden"><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-x h-4 w-4" aria-hidden="true"><path d="M18 6 6 18"></path><path d="m6 6 12 12"></path></svg></button></div><div class="grid grid-cols-1 lg:grid-cols-4 xl:col-span-3"><div class="bg-card flex flex-1 cursor-pointer flex-row items-center justify-between px-6 -mb-px h-16 border border-x-0 md:border-x lg:-mr-px" type="button" aria-haspopup="dialog" aria-expanded="false" aria-controls="radix-_R_1iolubsnpfknqjb_" data-state="closed" data-slot="popover-trigger"><p class="line-clamp-1 font-mono sm:text-sm text-muted-foreground">Select agents</p><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-chevrons-up-down ml-2 h-4 w-4 shrink-0 opacity-50" aria-hidden="true"><path d="m7 15 5 5 5-5"></path><path d="m7 9 5-5 5 5"></path></svg></div><div class="bg-card flex flex-1 cursor-pointer flex-row items-center justify-between px-6 -mb-px h-16 border border-x-0 md:border-x lg:-mr-px" type="button" aria-haspopup="dialog" aria-expanded="false" aria-controls="radix-_R_2iolubsnpfknqjb_" data-state="closed" data-slot="popover-trigger"><p class="line-clamp-1 font-mono sm:text-sm text-muted-foreground">Select models</p><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-chevrons-up-down ml-2 h-4 w-4 shrink-0 opacity-50" aria-hidden="true"><path d="m7 15 5 5 5-5"></path><path d="m7 9 5-5 5 5"></path></svg></div><div class="bg-card flex flex-1 cursor-pointer flex-row items-center justify-between px-6 -mb-px h-16 border border-x-0 md:border-x" type="button" aria-haspopup="dialog" aria-expanded="false" aria-controls="radix-_R_3iolubsnpfknqjb_" data-state="closed" data-slot="popover-trigger"><p class="line-clamp-1 font-mono sm:text-sm text-muted-foreground">Select organizations</p><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-chevrons-up-down ml-2 h-4 w-4 shrink-0 opacity-50" aria-hidden="true"><path d="m7 15 5 5 5-5"></path><path d="m7 9 5-5 5 5"></path></svg></div><div class="bg-card -mb-px -ml-px flex h-16 cursor-pointer items-center justify-between border border-x-0 px-6 font-mono text-base transition-colors sm:text-sm md:border-x text-muted-foreground"><span class="line-clamp-1">Verified only</span><button type="button" role="switch" aria-checked="false" data-state="unchecked" value="on" data-slot="switch" class="peer data-[state=checked]:bg-primary data-[state=unchecked]:bg-input focus-visible:border-ring focus-visible:ring-ring/50 dark:data-[state=unchecked]:bg-input/80 inline-flex h-[1.15rem] w-8 shrink-0 items-center rounded-[3px] border border-transparent shadow-xs transition-all outline-none focus-visible:ring-[3px] disabled:cursor-not-allowed disabled:opacity-50 ml-3" id="verified-only"><span data-state="unchecked" data-slot="switch-thumb" class="bg-background dark:data-[state=unchecked]:bg-foreground dark:data-[state=checked]:bg-primary-foreground pointer-events-none block size-4 rounded-[2px] ring-0 transition-transform data-[state=checked]:translate-x-[calc(100%-2px)] data-[state=unchecked]:translate-x-0"></span></button><input type="checkbox" aria-hidden="true" style="transform:translateX(-100%);position:absolute;pointer-events:none;opacity:0;margin:0" tabindex="-1" value="on"/></div></div></div><div class="bg-card border-y font-mono md:border-x -mx-4 md:mx-0"><div data-slot="table-container" class="relative w-full overflow-x-auto"><table data-slot="table" class="w-full caption-bottom text-sm [&amp;_tr&gt;td:first-child]:pl-6 [&amp;_tr&gt;td:last-child]:pr-6 [&amp;_tr&gt;th:first-child]:pl-6 [&amp;_tr&gt;th:last-child]:pr-6"><thead data-slot="table-header" class="[&amp;_tr]:border-b"><tr data-slot="table-row" class="data-[state=selected]:bg-muted border-b transition-colors px-6 hover:bg-transparent"><th data-slot="table-head" class="text-foreground h-10 px-2 text-left align-middle font-medium whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><button type="button" role="checkbox" aria-checked="false" data-state="unchecked" value="on" data-slot="checkbox" class="peer border-input dark:bg-input/30 data-[state=checked]:bg-primary data-[state=checked]:text-primary-foreground dark:data-[state=checked]:bg-primary data-[state=checked]:border-primary focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive size-4 shrink-0 rounded-[4px] border shadow-xs transition-shadow outline-none focus-visible:ring-[3px] disabled:cursor-not-allowed disabled:opacity-50" aria-label="Select all"></button><input type="checkbox" aria-hidden="true" tabindex="-1" style="position:absolute;pointer-events:none;opacity:0;margin:0;transform:translateX(-100%)" value="on"/></th><th data-slot="table-head" class="text-foreground h-10 px-2 text-left align-middle font-medium whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base">Rank</th><th data-slot="table-head" class="text-foreground h-10 px-2 text-left align-middle font-medium whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base">Agent</th><th data-slot="table-head" class="text-foreground h-10 px-2 text-left align-middle font-medium whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base">Model</th><th data-slot="table-head" class="text-foreground h-10 px-2 text-left align-middle font-medium whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base">Date</th><th data-slot="table-head" class="text-foreground h-10 px-2 text-left align-middle font-medium whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base">Agent Org</th><th data-slot="table-head" class="text-foreground h-10 px-2 text-left align-middle font-medium whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base">Model Org</th><th data-slot="table-head" class="text-foreground h-10 px-2 text-left align-middle font-medium whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><p class="text-right">Accuracy</p></th></tr></thead><tbody data-slot="table-body" class="[&amp;_tr:last-child]:border-0"><tr data-slot="table-row" class="hover:bg-muted/50 data-[state=selected]:bg-muted border-b transition-colors px-6" data-state="false"><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><button type="button" role="checkbox" aria-checked="false" data-state="unchecked" value="on" data-slot="checkbox" class="peer border-input dark:bg-input/30 data-[state=checked]:bg-primary data-[state=checked]:text-primary-foreground dark:data-[state=checked]:bg-primary data-[state=checked]:border-primary focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive size-4 shrink-0 rounded-[4px] border shadow-xs transition-shadow outline-none focus-visible:ring-[3px] disabled:cursor-not-allowed disabled:opacity-50" aria-label="Select row"></button><input type="checkbox" aria-hidden="true" tabindex="-1" style="position:absolute;pointer-events:none;opacity:0;margin:0;transform:translateX(-100%)" value="on"/></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><div class="flex items-center gap-2"><span>1</span><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="size-4" data-state="closed" data-slot="hover-card-trigger"><path fill-rule="evenodd" d="M8.603 3.799A4.49 4.49 0 0 1 12 2.25c1.357 0 2.573.6 3.397 1.549a4.49 4.49 0 0 1 3.498 1.307 4.491 4.491 0 0 1 1.307 3.497A4.49 4.49 0 0 1 21.75 12a4.49 4.49 0 0 1-1.549 3.397 4.491 4.491 0 0 1-1.307 3.497 4.491 4.491 0 0 1-3.497 1.307A4.49 4.49 0 0 1 12 21.75a4.49 4.49 0 0 1-3.397-1.549 4.49 4.49 0 0 1-3.498-1.306 4.491 4.491 0 0 1-1.307-3.498A4.49 4.49 0 0 1 2.25 12c0-1.357.6-2.573 1.549-3.397a4.49 4.49 0 0 1 1.307-3.497 4.49 4.49 0 0 1 3.497-1.307Zm7.007 6.387a.75.75 0 1 0-1.22-.872l-3.236 4.53L9.53 12.22a.75.75 0 0 0-1.06 1.06l2.25 2.25a.75.75 0 0 0 1.14-.094l3.75-5.25Z" clip-rule="evenodd"></path></svg></div></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><span>Codex CLI</span></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><span>GPT-5.5</span></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base">2026-05-01</td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base">OpenAI</td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><span>OpenAI</span></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><p class="text-right"><span class="font-bold">83.4<!-- -->%</span><span class="text-muted-foreground ml-1">± <!-- -->2.2</span></p></td></tr><tr data-slot="table-row" class="hover:bg-muted/50 data-[state=selected]:bg-muted border-b transition-colors px-6" data-state="false"><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><button type="button" role="checkbox" aria-checked="false" data-state="unchecked" value="on" data-slot="checkbox" class="peer border-input dark:bg-input/30 data-[state=checked]:bg-primary data-[state=checked]:text-primary-foreground dark:data-[state=checked]:bg-primary data-[state=checked]:border-primary focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive size-4 shrink-0 rounded-[4px] border shadow-xs transition-shadow outline-none focus-visible:ring-[3px] disabled:cursor-not-allowed disabled:opacity-50" aria-label="Select row"></button><input type="checkbox" aria-hidden="true" tabindex="-1" style="position:absolute;pointer-events:none;opacity:0;margin:0;transform:translateX(-100%)" value="on"/></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><div class="flex items-center gap-2"><span>2</span><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="size-4" data-state="closed" data-slot="hover-card-trigger"><path fill-rule="evenodd" d="M8.603 3.799A4.49 4.49 0 0 1 12 2.25c1.357 0 2.573.6 3.397 1.549a4.49 4.49 0 0 1 3.498 1.307 4.491 4.491 0 0 1 1.307 3.497A4.49 4.49 0 0 1 21.75 12a4.49 4.49 0 0 1-1.549 3.397 4.491 4.491 0 0 1-1.307 3.497 4.491 4.491 0 0 1-3.497 1.307A4.49 4.49 0 0 1 12 21.75a4.49 4.49 0 0 1-3.397-1.549 4.49 4.49 0 0 1-3.498-1.306 4.491 4.491 0 0 1-1.307-3.498A4.49 4.49 0 0 1 2.25 12c0-1.357.6-2.573 1.549-3.397a4.49 4.49 0 0 1 1.307-3.497 4.49 4.49 0 0 1 3.497-1.307Zm7.007 6.387a.75.75 0 1 0-1.22-.872l-3.236 4.53L9.53 12.22a.75.75 0 0 0-1.06 1.06l2.25 2.25a.75.75 0 0 0 1.14-.094l3.75-5.25Z" clip-rule="evenodd"></path></svg></div></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><span>Claude Code</span></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><span>Claude Opus 4.8</span></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base">2026-05-29</td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base">Anthropic</td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><span>Anthropic</span></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><p class="text-right"><span class="font-bold">78.9<!-- -->%</span><span class="text-muted-foreground ml-1">± <!-- -->2.5</span></p></td></tr><tr data-slot="table-row" class="hover:bg-muted/50 data-[state=selected]:bg-muted border-b transition-colors px-6" data-state="false"><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><button type="button" role="checkbox" aria-checked="false" data-state="unchecked" value="on" data-slot="checkbox" class="peer border-input dark:bg-input/30 data-[state=checked]:bg-primary data-[state=checked]:text-primary-foreground dark:data-[state=checked]:bg-primary data-[state=checked]:border-primary focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive size-4 shrink-0 rounded-[4px] border shadow-xs transition-shadow outline-none focus-visible:ring-[3px] disabled:cursor-not-allowed disabled:opacity-50" aria-label="Select row"></button><input type="checkbox" aria-hidden="true" tabindex="-1" style="position:absolute;pointer-events:none;opacity:0;margin:0;transform:translateX(-100%)" value="on"/></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><div class="flex items-center gap-2"><span>3</span><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="size-4" data-state="closed" data-slot="hover-card-trigger"><path fill-rule="evenodd" d="M8.603 3.799A4.49 4.49 0 0 1 12 2.25c1.357 0 2.573.6 3.397 1.549a4.49 4.49 0 0 1 3.498 1.307 4.491 4.491 0 0 1 1.307 3.497A4.49 4.49 0 0 1 21.75 12a4.49 4.49 0 0 1-1.549 3.397 4.491 4.491 0 0 1-1.307 3.497 4.491 4.491 0 0 1-3.497 1.307A4.49 4.49 0 0 1 12 21.75a4.49 4.49 0 0 1-3.397-1.549 4.49 4.49 0 0 1-3.498-1.306 4.491 4.491 0 0 1-1.307-3.498A4.49 4.49 0 0 1 2.25 12c0-1.357.6-2.573 1.549-3.397a4.49 4.49 0 0 1 1.307-3.497 4.49 4.49 0 0 1 3.497-1.307Zm7.007 6.387a.75.75 0 1 0-1.22-.872l-3.236 4.53L9.53 12.22a.75.75 0 0 0-1.06 1.06l2.25 2.25a.75.75 0 0 0 1.14-.094l3.75-5.25Z" clip-rule="evenodd"></path></svg></div></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><span>Terminus 2</span></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><span>GPT-5.5</span></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base">2026-05-01</td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base">Terminal-Bench</td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><span>OpenAI</span></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><p class="text-right"><span class="font-bold">78.2<!-- -->%</span><span class="text-muted-foreground ml-1">± <!-- -->2.4</span></p></td></tr><tr data-slot="table-row" class="hover:bg-muted/50 data-[state=selected]:bg-muted border-b transition-colors px-6" data-state="false"><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><button type="button" role="checkbox" aria-checked="false" data-state="unchecked" value="on" data-slot="checkbox" class="peer border-input dark:bg-input/30 data-[state=checked]:bg-primary data-[state=checked]:text-primary-foreground dark:data-[state=checked]:bg-primary data-[state=checked]:border-primary focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive size-4 shrink-0 rounded-[4px] border shadow-xs transition-shadow outline-none focus-visible:ring-[3px] disabled:cursor-not-allowed disabled:opacity-50" aria-label="Select row"></button><input type="checkbox" aria-hidden="true" tabindex="-1" style="position:absolute;pointer-events:none;opacity:0;margin:0;transform:translateX(-100%)" value="on"/></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><div class="flex items-center gap-2"><span>4</span><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="size-4" data-state="closed" data-slot="hover-card-trigger"><path fill-rule="evenodd" d="M8.603 3.799A4.49 4.49 0 0 1 12 2.25c1.357 0 2.573.6 3.397 1.549a4.49 4.49 0 0 1 3.498 1.307 4.491 4.491 0 0 1 1.307 3.497A4.49 4.49 0 0 1 21.75 12a4.49 4.49 0 0 1-1.549 3.397 4.491 4.491 0 0 1-1.307 3.497 4.491 4.491 0 0 1-3.497 1.307A4.49 4.49 0 0 1 12 21.75a4.49 4.49 0 0 1-3.397-1.549 4.49 4.49 0 0 1-3.498-1.306 4.491 4.491 0 0 1-1.307-3.498A4.49 4.49 0 0 1 2.25 12c0-1.357.6-2.573 1.549-3.397a4.49 4.49 0 0 1 1.307-3.497 4.49 4.49 0 0 1 3.497-1.307Zm7.007 6.387a.75.75 0 1 0-1.22-.872l-3.236 4.53L9.53 12.22a.75.75 0 0 0-1.06 1.06l2.25 2.25a.75.75 0 0 0 1.14-.094l3.75-5.25Z" clip-rule="evenodd"></path></svg></div></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><span>Terminus 2</span></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><span>Claude Opus 4.8</span></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base">2026-05-29</td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base">Terminal-Bench</td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><span>Anthropic</span></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><p class="text-right"><span class="font-bold">74.6<!-- -->%</span><span class="text-muted-foreground ml-1">± <!-- -->2.4</span></p></td></tr><tr data-slot="table-row" class="hover:bg-muted/50 data-[state=selected]:bg-muted border-b transition-colors px-6" data-state="false"><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><button type="button" role="checkbox" aria-checked="false" data-state="unchecked" value="on" data-slot="checkbox" class="peer border-input dark:bg-input/30 data-[state=checked]:bg-primary data-[state=checked]:text-primary-foreground dark:data-[state=checked]:bg-primary data-[state=checked]:border-primary focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive size-4 shrink-0 rounded-[4px] border shadow-xs transition-shadow outline-none focus-visible:ring-[3px] disabled:cursor-not-allowed disabled:opacity-50" aria-label="Select row"></button><input type="checkbox" aria-hidden="true" tabindex="-1" style="position:absolute;pointer-events:none;opacity:0;margin:0;transform:translateX(-100%)" value="on"/></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><div class="flex items-center gap-2"><span>5</span><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="size-4" data-state="closed" data-slot="hover-card-trigger"><path fill-rule="evenodd" d="M8.603 3.799A4.49 4.49 0 0 1 12 2.25c1.357 0 2.573.6 3.397 1.549a4.49 4.49 0 0 1 3.498 1.307 4.491 4.491 0 0 1 1.307 3.497A4.49 4.49 0 0 1 21.75 12a4.49 4.49 0 0 1-1.549 3.397 4.491 4.491 0 0 1-1.307 3.497 4.491 4.491 0 0 1-3.497 1.307A4.49 4.49 0 0 1 12 21.75a4.49 4.49 0 0 1-3.397-1.549 4.49 4.49 0 0 1-3.498-1.306 4.491 4.491 0 0 1-1.307-3.498A4.49 4.49 0 0 1 2.25 12c0-1.357.6-2.573 1.549-3.397a4.49 4.49 0 0 1 1.307-3.497 4.49 4.49 0 0 1 3.497-1.307Zm7.007 6.387a.75.75 0 1 0-1.22-.872l-3.236 4.53L9.53 12.22a.75.75 0 0 0-1.06 1.06l2.25 2.25a.75.75 0 0 0 1.14-.094l3.75-5.25Z" clip-rule="evenodd"></path></svg></div></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><span>Terminus 2</span></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><span>Gemini 3 Pro</span></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base">2026-05-01</td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base">Terminal-Bench</td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><span>Google</span></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><p class="text-right"><span class="font-bold">74.4<!-- -->%</span><span class="text-muted-foreground ml-1">± <!-- -->2.6</span></p></td></tr><tr data-slot="table-row" class="hover:bg-muted/50 data-[state=selected]:bg-muted border-b transition-colors px-6" data-state="false"><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><button type="button" role="checkbox" aria-checked="false" data-state="unchecked" value="on" data-slot="checkbox" class="peer border-input dark:bg-input/30 data-[state=checked]:bg-primary data-[state=checked]:text-primary-foreground dark:data-[state=checked]:bg-primary data-[state=checked]:border-primary focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive size-4 shrink-0 rounded-[4px] border shadow-xs transition-shadow outline-none focus-visible:ring-[3px] disabled:cursor-not-allowed disabled:opacity-50" aria-label="Select row"></button><input type="checkbox" aria-hidden="true" tabindex="-1" style="position:absolute;pointer-events:none;opacity:0;margin:0;transform:translateX(-100%)" value="on"/></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><div class="flex items-center gap-2"><span>6</span><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="size-4" data-state="closed" data-slot="hover-card-trigger"><path fill-rule="evenodd" d="M8.603 3.799A4.49 4.49 0 0 1 12 2.25c1.357 0 2.573.6 3.397 1.549a4.49 4.49 0 0 1 3.498 1.307 4.491 4.491 0 0 1 1.307 3.497A4.49 4.49 0 0 1 21.75 12a4.49 4.49 0 0 1-1.549 3.397 4.491 4.491 0 0 1-1.307 3.497 4.491 4.491 0 0 1-3.497 1.307A4.49 4.49 0 0 1 12 21.75a4.49 4.49 0 0 1-3.397-1.549 4.49 4.49 0 0 1-3.498-1.306 4.491 4.491 0 0 1-1.307-3.498A4.49 4.49 0 0 1 2.25 12c0-1.357.6-2.573 1.549-3.397a4.49 4.49 0 0 1 1.307-3.497 4.49 4.49 0 0 1 3.497-1.307Zm7.007 6.387a.75.75 0 1 0-1.22-.872l-3.236 4.53L9.53 12.22a.75.75 0 0 0-1.06 1.06l2.25 2.25a.75.75 0 0 0 1.14-.094l3.75-5.25Z" clip-rule="evenodd"></path></svg></div></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><span>Gemini CLI</span></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><span>Gemini 3.1 Pro</span></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base">2026-05-05</td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base">Google</td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><span>Google</span></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><p class="text-right"><span class="font-bold">70.7<!-- -->%</span><span class="text-muted-foreground ml-1">± <!-- -->2.9</span></p></td></tr><tr data-slot="table-row" class="hover:bg-muted/50 data-[state=selected]:bg-muted border-b transition-colors px-6" data-state="false"><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><button type="button" role="checkbox" aria-checked="false" data-state="unchecked" value="on" data-slot="checkbox" class="peer border-input dark:bg-input/30 data-[state=checked]:bg-primary data-[state=checked]:text-primary-foreground dark:data-[state=checked]:bg-primary data-[state=checked]:border-primary focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive size-4 shrink-0 rounded-[4px] border shadow-xs transition-shadow outline-none focus-visible:ring-[3px] disabled:cursor-not-allowed disabled:opacity-50" aria-label="Select row"></button><input type="checkbox" aria-hidden="true" tabindex="-1" style="position:absolute;pointer-events:none;opacity:0;margin:0;transform:translateX(-100%)" value="on"/></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><div class="flex items-center gap-2"><span>7</span><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="size-4" data-state="closed" data-slot="hover-card-trigger"><path fill-rule="evenodd" d="M8.603 3.799A4.49 4.49 0 0 1 12 2.25c1.357 0 2.573.6 3.397 1.549a4.49 4.49 0 0 1 3.498 1.307 4.491 4.491 0 0 1 1.307 3.497A4.49 4.49 0 0 1 21.75 12a4.49 4.49 0 0 1-1.549 3.397 4.491 4.491 0 0 1-1.307 3.497 4.491 4.491 0 0 1-3.497 1.307A4.49 4.49 0 0 1 12 21.75a4.49 4.49 0 0 1-3.397-1.549 4.49 4.49 0 0 1-3.498-1.306 4.491 4.491 0 0 1-1.307-3.498A4.49 4.49 0 0 1 2.25 12c0-1.357.6-2.573 1.549-3.397a4.49 4.49 0 0 1 1.307-3.497 4.49 4.49 0 0 1 3.497-1.307Zm7.007 6.387a.75.75 0 1 0-1.22-.872l-3.236 4.53L9.53 12.22a.75.75 0 0 0-1.06 1.06l2.25 2.25a.75.75 0 0 0 1.14-.094l3.75-5.25Z" clip-rule="evenodd"></path></svg></div></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><span>Terminus 2</span></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><span>Gemini 3.1 Pro</span></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base">2026-05-05</td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base">Terminal-Bench</td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><span>Google</span></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><p class="text-right"><span class="font-bold">70.3<!-- -->%</span><span class="text-muted-foreground ml-1">± <!-- -->2.9</span></p></td></tr><tr data-slot="table-row" class="hover:bg-muted/50 data-[state=selected]:bg-muted border-b transition-colors px-6" data-state="false"><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><button type="button" role="checkbox" aria-checked="false" data-state="unchecked" value="on" data-slot="checkbox" class="peer border-input dark:bg-input/30 data-[state=checked]:bg-primary data-[state=checked]:text-primary-foreground dark:data-[state=checked]:bg-primary data-[state=checked]:border-primary focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive size-4 shrink-0 rounded-[4px] border shadow-xs transition-shadow outline-none focus-visible:ring-[3px] disabled:cursor-not-allowed disabled:opacity-50" aria-label="Select row"></button><input type="checkbox" aria-hidden="true" tabindex="-1" style="position:absolute;pointer-events:none;opacity:0;margin:0;transform:translateX(-100%)" value="on"/></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><div class="flex items-center gap-2"><span>8</span><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="size-4" data-state="closed" data-slot="hover-card-trigger"><path fill-rule="evenodd" d="M8.603 3.799A4.49 4.49 0 0 1 12 2.25c1.357 0 2.573.6 3.397 1.549a4.49 4.49 0 0 1 3.498 1.307 4.491 4.491 0 0 1 1.307 3.497A4.49 4.49 0 0 1 21.75 12a4.49 4.49 0 0 1-1.549 3.397 4.491 4.491 0 0 1-1.307 3.497 4.491 4.491 0 0 1-3.497 1.307A4.49 4.49 0 0 1 12 21.75a4.49 4.49 0 0 1-3.397-1.549 4.49 4.49 0 0 1-3.498-1.306 4.491 4.491 0 0 1-1.307-3.498A4.49 4.49 0 0 1 2.25 12c0-1.357.6-2.573 1.549-3.397a4.49 4.49 0 0 1 1.307-3.497 4.49 4.49 0 0 1 3.497-1.307Zm7.007 6.387a.75.75 0 1 0-1.22-.872l-3.236 4.53L9.53 12.22a.75.75 0 0 0-1.06 1.06l2.25 2.25a.75.75 0 0 0 1.14-.094l3.75-5.25Z" clip-rule="evenodd"></path></svg></div></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><span>Claude Code</span></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><span>Claude Opus 4.7</span></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base">2026-05-01</td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base">Anthropic</td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><span>Anthropic</span></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><p class="text-right"><span class="font-bold">69.7<!-- -->%</span><span class="text-muted-foreground ml-1">± <!-- -->2.7</span></p></td></tr><tr data-slot="table-row" class="hover:bg-muted/50 data-[state=selected]:bg-muted border-b transition-colors px-6" data-state="false"><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><button type="button" role="checkbox" aria-checked="false" data-state="unchecked" value="on" data-slot="checkbox" class="peer border-input dark:bg-input/30 data-[state=checked]:bg-primary data-[state=checked]:text-primary-foreground dark:data-[state=checked]:bg-primary data-[state=checked]:border-primary focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive size-4 shrink-0 rounded-[4px] border shadow-xs transition-shadow outline-none focus-visible:ring-[3px] disabled:cursor-not-allowed disabled:opacity-50" aria-label="Select row"></button><input type="checkbox" aria-hidden="true" tabindex="-1" style="position:absolute;pointer-events:none;opacity:0;margin:0;transform:translateX(-100%)" value="on"/></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><div class="flex items-center gap-2"><span>9</span><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="size-4" data-state="closed" data-slot="hover-card-trigger"><path fill-rule="evenodd" d="M8.603 3.799A4.49 4.49 0 0 1 12 2.25c1.357 0 2.573.6 3.397 1.549a4.49 4.49 0 0 1 3.498 1.307 4.491 4.491 0 0 1 1.307 3.497A4.49 4.49 0 0 1 21.75 12a4.49 4.49 0 0 1-1.549 3.397 4.491 4.491 0 0 1-1.307 3.497 4.491 4.491 0 0 1-3.497 1.307A4.49 4.49 0 0 1 12 21.75a4.49 4.49 0 0 1-3.397-1.549 4.49 4.49 0 0 1-3.498-1.306 4.491 4.491 0 0 1-1.307-3.498A4.49 4.49 0 0 1 2.25 12c0-1.357.6-2.573 1.549-3.397a4.49 4.49 0 0 1 1.307-3.497 4.49 4.49 0 0 1 3.497-1.307Zm7.007 6.387a.75.75 0 1 0-1.22-.872l-3.236 4.53L9.53 12.22a.75.75 0 0 0-1.06 1.06l2.25 2.25a.75.75 0 0 0 1.14-.094l3.75-5.25Z" clip-rule="evenodd"></path></svg></div></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><span>Gemini CLI</span></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><span>Gemini 3 Pro</span></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base">2026-05-02</td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base">Google</td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><span>Google</span></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><p class="text-right"><span class="font-bold">66.3<!-- -->%</span><span class="text-muted-foreground ml-1">± <!-- -->2.7</span></p></td></tr><tr data-slot="table-row" class="hover:bg-muted/50 data-[state=selected]:bg-muted border-b transition-colors px-6" data-state="false"><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><button type="button" role="checkbox" aria-checked="false" data-state="unchecked" value="on" data-slot="checkbox" class="peer border-input dark:bg-input/30 data-[state=checked]:bg-primary data-[state=checked]:text-primary-foreground dark:data-[state=checked]:bg-primary data-[state=checked]:border-primary focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive size-4 shrink-0 rounded-[4px] border shadow-xs transition-shadow outline-none focus-visible:ring-[3px] disabled:cursor-not-allowed disabled:opacity-50" aria-label="Select row"></button><input type="checkbox" aria-hidden="true" tabindex="-1" style="position:absolute;pointer-events:none;opacity:0;margin:0;transform:translateX(-100%)" value="on"/></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><div class="flex items-center gap-2"><span>10</span><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="size-4" data-state="closed" data-slot="hover-card-trigger"><path fill-rule="evenodd" d="M8.603 3.799A4.49 4.49 0 0 1 12 2.25c1.357 0 2.573.6 3.397 1.549a4.49 4.49 0 0 1 3.498 1.307 4.491 4.491 0 0 1 1.307 3.497A4.49 4.49 0 0 1 21.75 12a4.49 4.49 0 0 1-1.549 3.397 4.491 4.491 0 0 1-1.307 3.497 4.491 4.491 0 0 1-3.497 1.307A4.49 4.49 0 0 1 12 21.75a4.49 4.49 0 0 1-3.397-1.549 4.49 4.49 0 0 1-3.498-1.306 4.491 4.491 0 0 1-1.307-3.498A4.49 4.49 0 0 1 2.25 12c0-1.357.6-2.573 1.549-3.397a4.49 4.49 0 0 1 1.307-3.497 4.49 4.49 0 0 1 3.497-1.307Zm7.007 6.387a.75.75 0 1 0-1.22-.872l-3.236 4.53L9.53 12.22a.75.75 0 0 0-1.06 1.06l2.25 2.25a.75.75 0 0 0 1.14-.094l3.75-5.25Z" clip-rule="evenodd"></path></svg></div></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><span>Terminus 2</span></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><span>Claude Opus 4.7</span></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base">2026-05-01</td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base">Terminal-Bench</td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><span>Anthropic</span></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><p class="text-right"><span class="font-bold">66.1<!-- -->%</span><span class="text-muted-foreground ml-1">± <!-- -->2.7</span></p></td></tr><tr data-slot="table-row" class="hover:bg-muted/50 data-[state=selected]:bg-muted border-b transition-colors px-6" data-state="false"><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><button type="button" role="checkbox" aria-checked="false" data-state="unchecked" value="on" data-slot="checkbox" class="peer border-input dark:bg-input/30 data-[state=checked]:bg-primary data-[state=checked]:text-primary-foreground dark:data-[state=checked]:bg-primary data-[state=checked]:border-primary focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive size-4 shrink-0 rounded-[4px] border shadow-xs transition-shadow outline-none focus-visible:ring-[3px] disabled:cursor-not-allowed disabled:opacity-50" aria-label="Select row"></button><input type="checkbox" aria-hidden="true" tabindex="-1" style="position:absolute;pointer-events:none;opacity:0;margin:0;transform:translateX(-100%)" value="on"/></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><div class="flex items-center gap-2"><span>11</span><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" class="size-4" data-state="closed" data-slot="hover-card-trigger"><path fill-rule="evenodd" d="M8.603 3.799A4.49 4.49 0 0 1 12 2.25c1.357 0 2.573.6 3.397 1.549a4.49 4.49 0 0 1 3.498 1.307 4.491 4.491 0 0 1 1.307 3.497A4.49 4.49 0 0 1 21.75 12a4.49 4.49 0 0 1-1.549 3.397 4.491 4.491 0 0 1-1.307 3.497 4.491 4.491 0 0 1-3.497 1.307A4.49 4.49 0 0 1 12 21.75a4.49 4.49 0 0 1-3.397-1.549 4.49 4.49 0 0 1-3.498-1.306 4.491 4.491 0 0 1-1.307-3.498A4.49 4.49 0 0 1 2.25 12c0-1.357.6-2.573 1.549-3.397a4.49 4.49 0 0 1 1.307-3.497 4.49 4.49 0 0 1 3.497-1.307Zm7.007 6.387a.75.75 0 1 0-1.22-.872l-3.236 4.53L9.53 12.22a.75.75 0 0 0-1.06 1.06l2.25 2.25a.75.75 0 0 0 1.14-.094l3.75-5.25Z" clip-rule="evenodd"></path></svg></div></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><span>Claude Code</span></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><span>GLM 5.1</span></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base">2026-05-02</td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base">Anthropic</td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><span>Z-AI</span></td><td data-slot="table-cell" class="p-2 align-middle whitespace-nowrap [&amp;:has([role=checkbox])]:pr-0 [&amp;&gt;[role=checkbox]]:translate-y-[2px] py-4 text-base"><p class="text-right"><span class="font-bold">58.7<!-- -->%</span><span class="text-muted-foreground ml-1">± <!-- -->2.4</span></p></td></tr></tbody></table></div><div class="text-muted-foreground space-y-2 border-t px-6 py-4 text-center text-sm"><p>Results in this leaderboard correspond to<!-- --> <a href="https://hub.harborframework.com/datasets/terminal-bench/terminal-bench-2-1/6" class="text-foreground underline underline-offset-4">terminal-bench/terminal-bench-2-1</a>.</p><p>Use the commands above to run Terminal-Bench 2.1 submissions.</p><div class="mx-auto flex flex-row items-center justify-center gap-2"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" class="fill-foreground size-4"><path fill-rule="evenodd" d="M8.603 3.799A4.49 4.49 0 0 1 12 2.25c1.357 0 2.573.6 3.397 1.549a4.49 4.49 0 0 1 3.498 1.307 4.491 4.491 0 0 1 1.307 3.497A4.49 4.49 0 0 1 21.75 12a4.49 4.49 0 0 1-1.549 3.397 4.491 4.491 0 0 1-1.307 3.497 4.491 4.491 0 0 1-3.497 1.307A4.49 4.49 0 0 1 12 21.75a4.49 4.49 0 0 1-3.397-1.549 4.49 4.49 0 0 1-3.498-1.306 4.491 4.491 0 0 1-1.307-3.498A4.49 4.49 0 0 1 2.25 12c0-1.357.6-2.573 1.549-3.397a4.49 4.49 0 0 1 1.307-3.497 4.49 4.49 0 0 1 3.497-1.307Zm7.007 6.387a.75.75 0 1 0-1.22-.872l-3.236 4.53L9.53 12.22a.75.75 0 0 0-1.06 1.06l2.25 2.25a.75.75 0 0 0 1.14-.094l3.75-5.25Z" clip-rule="evenodd"></path></svg><p>A Terminal-Bench team member ran the evaluation and verified the results.</p></div></div></div><div class="flex flex-col px-4 md:px-0"><p class="text-muted-foreground mt-6 font-mono text-sm">Displaying <!-- -->11<!-- --> of <!-- -->11<!-- --> available entries</p></div></div></div></div><!--$--><!--/$--></main><section aria-label="Notifications alt+T" tabindex="-1" aria-live="polite" aria-relevant="additions text" aria-atomic="false"></section><script src="/_next/static/chunks/1a15a26643b65c70.js" id="_R_" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0])</script><script>self.__next_f.push([1,"1:\"$Sreact.fragment\"\n2:I[143328,[\"/_next/static/chunks/772cb4e079383472.js\",\"/_next/static/chunks/cc5200c2c4db5d30.js\"],\"QueryProvider\"]\n3:I[78966,[\"/_next/static/chunks/772cb4e079383472.js\",\"/_next/static/chunks/cc5200c2c4db5d30.js\"],\"RootProvider\"]\n4:I[12985,[\"/_next/static/chunks/772cb4e079383472.js\",\"/_next/static/chunks/cc5200c2c4db5d30.js\"],\"NuqsAdapter\"]\n5:I[339756,[\"/_next/static/chunks/d96012bcfc98706a.js\",\"/_next/static/chunks/963c71eec1d89c3f.js\"],\"default\"]\n6:I[408821,[\"/_next/static/chunks/772cb4e079383472.js\",\"/_next/static/chunks/cc5200c2c4db5d30.js\",\"/_next/static/chunks/e7a87ddff7c3d361.js\",\"/_next/static/chunks/befa77c9ca59daf6.js\"],\"default\"]\n7:I[837457,[\"/_next/static/chunks/d96012bcfc98706a.js\",\"/_next/static/chunks/963c71eec1d89c3f.js\"],\"default\"]\n8:I[203484,[\"/_next/static/chunks/772cb4e079383472.js\",\"/_next/static/chunks/cc5200c2c4db5d30.js\"],\"Toaster\"]\n9:I[276818,[\"/_next/static/chunks/772cb4e079383472.js\",\"/_next/static/chunks/cc5200c2c4db5d30.js\",\"/_next/static/chunks/b881789f47bb99ed.js\",\"/_next/static/chunks/d48a38d803cc289b.js\",\"/_next/static/chunks/afcfef2b68fe9eaf.js\",\"/_next/static/chunks/befa77c9ca59daf6.js\",\"/_next/static/chunks/9eef2e45c165ec54.js\"],\"NavProvider\"]\na:I[691097,[\"/_next/static/chunks/772cb4e079383472.js\",\"/_next/static/chunks/cc5200c2c4db5d30.js\",\"/_next/static/chunks/b881789f47bb99ed.js\",\"/_next/static/chunks/d48a38d803cc289b.js\",\"/_next/static/chunks/afcfef2b68fe9eaf.js\",\"/_next/static/chunks/befa77c9ca59daf6.js\",\"/_next/static/chunks/9eef2e45c165ec54.js\"],\"Navbar\"]\nb:I[647873,[\"/_next/static/chunks/772cb4e079383472.js\",\"/_next/static/chunks/cc5200c2c4db5d30.js\",\"/_next/static/chunks/b881789f47bb99ed.js\",\"/_next/static/chunks/d48a38d803cc289b.js\",\"/_next/static/chunks/afcfef2b68fe9eaf.js\",\"/_next/static/chunks/befa77c9ca59daf6.js\",\"/_next/static/chunks/9eef2e45c165ec54.js\"],\"default\"]\nc:I[691097,[\"/_next/static/chunks/772cb4e079383472.js\",\"/_next/static/chunks/cc5200c2c4db5d30.js\",\"/_next/static/chunks/b881789f47bb99ed.js\",\"/_next/static/chunks/d48a38d803cc289b.js\",\"/_next/static/chunks/afcfef2b68fe9eaf.js\",\"/_next/static/chunks/befa77c9ca59daf6.js\",\"/_next/static/chunks/9eef2e45c165ec54.js\"],\"NavbarLink\"]\n18:I[168027,[\"/_next/static/chunks/d96012bcfc98706a.js\",\"/_next/static/chunks/963c71eec1d89c3f.js\"],\"default\"]\n19:I[313701,[\"/_next/static/chunks/772cb4e079383472.js\",\"/_next/static/chunks/cc5200c2c4db5d30.js\",\"/_next/static/chunks/b881789f47bb99ed.js\",\"/_next/static/chunks/d48a38d803cc289b.js\",\"/_next/static/chunks/afcfef2b68fe9eaf.js\",\"/_next/static/chunks/befa77c9ca59daf6.js\",\"/_next/static/chunks/9eef2e45c165ec54.js\"],\"ThemeToggle\"]\n1a:I[278128,[\"/_next/static/chunks/772cb4e079383472.js\",\"/_next/static/chunks/cc5200c2c4db5d30.js\",\"/_next/static/chunks/b881789f47bb99ed.js\",\"/_next/static/chunks/d48a38d803cc289b.js\",\"/_next/static/chunks/afcfef2b68fe9eaf.js\",\"/_next/static/chunks/befa77c9ca59daf6.js\",\"/_next/static/chunks/9eef2e45c165ec54.js\"],\"Menu\"]\n1b:I[278128,[\"/_next/static/chunks/772cb4e079383472.js\",\"/_next/static/chunks/cc5200c2c4db5d30.js\",\"/_next/static/chunks/b881789f47bb99ed.js\",\"/_next/static/chunks/d48a38d803cc289b.js\",\"/_next/static/chunks/afcfef2b68fe9eaf.js\",\"/_next/static/chunks/befa77c9ca59daf6.js\",\"/_next/static/chunks/9eef2e45c165ec54.js\"],\"MenuTrigger\"]\n1c:I[278128,[\"/_next/static/chunks/772cb4e079383472.js\",\"/_next/static/chunks/cc5200c2c4db5d30.js\",\"/_next/static/chunks/b881789f47bb99ed.js\",\"/_next/static/chunks/d48a38d803cc289b.js\",\"/_next/static/chunks/afcfef2b68fe9eaf.js\",\"/_next/static/chunks/befa77c9ca59daf6.js\",\"/_next/static/chunks/9eef2e45c165ec54.js\"],\"MenuContent\"]\n1d:I[278128,[\"/_next/static/chunks/772cb4e079383472.js\",\"/_next/static/chunks/cc5200c2c4db5d30.js\",\"/_next/static/chunks/b881789f47bb99ed.js\",\"/_next/static/chunks/d48a38d803cc289b.js\",\"/_next/static/chunks/afcfef2b68fe9eaf.js\",\"/_next/static/chunks/befa77c9ca59daf6.js\",\"/_next/static/chunks/9eef2e45c165ec54.js\"],\"MenuLinkItem\"]\n1f:I[897367,[\"/_next/static/chunks/d96012bcfc98706a.js\",\"/_next/static/chunks/963c71eec1d89c3f.js\"],\"OutletBoundary\"]\n20:\"$Sreact.suspense\"\n22:I["])</script><script>self.__next_f.push([1,"897367,[\"/_next/static/chunks/d96012bcfc98706a.js\",\"/_next/static/chunks/963c71eec1d89c3f.js\"],\"ViewportBoundary\"]\n24:I[897367,[\"/_next/static/chunks/d96012bcfc98706a.js\",\"/_next/static/chunks/963c71eec1d89c3f.js\"],\"MetadataBoundary\"]\n26:I[27201,[\"/_next/static/chunks/d96012bcfc98706a.js\",\"/_next/static/chunks/963c71eec1d89c3f.js\"],\"IconMark\"]\n:HL[\"/_next/static/chunks/7f92b58d201945eb.css\",\"style\"]\n:HL[\"/_next/static/chunks/50e24f3464fcd113.css\",\"style\"]\n:HL[\"/_next/static/media/6babc54cc0db7717.p.1a5afaba.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n:HL[\"/_next/static/media/GeistMono_Variable.p.73882635.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n:HL[\"/_next/static/media/Geist_Variable-s.p.f19e4721.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n"])</script><script>self.__next_f.push([1,"0:{\"P\":null,\"b\":\"PzzZfYGTpXLXiP__nRLOa\",\"c\":[\"\",\"leaderboard\",\"terminal-bench\",\"2.1\"],\"q\":\"\",\"i\":false,\"f\":[[[\"\",{\"children\":[\"(home)\",{\"children\":[\"leaderboard\",{\"children\":[[\"name\",\"terminal-bench\",\"d\"],{\"children\":[[\"version\",\"2.1\",\"d\"],{\"children\":[\"__PAGE__\",{}]}]}]}]}]},\"$undefined\",\"$undefined\",true],[[\"$\",\"$1\",\"c\",{\"children\":[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/_next/static/chunks/7f92b58d201945eb.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\",\"nonce\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/_next/static/chunks/50e24f3464fcd113.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\",\"nonce\":\"$undefined\"}],[\"$\",\"script\",\"script-0\",{\"src\":\"/_next/static/chunks/772cb4e079383472.js\",\"async\":true,\"nonce\":\"$undefined\"}],[\"$\",\"script\",\"script-1\",{\"src\":\"/_next/static/chunks/cc5200c2c4db5d30.js\",\"async\":true,\"nonce\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"className\":\"geistsans_d5a4f12f-module__Ur3q_a__variable geistmono_157ca88a-module__iaM1Ya__variable google_sans_code_57373b42-module__VZ90Sa__variable\",\"suppressHydrationWarning\":true,\"children\":[\"$\",\"body\",null,{\"className\":\"flex min-h-screen flex-col\",\"children\":[\"$\",\"$L2\",null,{\"children\":[[\"$\",\"$L3\",null,{\"children\":[\"$\",\"$L4\",null,{\"children\":[\"$\",\"$L5\",null,{\"parallelRouterKey\":\"children\",\"error\":\"$6\",\"errorStyles\":[],\"errorScripts\":[[\"$\",\"script\",\"script-0\",{\"src\":\"/_next/static/chunks/e7a87ddff7c3d361.js\",\"async\":true}],[\"$\",\"script\",\"script-1\",{\"src\":\"/_next/static/chunks/befa77c9ca59daf6.js\",\"async\":true}]],\"template\":[\"$\",\"$L7\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":404}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],[]],\"forbidden\":\"$undefined\",\"unauthorized\":\"$undefined\"}]}]}],[\"$\",\"$L8\",null,{}]]}]}]}]]}],{\"children\":[[\"$\",\"$1\",\"c\",{\"children\":[[[\"$\",\"script\",\"script-0\",{\"src\":\"/_next/static/chunks/b881789f47bb99ed.js\",\"async\":true,\"nonce\":\"$undefined\"}],[\"$\",\"script\",\"script-1\",{\"src\":\"/_next/static/chunks/d48a38d803cc289b.js\",\"async\":true,\"nonce\":\"$undefined\"}],[\"$\",\"script\",\"script-2\",{\"src\":\"/_next/static/chunks/afcfef2b68fe9eaf.js\",\"async\":true,\"nonce\":\"$undefined\"}],[\"$\",\"script\",\"script-3\",{\"src\":\"/_next/static/chunks/befa77c9ca59daf6.js\",\"async\":true,\"nonce\":\"$undefined\"}],[\"$\",\"script\",\"script-4\",{\"src\":\"/_next/static/chunks/9eef2e45c165ec54.js\",\"async\":true,\"nonce\":\"$undefined\"}]],[\"$\",\"$L9\",null,{\"transparentMode\":\"$undefined\",\"children\":[\"$\",\"main\",null,{\"id\":\"nd-home-layout\",\"children\":[[\"$\",\"$La\",null,{\"children\":[[\"$\",\"$Lb\",null,{\"href\":\"/\",\"className\":\"inline-flex items-center gap-2.5 font-semibold\",\"children\":[\"$\",\"div\",null,{\"className\":\"flex items-center gap-2\",\"children\":[[\"$\",\"svg\",null,{\"ref\":\"$undefined\",\"xmlns\":\"http://www.w3.org/2000/svg\",\"width\":24,\"height\":24,\"viewBox\":\"0 0 24 24\",\"fill\":\"none\",\"stroke\":\"currentColor\",\"strokeWidth\":2,\"strokeLinecap\":\"round\",\"strokeLinejoin\":\"round\",\"className\":\"lucide lucide-terminal size-4\",\"aria-hidden\":\"true\",\"children\":[[\"$\",\"polyline\",\"akl6gq\",{\"points\":\"4 17 10 11 4 5\"}],[\"$\",\"line\",\"q2wloq\",{\"x1\":\"12\",\"x2\":\"20\",\"y1\":\"19\",\"y2\":\"19\"}],\"$undefined\"]}],[\"$\",\"p\",null,{\"className\":\"font-mono text-base font-medium tracking-tight\",\"children\":\"terminal-bench\"}]]}]}],\"$undefined\",[\"$\",\"ul\",null,{\"className\":\"flex flex-row items-center gap-2 px-6 max-sm:hidden\",\"children\":[[\"$\",\"$Lc\",\"0\",{\"className\":\"text-sm\",\"item\":{\"text\":\"run terminal-bench\",\"url\":\"https://harborframework.com/docs/running-tbench\",\"active\":\"nested-url\"},\"variant\":\"$undefined\",\"aria-label\":\"$undefined\",\"children\":\"run terminal-bench\"}],[\"$\",\"$Lc\",\"1\",{\"className\":\"text-sm\",\"item\":{\"text\":\"leaderboard\",\"url\":\"/leaderboard/terminal-bench/2.0\",\"active\":\"nested-url\"},\"variant\":\"$undefined\",\"aria-label\":\"$undefined\",\"children\":\"leaderboard\"}],[\"$\",\"$Lc\",\"2\",{\"className\":\"text-sm\",\"item\":{\"text\":\"benchmarks\",\"url\":\"/benchmarks\",\"active\":\"nested-url\"},\"variant\":\"$undefined\",\"aria-label\":\"$undefined\",\"children\":\"benchmarks\"}],\"$Ld\",\"$Le\",\"$Lf\"]}],\"$L10\",\"$L11\"]}],\"$L12\"],\"className\":\"flex flex-1 flex-col pt-14\"}]}]]}],{\"children\":[\"$L13\",{\"children\":[\"$L14\",{\"children\":[\"$L15\",{\"children\":[\"$L16\",{},null,false,false]},null,false,false]},null,false,false]},null,false,false]},null,false,false]},null,false,false],\"$L17\",false]],\"m\":\"$undefined\",\"G\":[\"$18\",[]],\"S\":false}\n"])</script><script>self.__next_f.push([1,"d:[\"$\",\"$Lc\",\"3\",{\"className\":\"text-sm\",\"item\":{\"text\":\"contributors\",\"url\":\"/contributors\",\"active\":\"nested-url\"},\"variant\":\"$undefined\",\"aria-label\":\"$undefined\",\"children\":\"contributors\"}]\ne:[\"$\",\"$Lc\",\"4\",{\"className\":\"text-sm\",\"item\":{\"text\":\"news\",\"url\":\"/news\",\"active\":\"nested-url\"},\"variant\":\"$undefined\",\"aria-label\":\"$undefined\",\"children\":\"news\"}]\nf:[\"$\",\"$Lc\",\"5\",{\"className\":\"text-sm\",\"item\":{\"text\":\"discord\",\"url\":\"https://discord.gg/2Pe5uWGcV3\",\"external\":true},\"variant\":\"$undefined\",\"aria-label\":\"$undefined\",\"children\":\"discord\"}]\n10:[\"$\",\"div\",null,{\"className\":\"flex flex-row items-center justify-end gap-1.5 flex-1\",\"children\":[false,[\"$\",\"$L19\",null,{\"className\":\"max-lg:hidden\",\"mode\":\"light-dark-system\"}],null]}]\n"])</script><script>self.__next_f.push([1,"11:[\"$\",\"ul\",null,{\"className\":\"flex flex-row items-center\",\"children\":[[[\"$\",\"$Lc\",\"0\",{\"className\":\"max-lg:hidden\",\"item\":{\"type\":\"icon\",\"url\":\"https://github.com/laude-institute/terminal-bench\",\"text\":\"Github\",\"label\":\"GitHub\",\"icon\":[\"$\",\"svg\",null,{\"role\":\"img\",\"viewBox\":\"0 0 24 24\",\"fill\":\"currentColor\",\"children\":[\"$\",\"path\",null,{\"d\":\"M12 .297c-6.63 0-12 5.373-12 12 0 5.303 3.438 9.8 8.205 11.385.6.113.82-.258.82-.577 0-.285-.01-1.04-.015-2.04-3.338.724-4.042-1.61-4.042-1.61C4.422 18.07 3.633 17.7 3.633 17.7c-1.087-.744.084-.729.084-.729 1.205.084 1.838 1.236 1.838 1.236 1.07 1.835 2.809 1.305 3.495.998.108-.776.417-1.305.76-1.605-2.665-.3-5.466-1.332-5.466-5.93 0-1.31.465-2.38 1.235-3.22-.135-.303-.54-1.523.105-3.176 0 0 1.005-.322 3.3 1.23.96-.267 1.98-.399 3-.405 1.02.006 2.04.138 3 .405 2.28-1.552 3.285-1.23 3.285-1.23.645 1.653.24 2.873.12 3.176.765.84 1.23 1.91 1.23 3.22 0 4.61-2.805 5.625-5.475 5.92.42.36.81 1.096.81 2.22 0 1.606-.015 2.896-.015 3.286 0 .315.21.69.825.57C20.565 22.092 24 17.592 24 12.297c0-6.627-5.373-12-12-12\"}]}],\"external\":true},\"variant\":\"icon\",\"aria-label\":\"GitHub\",\"children\":\"$11:props:children:0:0:props:item:icon\"}]],[\"$\",\"$L1a\",null,{\"className\":\"lg:hidden\",\"children\":[[\"$\",\"$L1b\",null,{\"aria-label\":\"Toggle Menu\",\"className\":\"inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors duration-100 disabled:pointer-events-none disabled:opacity-50 focus-visible:outline-none hover:bg-fd-accent hover:text-fd-accent-foreground p-1.5 [\u0026_svg]:size-5 group -me-1.5\",\"enableHover\":\"$undefined\",\"children\":[\"$\",\"svg\",null,{\"ref\":\"$undefined\",\"xmlns\":\"http://www.w3.org/2000/svg\",\"width\":24,\"height\":24,\"viewBox\":\"0 0 24 24\",\"fill\":\"none\",\"stroke\":\"currentColor\",\"strokeWidth\":2,\"strokeLinecap\":\"round\",\"strokeLinejoin\":\"round\",\"className\":\"lucide !size-5.5 transition-transform duration-300 group-data-[state=open]:rotate-180\",\"children\":[[[\"$\",\"path\",\"qrunsl\",{\"d\":\"m6 9 6 6 6-6\"}]],\"$undefined\"]}]}],[\"$\",\"$L1c\",null,{\"className\":\"sm:flex-row sm:items-center sm:justify-end\",\"children\":[[[\"$\",\"$L1d\",\"0\",{\"item\":\"$0:f:0:1:1:children:0:props:children:1:props:children:props:children:0:props:children:2:props:children:0:props:item\",\"className\":\"sm:hidden\"}],[\"$\",\"$L1d\",\"1\",{\"item\":\"$0:f:0:1:1:children:0:props:children:1:props:children:props:children:0:props:children:2:props:children:1:props:item\",\"className\":\"sm:hidden\"}],[\"$\",\"$L1d\",\"2\",{\"item\":\"$0:f:0:1:1:children:0:props:children:1:props:children:props:children:0:props:children:2:props:children:2:props:item\",\"className\":\"sm:hidden\"}],[\"$\",\"$L1d\",\"3\",{\"item\":\"$d:props:item\",\"className\":\"sm:hidden\"}],[\"$\",\"$L1d\",\"4\",{\"item\":\"$e:props:item\",\"className\":\"sm:hidden\"}],[\"$\",\"$L1d\",\"5\",{\"item\":\"$f:props:item\",\"className\":\"sm:hidden\"}]],[\"$\",\"div\",null,{\"className\":\"-ms-1.5 flex flex-row items-center gap-1.5 max-sm:mt-2\",\"children\":[[[\"$\",\"$L1d\",\"0\",{\"item\":\"$11:props:children:0:0:props:item\",\"className\":\"-me-1.5\"}]],[\"$\",\"div\",null,{\"role\":\"separator\",\"className\":\"flex-1\"}],null,[\"$\",\"$L19\",null,{\"mode\":\"light-dark-system\"}]]}]]}]]}]]}]\n"])</script><script>self.__next_f.push([1,"12:[\"$\",\"$L5\",null,{\"parallelRouterKey\":\"children\",\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L7\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":\"$0:f:0:1:0:props:children:1:props:children:props:children:props:children:0:props:children:props:children:props:notFound:0:1:props:style\",\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":\"$0:f:0:1:0:props:children:1:props:children:props:children:props:children:0:props:children:props:children:props:notFound:0:1:props:children:props:children:1:props:style\",\"children\":404}],[\"$\",\"div\",null,{\"style\":\"$0:f:0:1:0:props:children:1:props:children:props:children:props:children:0:props:children:props:children:props:notFound:0:1:props:children:props:children:2:props:style\",\"children\":[\"$\",\"h2\",null,{\"style\":\"$0:f:0:1:0:props:children:1:props:children:props:children:props:children:0:props:children:props:children:props:notFound:0:1:props:children:props:children:2:props:children:props:style\",\"children\":\"This page could not be found.\"}]}]]}]}]],[]],\"forbidden\":\"$undefined\",\"unauthorized\":\"$undefined\"}]\n"])</script><script>self.__next_f.push([1,"13:[\"$\",\"$1\",\"c\",{\"children\":[null,[\"$\",\"$L5\",null,{\"parallelRouterKey\":\"children\",\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L7\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":\"$undefined\",\"forbidden\":\"$undefined\",\"unauthorized\":\"$undefined\"}]]}]\n14:[\"$\",\"$1\",\"c\",{\"children\":[null,[\"$\",\"$L5\",null,{\"parallelRouterKey\":\"children\",\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L7\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":\"$undefined\",\"forbidden\":\"$undefined\",\"unauthorized\":\"$undefined\"}]]}]\n15:[\"$\",\"$1\",\"c\",{\"children\":[null,[\"$\",\"$L5\",null,{\"parallelRouterKey\":\"children\",\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L7\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":\"$undefined\",\"forbidden\":\"$undefined\",\"unauthorized\":\"$undefined\"}]]}]\n16:[\"$\",\"$1\",\"c\",{\"children\":[\"$L1e\",[[\"$\",\"script\",\"script-0\",{\"src\":\"/_next/static/chunks/9572667c10e1469b.js\",\"async\":true,\"nonce\":\"$undefined\"}],[\"$\",\"script\",\"script-1\",{\"src\":\"/_next/static/chunks/1d410ec419d845b5.js\",\"async\":true,\"nonce\":\"$undefined\"}],[\"$\",\"script\",\"script-2\",{\"src\":\"/_next/static/chunks/a802edd4d3033547.js\",\"async\":true,\"nonce\":\"$undefined\"}]],[\"$\",\"$L1f\",null,{\"children\":[\"$\",\"$20\",null,{\"name\":\"Next.MetadataOutlet\",\"children\":\"$@21\"}]}]]}]\n17:[\"$\",\"$1\",\"h\",{\"children\":[null,[\"$\",\"$L22\",null,{\"children\":\"$L23\"}],[\"$\",\"div\",null,{\"hidden\":true,\"children\":[\"$\",\"$L24\",null,{\"children\":[\"$\",\"$20\",null,{\"name\":\"Next.Metadata\",\"children\":\"$L25\"}]}]}],[\"$\",\"meta\",null,{\"name\":\"next-size-adjust\",\"content\":\"\"}]]}]\n23:[[\"$\",\"meta\",\"0\",{\"charSet\":\"utf-8\"}],[\"$\",\"meta\",\"1\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}]]\n21:null\n25:[[\"$\",\"title\",\"0\",{\"children\":\"Terminal-Bench\"}],[\"$\",\"meta\",\"1\",{\"name\":\"description\",\"content\":\"A benchmark for terminal agents\"}],[\"$\",\"meta\",\"2\",{\"property\":\"og:title\",\"content\":\"Terminal-Bench\"}],[\"$\",\"meta\",\"3\",{\"property\":\"og:description\",\"content\":\"A benchmark for terminal agents\"}],[\"$\",\"meta\",\"4\",{\"property\":\"og:url\",\"content\":\"https://www.tbench.ai\"}],[\"$\",\"meta\",\"5\",{\"property\":\"og:site_name\",\"content\":\"Terminal-Bench\"}],[\"$\",\"meta\",\"6\",{\"property\":\"og:locale\",\"content\":\"en_US\"}],[\"$\",\"meta\",\"7\",{\"property\":\"og:image\",\"content\":\"https://www.tbench.ai/og/ascii-logo-dark-1200x630.png\"}],[\"$\",\"meta\",\"8\",{\"property\":\"og:type\",\"content\":\"website\"}],[\"$\",\"meta\",\"9\",{\"name\":\"twitter:card\",\"content\":\"summary_large_image\"}],[\"$\",\"meta\",\"10\",{\"name\":\"twitter:title\",\"content\":\"Terminal-Bench\"}],[\"$\",\"meta\",\"11\",{\"name\":\"twitter:description\",\"content\":\"A benchmark for terminal agents\"}],[\"$\",\"meta\",\"12\",{\"name\":\"twitter:image\",\"content\":\"https://www.tbench.ai/og/ascii-logo-dark-1200x630.png\"}],[\"$\",\"meta\",\"13\",{\"name\":\"twitter:image:width\",\"content\":\"1200\"}],[\"$\",\"meta\",\"14\",{\"name\":\"twitter:image:height\",\"content\":\"630\"}],[\"$\",\"link\",\"15\",{\"rel\":\"icon\",\"href\":\"/favicon.ico\"}],[\"$\",\"$L26\",\"16\",{}]]\n"])</script><script>self.__next_f.push([1,"27:I[213758,[\"/_next/static/chunks/772cb4e079383472.js\",\"/_next/static/chunks/cc5200c2c4db5d30.js\",\"/_next/static/chunks/b881789f47bb99ed.js\",\"/_next/static/chunks/d48a38d803cc289b.js\",\"/_next/static/chunks/afcfef2b68fe9eaf.js\",\"/_next/static/chunks/befa77c9ca59daf6.js\",\"/_next/static/chunks/9eef2e45c165ec54.js\",\"/_next/static/chunks/9572667c10e1469b.js\",\"/_next/static/chunks/1d410ec419d845b5.js\",\"/_next/static/chunks/a802edd4d3033547.js\"],\"Tabs\"]\n28:I[213758,[\"/_next/static/chunks/772cb4e079383472.js\",\"/_next/static/chunks/cc5200c2c4db5d30.js\",\"/_next/static/chunks/b881789f47bb99ed.js\",\"/_next/static/chunks/d48a38d803cc289b.js\",\"/_next/static/chunks/afcfef2b68fe9eaf.js\",\"/_next/static/chunks/befa77c9ca59daf6.js\",\"/_next/static/chunks/9eef2e45c165ec54.js\",\"/_next/static/chunks/9572667c10e1469b.js\",\"/_next/static/chunks/1d410ec419d845b5.js\",\"/_next/static/chunks/a802edd4d3033547.js\"],\"Tab\"]\n2b:I[589794,[\"/_next/static/chunks/772cb4e079383472.js\",\"/_next/static/chunks/cc5200c2c4db5d30.js\",\"/_next/static/chunks/b881789f47bb99ed.js\",\"/_next/static/chunks/d48a38d803cc289b.js\",\"/_next/static/chunks/afcfef2b68fe9eaf.js\",\"/_next/static/chunks/befa77c9ca59daf6.js\",\"/_next/static/chunks/9eef2e45c165ec54.js\",\"/_next/static/chunks/9572667c10e1469b.js\",\"/_next/static/chunks/1d410ec419d845b5.js\",\"/_next/static/chunks/a802edd4d3033547.js\"],\"FilterableLeaderboard\"]\n"])</script><script>self.__next_f.push([1,"1e:[\"$\",\"div\",null,{\"className\":\"flex flex-1 flex-col items-center px-4 py-6 sm:pt-12\",\"children\":[\"$\",\"div\",null,{\"className\":\"flex w-full max-w-7xl flex-col\",\"children\":[[\"$\",\"nav\",null,{\"aria-label\":\"breadcrumb\",\"data-slot\":\"breadcrumb\",\"className\":\"mb-6 hidden font-mono sm:block\",\"children\":[\"$\",\"ol\",null,{\"data-slot\":\"breadcrumb-list\",\"className\":\"text-muted-foreground flex flex-wrap items-center gap-1.5 text-sm break-words sm:gap-2.5\",\"children\":[[\"$\",\"li\",null,{\"data-slot\":\"breadcrumb-item\",\"className\":\"inline-flex items-center gap-1.5\",\"children\":[\"$\",\"a\",null,{\"data-slot\":\"breadcrumb-link\",\"className\":\"hover:text-foreground transition-colors\",\"href\":\"/\",\"children\":\"Home\"}]}],[\"$\",\"li\",null,{\"data-slot\":\"breadcrumb-separator\",\"role\":\"presentation\",\"aria-hidden\":\"true\",\"className\":\"[\u0026\u003esvg]:size-3.5\",\"children\":[\"$\",\"svg\",null,{\"ref\":\"$undefined\",\"xmlns\":\"http://www.w3.org/2000/svg\",\"width\":24,\"height\":24,\"viewBox\":\"0 0 24 24\",\"fill\":\"none\",\"stroke\":\"currentColor\",\"strokeWidth\":2,\"strokeLinecap\":\"round\",\"strokeLinejoin\":\"round\",\"className\":\"lucide lucide-chevron-right\",\"aria-hidden\":\"true\",\"children\":[[\"$\",\"path\",\"mthhwq\",{\"d\":\"m9 18 6-6-6-6\"}],\"$undefined\"]}]}],[\"$\",\"li\",null,{\"data-slot\":\"breadcrumb-item\",\"className\":\"inline-flex items-center gap-1.5\",\"children\":[\"$\",\"a\",null,{\"data-slot\":\"breadcrumb-link\",\"className\":\"hover:text-foreground transition-colors\",\"href\":\"/leaderboard\",\"children\":\"Leaderboards\"}]}],[\"$\",\"li\",null,{\"data-slot\":\"breadcrumb-separator\",\"role\":\"presentation\",\"aria-hidden\":\"true\",\"className\":\"[\u0026\u003esvg]:size-3.5\",\"children\":[\"$\",\"svg\",null,{\"ref\":\"$undefined\",\"xmlns\":\"http://www.w3.org/2000/svg\",\"width\":24,\"height\":24,\"viewBox\":\"0 0 24 24\",\"fill\":\"none\",\"stroke\":\"currentColor\",\"strokeWidth\":2,\"strokeLinecap\":\"round\",\"strokeLinejoin\":\"round\",\"className\":\"lucide lucide-chevron-right\",\"aria-hidden\":\"true\",\"children\":[[\"$\",\"path\",\"mthhwq\",{\"d\":\"m9 18 6-6-6-6\"}],\"$undefined\"]}]}],[\"$\",\"li\",null,{\"data-slot\":\"breadcrumb-item\",\"className\":\"inline-flex items-center gap-1.5\",\"children\":[\"$\",\"span\",null,{\"data-slot\":\"breadcrumb-page\",\"role\":\"link\",\"aria-disabled\":\"true\",\"aria-current\":\"page\",\"className\":\"text-foreground font-normal\",\"children\":[\"terminal-bench\",\"@\",\"2.1\"]}]}]]}]}],[\"$\",\"h2\",null,{\"className\":\"font-mono text-4xl tracking-tighter\",\"children\":[\"terminal-bench\",\"@\",\"2.1\",\" Leaderboard\"]}],[\"$\",\"$L27\",null,{\"items\":[\"New Model\",\"Custom Agent\"],\"className\":\"my-6 font-mono\",\"children\":[[\"$\",\"$L28\",null,{\"value\":\"new model\",\"children\":\"$L29\"}],[\"$\",\"$L28\",null,{\"value\":\"custom agent\",\"children\":\"$L2a\"}]]}],[\"$\",\"$L2b\",null,{\"rows\":[{\"agent\":\"Codex CLI\",\"model\":[\"GPT-5.5\"],\"agentOrganization\":\"OpenAI\",\"modelOrganization\":[\"OpenAI\"],\"date\":\"2026-05-01\",\"accuracy\":0.8337078651685393,\"stderr\":0.011123028018664792,\"integrationMethod\":\"API\",\"agentUrl\":\"https://developers.openai.com/codex/cli\",\"verified\":true,\"agentName\":\"codex\",\"agentVersion\":\"0.125.0\",\"modelNames\":[\"gpt-5.5\"],\"modelProviders\":[\"openai\"],\"key\":\"codex cli__gpt-5.5\"},{\"agent\":\"Claude Code\",\"model\":[\"Claude Opus 4.8\"],\"agentOrganization\":\"Anthropic\",\"modelOrganization\":[\"Anthropic\"],\"date\":\"2026-05-29\",\"accuracy\":0.7887640449438202,\"stderr\":0.012612328270024521,\"integrationMethod\":\"API\",\"agentUrl\":\"https://www.claude.com/product/claude-code\",\"verified\":true,\"agentName\":\"claude-code\",\"agentVersion\":\"2.1.152\",\"modelNames\":[\"claude-opus-4-8\"],\"modelProviders\":[\"anthropic\"],\"key\":\"claude code__claude opus 4.8\"},{\"agent\":\"Terminus 2\",\"model\":[\"GPT-5.5\"],\"agentOrganization\":\"Terminal-Bench\",\"modelOrganization\":[\"OpenAI\"],\"date\":\"2026-05-01\",\"accuracy\":0.7820224719101123,\"stderr\":0.011996717137113833,\"integrationMethod\":\"API\",\"agentUrl\":\"https://www.tbench.ai/terminus\",\"verified\":true,\"agentName\":\"terminus-2\",\"agentVersion\":\"2.0.0\",\"modelNames\":[\"gpt-5.5\"],\"modelProviders\":[\"openai\"],\"key\":\"terminus 2__gpt-5.5\"},{\"agent\":\"Terminus 2\",\"model\":[\"Claude Opus 4.8\"],\"agentOrganization\":\"Terminal-Bench\",\"modelOrganization\":[\"Anthropic\"],\"date\":\"2026-05-29\",\"accuracy\":0.7460674157303371,\"stderr\":0.012308372078767778,\"integrationMethod\":\"API\",\"agentUrl\":\"https://www.tbench.ai/terminus\",\"verified\":true,\"agentName\":\"terminus-2\",\"agentVersion\":\"2.0.0\",\"modelNames\":[\"claude-opus-4-8\"],\"modelProviders\":[\"anthropic\"],\"key\":\"terminus 2__claude opus 4.8\"},{\"agent\":\"Terminus 2\",\"model\":[\"Gemini 3 Pro\"],\"agentOrganization\":\"Terminal-Bench\",\"modelOrganization\":[\"Google\"],\"date\":\"2026-05-01\",\"accuracy\":0.7438202247191011,\"stderr\":0.013199258566821045,\"integrationMethod\":\"API\",\"agentUrl\":\"https://www.tbench.ai/terminus\",\"verified\":true,\"agentName\":\"terminus-2\",\"agentVersion\":\"2.0.0\",\"modelNames\":[\"gemini-3-pro-preview\"],\"modelProviders\":[\"gemini\"],\"key\":\"terminus 2__gemini 3 pro\"},{\"agent\":\"Gemini CLI\",\"model\":[\"Gemini 3.1 Pro\"],\"agentOrganization\":\"Google\",\"modelOrganization\":[\"Google\"],\"date\":\"2026-05-05\",\"accuracy\":0.7065543071161049,\"stderr\":0.014843703568740315,\"integrationMethod\":\"API\",\"agentUrl\":\"https://github.com/google-gemini/gemini-cli\",\"verified\":true,\"agentName\":\"gemini-cli\",\"agentVersion\":\"0.40.0\",\"modelNames\":[\"gemini-3.1-pro-preview\"],\"modelProviders\":[\"gemini\"],\"key\":\"gemini cli__gemini 3.1 pro\"},{\"agent\":\"Terminus 2\",\"model\":[\"Gemini 3.1 Pro\"],\"agentOrganization\":\"Terminal-Bench\",\"modelOrganization\":[\"Google\"],\"date\":\"2026-05-05\",\"accuracy\":0.7031835205992509,\"stderr\":0.014791636846043224,\"integrationMethod\":\"API\",\"agentUrl\":\"https://www.tbench.ai/terminus\",\"verified\":true,\"agentName\":\"terminus-2\",\"agentVersion\":\"2.0.0\",\"modelNames\":[\"gemini-3.1-pro-preview\"],\"modelProviders\":[\"gemini\"],\"key\":\"terminus 2__gemini 3.1 pro\"},{\"agent\":\"Claude Code\",\"model\":[\"Claude Opus 4.7\"],\"agentOrganization\":\"Anthropic\",\"modelOrganization\":[\"Anthropic\"],\"date\":\"2026-05-01\",\"accuracy\":0.6971910112359551,\"stderr\":0.013864003010396704,\"integrationMethod\":\"API\",\"agentUrl\":\"https://www.claude.com/product/claude-code\",\"verified\":true,\"agentName\":\"claude-code\",\"agentVersion\":\"2.1.123\",\"modelNames\":[\"claude-opus-4-7\"],\"modelProviders\":[\"anthropic\"],\"key\":\"claude code__claude opus 4.7\"},{\"agent\":\"Gemini CLI\",\"model\":[\"Gemini 3 Pro\"],\"agentOrganization\":\"Google\",\"modelOrganization\":[\"Google\"],\"date\":\"2026-05-02\",\"accuracy\":0.6629213483146067,\"stderr\":0.013669129281569032,\"integrationMethod\":\"API\",\"agentUrl\":\"https://github.com/google-gemini/gemini-cli\",\"verified\":true,\"agentName\":\"gemini-cli\",\"agentVersion\":\"0.40.0\",\"modelNames\":[\"gemini-3-pro-preview\"],\"modelProviders\":[\"gemini\"],\"key\":\"gemini cli__gemini 3 pro\"},{\"agent\":\"Terminus 2\",\"model\":[\"Claude Opus 4.7\"],\"agentOrganization\":\"Terminal-Bench\",\"modelOrganization\":[\"Anthropic\"],\"date\":\"2026-05-01\",\"accuracy\":0.6606741573033708,\"stderr\":0.013669129281569032,\"integrationMethod\":\"API\",\"agentUrl\":\"https://www.tbench.ai/terminus\",\"verified\":true,\"agentName\":\"terminus-2\",\"agentVersion\":\"2.0.0\",\"modelNames\":[\"claude-opus-4-7\"],\"modelProviders\":[\"anthropic\"],\"key\":\"terminus 2__claude opus 4.7\"},{\"agent\":\"Claude Code\",\"model\":[\"GLM 5.1\"],\"agentOrganization\":\"Anthropic\",\"modelOrganization\":[\"Z-AI\"],\"date\":\"2026-05-02\",\"accuracy\":0.5865168539325842,\"stderr\":0.012410517996839619,\"integrationMethod\":\"API\",\"agentUrl\":\"https://www.claude.com/product/claude-code\",\"verified\":true,\"agentName\":\"claude-code\",\"agentVersion\":\"2.1.123\",\"modelNames\":[\"glm-5.1\"],\"modelProviders\":[\"z-ai\"],\"key\":\"claude code__glm 5.1\"}],\"className\":\"-mx-4 md:mx-0\",\"name\":\"terminal-bench\",\"version\":\"2.1\"}]]}]}]\n"])</script><script>self.__next_f.push([1,"2c:I[144324,[\"/_next/static/chunks/772cb4e079383472.js\",\"/_next/static/chunks/cc5200c2c4db5d30.js\",\"/_next/static/chunks/b881789f47bb99ed.js\",\"/_next/static/chunks/d48a38d803cc289b.js\",\"/_next/static/chunks/afcfef2b68fe9eaf.js\",\"/_next/static/chunks/befa77c9ca59daf6.js\",\"/_next/static/chunks/9eef2e45c165ec54.js\",\"/_next/static/chunks/9572667c10e1469b.js\",\"/_next/static/chunks/1d410ec419d845b5.js\",\"/_next/static/chunks/a802edd4d3033547.js\"],\"CodeBlock\"]\n2d:I[144324,[\"/_next/static/chunks/772cb4e079383472.js\",\"/_next/static/chunks/cc5200c2c4db5d30.js\",\"/_next/static/chunks/b881789f47bb99ed.js\",\"/_next/static/chunks/d48a38d803cc289b.js\",\"/_next/static/chunks/afcfef2b68fe9eaf.js\",\"/_next/static/chunks/befa77c9ca59daf6.js\",\"/_next/static/chunks/9eef2e45c165ec54.js\",\"/_next/static/chunks/9572667c10e1469b.js\",\"/_next/static/chunks/1d410ec419d845b5.js\",\"/_next/static/chunks/a802edd4d3033547.js\"],\"Pre\"]\n"])</script><script>self.__next_f.push([1,"29:[\"$\",\"$L2c\",null,{\"title\":\"Note: submissions may not modify timeouts or resources\",\"className\":\"my-0\",\"children\":[[\"$\",\"$L2d\",\"pre-0\",{\"className\":\"shiki shiki-themes github-light github-dark\",\"style\":{\"--shiki-light\":\"#24292e\",\"--shiki-dark\":\"#e1e4e8\",\"--shiki-light-bg\":\"#fff\",\"--shiki-dark-bg\":\"#24292e\"},\"tabIndex\":\"0\",\"children\":[\"$\",\"code\",\"code-0\",{\"children\":[\"$\",\"span\",\"span-0\",{\"className\":\"line\",\"children\":[[\"$\",\"span\",\"span-0\",{\"style\":{\"--shiki-light\":\"#6F42C1\",\"--shiki-dark\":\"#B392F0\"},\"children\":\"harbor\"}],[\"$\",\"span\",\"span-1\",{\"style\":{\"--shiki-light\":\"#032F62\",\"--shiki-dark\":\"#9ECBFF\"},\"children\":\" run\"}],[\"$\",\"span\",\"span-2\",{\"style\":{\"--shiki-light\":\"#005CC5\",\"--shiki-dark\":\"#79B8FF\"},\"children\":\" -d\"}],[\"$\",\"span\",\"span-3\",{\"style\":{\"--shiki-light\":\"#032F62\",\"--shiki-dark\":\"#9ECBFF\"},\"children\":\" terminal-bench/terminal-bench-2-1\"}],[\"$\",\"span\",\"span-4\",{\"style\":{\"--shiki-light\":\"#005CC5\",\"--shiki-dark\":\"#79B8FF\"},\"children\":\" -a\"}],[\"$\",\"span\",\"span-5\",{\"style\":{\"--shiki-light\":\"#032F62\",\"--shiki-dark\":\"#9ECBFF\"},\"children\":\" \\\"agent\\\"\"}],[\"$\",\"span\",\"span-6\",{\"style\":{\"--shiki-light\":\"#005CC5\",\"--shiki-dark\":\"#79B8FF\"},\"children\":\" -m\"}],[\"$\",\"span\",\"span-7\",{\"style\":{\"--shiki-light\":\"#032F62\",\"--shiki-dark\":\"#9ECBFF\"},\"children\":\" \\\"model\\\"\"}],[\"$\",\"span\",\"span-8\",{\"style\":{\"--shiki-light\":\"#005CC5\",\"--shiki-dark\":\"#79B8FF\"},\"children\":\" -k\"}],[\"$\",\"span\",\"span-9\",{\"style\":{\"--shiki-light\":\"#005CC5\",\"--shiki-dark\":\"#79B8FF\"},\"children\":\" 5\"}]]}]}]}]]}]\n"])</script><script>self.__next_f.push([1,"2a:[\"$\",\"$L2c\",null,{\"title\":\"Note: submissions may not modify timeouts or resources\",\"className\":\"my-0\",\"children\":[[\"$\",\"$L2d\",\"pre-0\",{\"className\":\"shiki shiki-themes github-light github-dark\",\"style\":{\"--shiki-light\":\"#24292e\",\"--shiki-dark\":\"#e1e4e8\",\"--shiki-light-bg\":\"#fff\",\"--shiki-dark-bg\":\"#24292e\"},\"tabIndex\":\"0\",\"children\":[\"$\",\"code\",\"code-0\",{\"children\":[\"$\",\"span\",\"span-0\",{\"className\":\"line\",\"children\":[[\"$\",\"span\",\"span-0\",{\"style\":{\"--shiki-light\":\"#6F42C1\",\"--shiki-dark\":\"#B392F0\"},\"children\":\"harbor\"}],[\"$\",\"span\",\"span-1\",{\"style\":{\"--shiki-light\":\"#032F62\",\"--shiki-dark\":\"#9ECBFF\"},\"children\":\" run\"}],[\"$\",\"span\",\"span-2\",{\"style\":{\"--shiki-light\":\"#005CC5\",\"--shiki-dark\":\"#79B8FF\"},\"children\":\" -d\"}],[\"$\",\"span\",\"span-3\",{\"style\":{\"--shiki-light\":\"#032F62\",\"--shiki-dark\":\"#9ECBFF\"},\"children\":\" terminal-bench/terminal-bench-2-1\"}],[\"$\",\"span\",\"span-4\",{\"style\":{\"--shiki-light\":\"#005CC5\",\"--shiki-dark\":\"#79B8FF\"},\"children\":\" --agent-import-path\"}],[\"$\",\"span\",\"span-5\",{\"style\":{\"--shiki-light\":\"#032F62\",\"--shiki-dark\":\"#9ECBFF\"},\"children\":\" \\\"path.to.agent:SomeAgent\\\"\"}],[\"$\",\"span\",\"span-6\",{\"style\":{\"--shiki-light\":\"#005CC5\",\"--shiki-dark\":\"#79B8FF\"},\"children\":\" -k\"}],[\"$\",\"span\",\"span-7\",{\"style\":{\"--shiki-light\":\"#005CC5\",\"--shiki-dark\":\"#79B8FF\"},\"children\":\" 5\"}]]}]}]}]]}]\n"])</script></body></html><!-- This script is automatically inserted by Netlify for Real User Monitoring (RUM). -->
2
+ <script async id="netlify-rum-container" src="/.netlify/scripts/rum" data-netlify-cwv-token="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzaXRlX2lkIjoiNDRiM2E4MTMtMzM1Ny00MWMzLWJmMGYtZjIyOGYzODI2MGIwIiwiYWNjb3VudF9pZCI6IjY3OWQ1NjE3ZjZkNWQ4NzQ0NTk0YzY1MSIsImRlcGxveV9pZCI6IjZhMzBlODJmN2ZkZDY0MDAwODI3ZmQ4MyIsImlzcyI6Im5ldGxpZnkifQ.mIIcdTa128sFkUfAQ9bZ8xsW2hOEQcnXsyy37Oagwp8"></script>
@@ -0,0 +1,38 @@
1
+ {
2
+ "description": "Example only. Excluded unless --include-examples is passed.",
3
+ "rows": [
4
+ {
5
+ "source": "swebench_verified",
6
+ "benchmark": "swebench_verified",
7
+ "url": "https://www.swebench.com/",
8
+ "modelRaw": "Claude Sonnet",
9
+ "metric": "resolved",
10
+ "score": 72,
11
+ "scoreScale": "percent",
12
+ "sampleSize": 500,
13
+ "date": "2026-06-01"
14
+ },
15
+ {
16
+ "source": "aider_polyglot",
17
+ "benchmark": "aider_polyglot",
18
+ "url": "https://aider.chat/docs/leaderboards/",
19
+ "modelRaw": "Claude Sonnet",
20
+ "metric": "percent_correct",
21
+ "score": 68,
22
+ "scoreScale": "percent",
23
+ "sampleSize": 225,
24
+ "date": "2026-06-01"
25
+ },
26
+ {
27
+ "source": "evalplus",
28
+ "benchmark": "evalplus",
29
+ "url": "https://github.com/evalplus/evalplus",
30
+ "modelRaw": "gpt-5.4-mini",
31
+ "metric": "pass_at_1",
32
+ "score": 0.74,
33
+ "scoreScale": "0-1",
34
+ "sampleSize": 378,
35
+ "date": "2026-05-20"
36
+ }
37
+ ]
38
+ }