thumbgate 1.27.12 → 1.27.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133) hide show
  1. package/.claude-plugin/plugin.json +1 -1
  2. package/.well-known/llms.txt +2 -1
  3. package/.well-known/mcp/server-card.json +1 -1
  4. package/README.md +2 -4
  5. package/adapters/claude/.mcp.json +2 -2
  6. package/adapters/mcp/server-stdio.js +1 -1
  7. package/adapters/opencode/opencode.json +1 -1
  8. package/adapters/policy-engine/ethicore-guardian-client.js +68 -0
  9. package/adapters/policy-engine/thumbgate-policy-engine-adapter.js +260 -0
  10. package/bin/cli.js +78 -259
  11. package/config/gate-templates.json +0 -228
  12. package/config/gates/claim-verification.json +0 -18
  13. package/package.json +35 -25
  14. package/public/assets/brand/thumbgate-logo-transparent.svg +22 -0
  15. package/public/assets/brand/thumbgate-mark-inline-v3.svg +19 -0
  16. package/public/assets/brand/thumbgate-mark.svg +11 -5
  17. package/public/blog.html +0 -30
  18. package/public/brand/thumbgate-mark.svg +9 -5
  19. package/public/chatgpt-app.html +2 -2
  20. package/public/compare.html +2 -1
  21. package/public/dashboard.html +1 -1
  22. package/public/federal.html +1 -1
  23. package/public/index.html +95 -216
  24. package/public/learn.html +59 -35
  25. package/public/lessons.html +1 -1
  26. package/public/numbers.html +2 -2
  27. package/public/pro.html +7 -7
  28. package/scripts/agent-readiness.js +142 -0
  29. package/scripts/aws-blocks-guardrails.js +228 -0
  30. package/scripts/cli-schema.js +22 -10
  31. package/scripts/dashboard-chat.js +2 -1
  32. package/scripts/document-intake.js +1 -49
  33. package/scripts/durability/step.js +3 -3
  34. package/scripts/gate-stats.js +5 -11
  35. package/scripts/gates-engine.js +0 -49
  36. package/scripts/gemini-embedding-policy.js +2 -1
  37. package/scripts/hook-stop-anti-claim.js +116 -184
  38. package/scripts/hosted-config.js +0 -12
  39. package/scripts/lesson-search.js +1 -15
  40. package/scripts/llm-client.js +187 -5
  41. package/scripts/plausible-domain-config.js +3 -1
  42. package/scripts/seo-gsd.js +240 -1
  43. package/scripts/tool-registry.js +2 -2
  44. package/scripts/vector-store.js +44 -0
  45. package/scripts/workspace-evolver.js +62 -2
  46. package/src/api/server.js +340 -131
  47. package/public/assets/brand/thumbgate-mark-inline.svg +0 -15
  48. package/public/compare/adopt-ai.html +0 -219
  49. package/public/compare/agentix-labs.html +0 -197
  50. package/public/compare/ai-experience-orchestration.html +0 -216
  51. package/public/compare/anthropic-claude-for-legal.html +0 -260
  52. package/public/compare/anthropic-containment.html +0 -280
  53. package/public/compare/arcade.html +0 -175
  54. package/public/compare/arcjet.html +0 -239
  55. package/public/compare/bumblebee.html +0 -307
  56. package/public/compare/claude-code-hooks.html +0 -294
  57. package/public/compare/databricks-unity-ai-gateway.html +0 -215
  58. package/public/compare/fallow.html +0 -351
  59. package/public/compare/heidi.html +0 -233
  60. package/public/compare/mem0.html +0 -342
  61. package/public/compare/oak-and-sparrow-gatekeeper.html +0 -289
  62. package/public/compare/rein.html +0 -236
  63. package/public/compare/sigmashake.html +0 -256
  64. package/public/compare/speclock.html +0 -342
  65. package/public/guides/agent-harness-optimization.html +0 -342
  66. package/public/guides/agentic-web-governance.html +0 -406
  67. package/public/guides/ai-agent-governance-sprint.html +0 -415
  68. package/public/guides/ai-agent-pre-action-approval-gates.html +0 -401
  69. package/public/guides/ai-agent-workflow-migration-checklist.html +0 -392
  70. package/public/guides/ai-deployment-readiness.html +0 -415
  71. package/public/guides/ai-mode-ads-agent-governance.html +0 -401
  72. package/public/guides/ai-search-topical-presence.html +0 -342
  73. package/public/guides/autoresearch-agent-safety.html +0 -342
  74. package/public/guides/background-agent-governance.html +0 -358
  75. package/public/guides/best-tools-stop-ai-agents-breaking-production.html +0 -363
  76. package/public/guides/browser-automation-safety.html +0 -342
  77. package/public/guides/chatgpt-ads-trust.html +0 -353
  78. package/public/guides/claude-code-feedback.html +0 -339
  79. package/public/guides/claude-code-prevent-repeated-mistakes.html +0 -161
  80. package/public/guides/claude-code-skills-guardrails.html +0 -343
  81. package/public/guides/claude-desktop.html +0 -356
  82. package/public/guides/code-knowledge-graph-guardrails.html +0 -365
  83. package/public/guides/codex-cli-guardrails.html +0 -339
  84. package/public/guides/cursor-agent-guardrails.html +0 -339
  85. package/public/guides/cursor-prevent-repeated-mistakes.html +0 -161
  86. package/public/guides/database-agent-safety.html +0 -406
  87. package/public/guides/deepseek-v4-runtime-guardrails.html +0 -346
  88. package/public/guides/developer-machine-supply-chain-guardrails.html +0 -358
  89. package/public/guides/gcp-mcp-guardrails.html +0 -147
  90. package/public/guides/gemini-cli-feedback-memory.html +0 -339
  91. package/public/guides/gpt-5-5-model-evaluation.html +0 -358
  92. package/public/guides/internal-ai-engineering-stack-guardrails.html +0 -348
  93. package/public/guides/long-running-agent-context-management.html +0 -346
  94. package/public/guides/mcp-tool-governance.html +0 -401
  95. package/public/guides/multica-thumbgate-setup.html +0 -134
  96. package/public/guides/native-messaging-host-security.html +0 -342
  97. package/public/guides/policy-engine-pre-action-gates.html +0 -346
  98. package/public/guides/pre-action-checks.html +0 -342
  99. package/public/guides/pretooluse-hooks-vs-advisory-prompt-rules.html +0 -342
  100. package/public/guides/prompt-tricks-to-workflow-rules.html +0 -365
  101. package/public/guides/proxy-pointer-rag-guardrails.html +0 -352
  102. package/public/guides/rag-precision-tuning-guardrails.html +0 -352
  103. package/public/guides/reasoning-compression-guardrails.html +0 -346
  104. package/public/guides/relational-knowledge-ai-recommendations.html +0 -342
  105. package/public/guides/roo-code-alternative-cline.html +0 -339
  106. package/public/guides/semantic-programmatic-seo-guardrails.html +0 -352
  107. package/public/guides/seo-agent-skills-guardrails.html +0 -344
  108. package/public/guides/stop-repeated-ai-agent-mistakes.html +0 -342
  109. package/public/learn/ac-dc-runtime-enforcement.html +0 -277
  110. package/public/learn/agent-harness-pattern.html +0 -181
  111. package/public/learn/agent-identity-connector-governance.html +0 -146
  112. package/public/learn/agent-swarms-shared-gates.html +0 -173
  113. package/public/learn/agentic-enterprise-context-brain.html +0 -117
  114. package/public/learn/agentic-os-team-governance.html +0 -146
  115. package/public/learn/ai-agent-governance.html +0 -158
  116. package/public/learn/ai-agent-persistent-memory.html +0 -211
  117. package/public/learn/anthropomorphic-claim-gates.html +0 -180
  118. package/public/learn/background-agent-control-layer.html +0 -184
  119. package/public/learn/claude-code-goal-with-rubrics.html +0 -205
  120. package/public/learn/codex-role-plugins-need-governance.html +0 -125
  121. package/public/learn/cost-aware-agent-gate-routing.html +0 -173
  122. package/public/learn/databricks-unity-ai-gateway-runtime-governance.html +0 -157
  123. package/public/learn/deterministic-agent-workflows.html +0 -185
  124. package/public/learn/feedback-loop-vs-decision-layer.html +0 -283
  125. package/public/learn/from-prototype-to-production.html +0 -223
  126. package/public/learn/learn.css +0 -51
  127. package/public/learn/mcp-pre-action-checks-explained.html +0 -172
  128. package/public/learn/pretix-stripe-connect-marketplaces.html +0 -161
  129. package/public/learn/regulated-agent-execution-boundary.html +0 -196
  130. package/public/learn/spec-driven-development.html +0 -168
  131. package/public/learn/stop-ai-agent-force-push.html +0 -134
  132. package/public/learn/vibe-coding-safety-net.html +0 -142
  133. package/scripts/reddit-browser-notification-watch.js +0 -230
@@ -1,346 +0,0 @@
1
- <!DOCTYPE html>
2
- <html lang="en">
3
- <head>
4
- <meta charset="UTF-8" />
5
- <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
- <title>DeepSeek V4 Runtime Guardrails | Sparse Attention, Speculation, and Verified RL</title>
7
- <meta name="description" content="DeepSeek-V4 introduces long-context sparse attention, speculative decoding, KV offload, FP4/FP8 paths, and verified-RL replay concerns. ThumbGate turns those..." />
8
- <meta property="og:title" content="DeepSeek V4 Runtime Guardrails | Sparse Attention, Speculation, and Verified RL" />
9
- <meta property="og:description" content="DeepSeek-V4 introduces long-context sparse attention, speculative decoding, KV offload, FP4/FP8 paths, and verified-RL replay concerns. ThumbGate turns those..." />
10
- <meta property="og:type" content="article" />
11
- <meta property="og:url" content="https://thumbgate.ai/guides/deepseek-v4-runtime-guardrails" />
12
- <link rel="canonical" href="https://thumbgate.ai/guides/deepseek-v4-runtime-guardrails" />
13
- <link rel="llm-context" href="/llm-context.md" type="text/markdown" />
14
- <link rel="icon" type="image/svg+xml" href="/thumbgate-icon.png" />
15
- <link rel="apple-touch-icon" href="/assets/brand/thumbgate-mark.svg" />
16
- <meta property="og:image" content="/og.png" />
17
- <style>
18
- :root {
19
- --bg: #0a0a0b;
20
- --bg-raised: #111113;
21
- --bg-card: #161618;
22
- --line: #222225;
23
- --text: #e8e8ec;
24
- --muted: #8b8b96;
25
- --cyan: #22d3ee;
26
- --green: #4ade80;
27
- --red: #f87171;
28
- }
29
- * { box-sizing: border-box; }
30
- body {
31
- margin: 0;
32
- font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
33
- background: var(--bg);
34
- color: var(--text);
35
- line-height: 1.65;
36
- }
37
- a { color: var(--cyan); text-decoration: none; }
38
- a:hover { text-decoration: underline; }
39
- .container { max-width: 980px; margin: 0 auto; padding: 0 24px; }
40
- .topbar {
41
- position: sticky;
42
- top: 0;
43
- z-index: 20;
44
- backdrop-filter: blur(12px);
45
- background: rgba(10, 10, 11, 0.88);
46
- border-bottom: 1px solid var(--line);
47
- }
48
- .topbar .container {
49
- display: flex;
50
- justify-content: space-between;
51
- align-items: center;
52
- padding-top: 14px;
53
- padding-bottom: 14px;
54
- }
55
- .brand {
56
- font-weight: 700;
57
- color: var(--text);
58
- display: inline-flex;
59
- align-items: center;
60
- gap: 8px;
61
- text-decoration: none;
62
- }
63
- .brand .logo-mark { width: 28px; height: 28px; display: block; }
64
- .hero { padding: 72px 0 32px; }
65
- .eyebrow {
66
- display: inline-flex;
67
- align-items: center;
68
- gap: 8px;
69
- padding: 6px 12px;
70
- border-radius: 999px;
71
- border: 1px solid rgba(34, 211, 238, 0.22);
72
- background: rgba(34, 211, 238, 0.1);
73
- color: var(--cyan);
74
- text-transform: uppercase;
75
- letter-spacing: 0.08em;
76
- font-size: 12px;
77
- font-weight: 700;
78
- }
79
- h1 {
80
- font-size: clamp(34px, 5vw, 56px);
81
- line-height: 1.06;
82
- letter-spacing: -0.04em;
83
- margin: 16px 0;
84
- max-width: 760px;
85
- }
86
- .hero p {
87
- max-width: 720px;
88
- color: var(--muted);
89
- font-size: 18px;
90
- }
91
- .signal-row {
92
- display: flex;
93
- flex-wrap: wrap;
94
- gap: 12px;
95
- margin: 28px 0 0;
96
- }
97
- .signal-pill {
98
- display: inline-flex;
99
- align-items: center;
100
- gap: 8px;
101
- padding: 10px 14px;
102
- border-radius: 999px;
103
- border: 1px solid var(--line);
104
- background: var(--bg-raised);
105
- font-weight: 600;
106
- font-size: 14px;
107
- }
108
- .signal-pill.up {
109
- border-color: rgba(74, 222, 128, 0.28);
110
- color: #b8f7c8;
111
- background: rgba(74, 222, 128, 0.1);
112
- }
113
- .signal-pill.down {
114
- border-color: rgba(248, 113, 113, 0.28);
115
- color: #ffc0c0;
116
- background: rgba(248, 113, 113, 0.1);
117
- }
118
- .grid {
119
- display: grid;
120
- grid-template-columns: minmax(0, 2fr) minmax(280px, 1fr);
121
- gap: 24px;
122
- padding-bottom: 72px;
123
- }
124
- .card, .detail-section, .sidebar-card {
125
- background: var(--bg-card);
126
- border: 1px solid var(--line);
127
- border-radius: 16px;
128
- }
129
- .card { padding: 24px; }
130
- .detail-section { padding: 24px; margin-bottom: 18px; }
131
- .detail-section h2 { margin: 0 0 12px; font-size: 24px; letter-spacing: -0.03em; }
132
- .detail-section p { color: var(--muted); }
133
- .detail-section ul, .card ul { padding-left: 18px; color: var(--muted); }
134
- .card h2 { margin-top: 0; }
135
- .sidebar {
136
- display: flex;
137
- flex-direction: column;
138
- gap: 18px;
139
- }
140
- .sidebar-card {
141
- padding: 20px;
142
- }
143
- /* Only the first sidebar card sticks. Stacking multiple stickies at the
144
- same top offset makes them overlap each other on scroll. The related-
145
- pages card flows normally below. */
146
- .sidebar-card:first-child {
147
- position: sticky;
148
- top: 84px;
149
- max-height: calc(100vh - 104px);
150
- overflow-y: auto;
151
- -webkit-overflow-scrolling: touch;
152
- }
153
- .proof-links {
154
- display: flex;
155
- flex-wrap: wrap;
156
- gap: 12px;
157
- margin-top: 16px;
158
- }
159
- .cta-button {
160
- display: inline-flex;
161
- align-items: center;
162
- justify-content: center;
163
- margin-top: 18px;
164
- padding: 12px 16px;
165
- border-radius: 10px;
166
- background: var(--cyan);
167
- color: #071116;
168
- font-weight: 700;
169
- text-decoration: none;
170
- }
171
- .faq-item {
172
- border-top: 1px solid var(--line);
173
- padding: 14px 0;
174
- }
175
- .faq-item summary {
176
- cursor: pointer;
177
- font-weight: 600;
178
- }
179
- .faq-item p {
180
- color: var(--muted);
181
- }
182
- .related-card {
183
- display: block;
184
- padding: 14px;
185
- border-radius: 12px;
186
- border: 1px solid var(--line);
187
- background: var(--bg-raised);
188
- margin-top: 12px;
189
- color: var(--text);
190
- }
191
- .related-label {
192
- display: block;
193
- color: var(--muted);
194
- font-size: 12px;
195
- text-transform: uppercase;
196
- letter-spacing: 0.08em;
197
- margin-bottom: 4px;
198
- }
199
- @media (max-width: 860px) {
200
- .grid {
201
- grid-template-columns: 1fr;
202
- }
203
- .sidebar-card:first-child {
204
- position: static;
205
- max-height: none;
206
- overflow: visible;
207
- }
208
- }
209
- </style>
210
- <script type="application/ld+json">
211
- {
212
- "@context": "https://schema.org",
213
- "@type": "TechArticle",
214
- "headline": "DeepSeek-V4 Needs Runtime Guardrails Before Production Routing",
215
- "description": "DeepSeek-V4 introduces long-context sparse attention, speculative decoding, KV offload, FP4/FP8 paths, and verified-RL replay concerns. ThumbGate turns those...",
216
- "about": [
217
- "claude code masterclass guardrails",
218
- "cursor prevent repeated mistakes",
219
- "claude code prevent repeated mistakes",
220
- "codex cli guardrails"
221
- ],
222
- "url": "https://thumbgate.ai/guides/deepseek-v4-runtime-guardrails",
223
- "publisher": {
224
- "@type": "Organization",
225
- "name": "ThumbGate",
226
- "url": "https://thumbgate.ai"
227
- },
228
- "mainEntityOfPage": "https://thumbgate.ai/guides/deepseek-v4-runtime-guardrails"
229
- }
230
- </script>
231
- <script type="application/ld+json">
232
- {
233
- "@context": "https://schema.org",
234
- "@type": "FAQPage",
235
- "mainEntity": [
236
- {
237
- "@type": "Question",
238
- "name": "Should ThumbGate switch to DeepSeek-V4 by default?",
239
- "acceptedAnswer": {
240
- "@type": "Answer",
241
- "text": "No. Treat DeepSeek-V4 as a candidate for self-hosted long-context workloads. Route to it only after ThumbGate benchmarks pass for quality, cache coherence, latency, cost, and rollback behavior."
242
- }
243
- },
244
- {
245
- "@type": "Question",
246
- "name": "What is different about DeepSeek-V4 governance?",
247
- "acceptedAnswer": {
248
- "@type": "Answer",
249
- "text": "The risk is not only model quality. Hybrid sparse attention, speculative decoding, KV offload, mixed precision, and RL replay can all create silent runtime failures unless each change is gated with evidence."
250
- }
251
- }
252
- ]
253
- }
254
- </script>
255
- </head>
256
- <body>
257
- <div class="topbar">
258
- <div class="container">
259
- <a class="brand" href="/"><img src="/assets/brand/thumbgate-mark-inline.svg" alt="ThumbGate" class="logo-mark" width="28" height="28"><span class="logo-text">ThumbGate</span></a>
260
- <a href="https://github.com/IgorGanapolsky/ThumbGate/blob/main/docs/VERIFICATION_EVIDENCE.md" target="_blank" rel="noopener">Verification evidence</a>
261
- </div>
262
- </div>
263
-
264
- <main class="container">
265
- <section class="hero">
266
- <div class="eyebrow">guide | deepseek v4 runtime guardrails</div>
267
- <h1>DeepSeek-V4 Needs Runtime Guardrails Before Production Routing</h1>
268
- <p>DeepSeek-V4 introduces long-context sparse attention, speculative decoding, KV offload, FP4/FP8 paths, and verified-RL replay concerns. ThumbGate turns those runtime signals into pre-action checks before model-routing or training changes go live.</p>
269
- <div class="signal-row">
270
- <div class="signal-pill up">👍 Thumbs up reinforces good behavior</div>
271
- <div class="signal-pill down">👎 Thumbs down blocks repeated mistakes</div>
272
- </div>
273
- </section>
274
-
275
- <section class="grid">
276
- <div>
277
- <div class="card">
278
- <h2>Why this page exists</h2>
279
- <ul><li>The high-ROI move is not blindly switching models; it is benchmarking DeepSeek-V4 behind cache, speculation, precision, and replay gates.</li><li>Hybrid sparse attention changes prefix-cache assumptions, so cache coherence and rollback evidence must exist before routing long traces.</li><li>ThumbGate now exposes npx thumbgate deepseek-v4-runtime-guardrails for self-hosted long-context model rollouts.</li></ul>
280
- </div>
281
-
282
- <section class="detail-section">
283
- <h2>Why this helps ThumbGate</h2>
284
- <p>Teams adopting SGLang-style DeepSeek-V4 serving are exactly the buyers who need agent governance: they are optimizing cost and throughput while increasing context length and system complexity.</p><p>ThumbGate can sit above the runtime as the policy layer that blocks unsafe routing changes, requires benchmark proof, and keeps self-hosted model experiments from becoming invisible production risk.</p>
285
-
286
- </section>
287
- <section class="detail-section">
288
- <h2>High-ROI runtime gates</h2>
289
-
290
- <ul><li>Require hybrid prefix-cache coherence eval before enabling long-context cache reuse.</li><li>Checkpoint speculative decoding acceptance length, rollback behavior, and correctness before treating it as a speedup.</li><li>Require long-context KV capacity and offload plans before 128k+ or 1M-token routing.</li><li>Require rollout routing replay, indexer replay, and train-inference drift checks before RL or fine-tuning updates.</li><li>Checkpoint FP4/FP8 mixed-precision determinism before benchmark results update routing.</li><li>CLI path: npx thumbgate deepseek-v4-runtime-guardrails --context-tokens=900000 --hybrid-attention --speculative-decoding --accept-length=1.4 --precision-mode=fp8 --json.</li></ul>
291
- </section>
292
- <section class="detail-section">
293
- <h2>Where this creates revenue</h2>
294
- <p>This gives ThumbGate a serious infrastructure story for teams moving beyond hosted APIs. The offer is a Workflow Hardening Sprint around one model-routing lane: prove the runtime change, gate the risks, and keep the agent from silently routing expensive work through an unverified path.</p>
295
-
296
- </section>
297
- <div class="detail-section">
298
- <h2>FAQ</h2>
299
-
300
- <details class="faq-item">
301
- <summary>Should ThumbGate switch to DeepSeek-V4 by default?</summary>
302
- <p>No. Treat DeepSeek-V4 as a candidate for self-hosted long-context workloads. Route to it only after ThumbGate benchmarks pass for quality, cache coherence, latency, cost, and rollback behavior.</p>
303
- </details>
304
- <details class="faq-item">
305
- <summary>What is different about DeepSeek-V4 governance?</summary>
306
- <p>The risk is not only model quality. Hybrid sparse attention, speculative decoding, KV offload, mixed precision, and RL replay can all create silent runtime failures unless each change is gated with evidence.</p>
307
- </details>
308
- </div>
309
- </div>
310
-
311
- <aside class="sidebar">
312
-
313
-
314
-
315
-
316
- <div class="sidebar-card">
317
- <h2>GSD execution brief</h2>
318
- <p>This page was prioritized because it captures high-intent demand around deepseek v4 runtime guardrails and feeds directly into ThumbGate's proof-led conversion path.</p>
319
- <p><strong>Opportunity score:</strong> 83</p>
320
- <p><strong>Primary persona:</strong> engineering-lead</p>
321
- <p><strong>Keyword cluster:</strong> claude code masterclass guardrails, cursor prevent repeated mistakes, claude code prevent repeated mistakes, codex cli guardrails</p>
322
- <p><strong>Pricing:</strong> Pro $19/mo or $149/yr. Team $49/seat/mo.</p>
323
- <div class="proof-links"><a href="https://github.com/IgorGanapolsky/ThumbGate/blob/main/docs/VERIFICATION_EVIDENCE.md" target="_blank" rel="noopener">Verification evidence</a><a href="https://github.com/IgorGanapolsky/ThumbGate/blob/main/proof/automation/report.json" target="_blank" rel="noopener">Automation proof</a><a href="https://github.com/IgorGanapolsky/ThumbGate" target="_blank" rel="noopener">GitHub repository</a></div>
324
- <a class="cta-button" href="/checkout/pro?utm_source=website&amp;utm_medium=seo_page&amp;utm_campaign=guides_deepseek-v4-runtime-guardrails&amp;cta_placement=seo_brief&amp;plan_id=pro" target="_blank" rel="noopener">Go Pro — $19/mo</a>
325
- </div>
326
- <div class="sidebar-card">
327
- <h2>Related pages</h2>
328
-
329
- <a class="related-card" href="/guides/reasoning-compression-guardrails">
330
- <span class="related-label">Related page</span>
331
- <strong>Reasoning Compression Needs Step-Level Safety Checks</strong>
332
- </a>
333
- <a class="related-card" href="/guides/long-running-agent-context-management">
334
- <span class="related-label">Related page</span>
335
- <strong>Long-Running Agents Need Structured Memory, Not Raw Chat Logs</strong>
336
- </a>
337
- <a class="related-card" href="/guides/gpt-5-5-model-evaluation">
338
- <span class="related-label">Related page</span>
339
- <strong>Evaluate GPT-5.5 Before You Route Production Agent Work</strong>
340
- </a>
341
- </div>
342
- </aside>
343
- </section>
344
- </main>
345
- </body>
346
- </html>