@f5xc-salesdemos/pi-ai 14.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128)
  1. package/CHANGELOG.md +1991 -0
  2. package/README.md +1160 -0
  3. package/package.json +135 -0
  4. package/src/api-registry.ts +95 -0
  5. package/src/auth-storage.ts +2688 -0
  6. package/src/cli.ts +493 -0
  7. package/src/index.ts +42 -0
  8. package/src/model-cache.ts +97 -0
  9. package/src/model-manager.ts +349 -0
  10. package/src/model-thinking.ts +568 -0
  11. package/src/models.json +48758 -0
  12. package/src/models.json.d.ts +9 -0
  13. package/src/models.ts +56 -0
  14. package/src/prompts/turn-aborted-guidance.md +4 -0
  15. package/src/provider-details.ts +81 -0
  16. package/src/provider-models/descriptors.ts +285 -0
  17. package/src/provider-models/google.ts +90 -0
  18. package/src/provider-models/index.ts +4 -0
  19. package/src/provider-models/openai-compat.ts +2096 -0
  20. package/src/provider-models/special.ts +106 -0
  21. package/src/providers/amazon-bedrock.ts +706 -0
  22. package/src/providers/anthropic.ts +1676 -0
  23. package/src/providers/azure-openai-responses.ts +391 -0
  24. package/src/providers/cursor/gen/agent_pb.ts +15274 -0
  25. package/src/providers/cursor/proto/agent.proto +3526 -0
  26. package/src/providers/cursor/proto/buf.gen.yaml +6 -0
  27. package/src/providers/cursor/proto/buf.yaml +17 -0
  28. package/src/providers/cursor.ts +2218 -0
  29. package/src/providers/github-copilot-headers.ts +138 -0
  30. package/src/providers/gitlab-duo.ts +381 -0
  31. package/src/providers/google-gemini-cli.ts +1133 -0
  32. package/src/providers/google-shared.ts +354 -0
  33. package/src/providers/google-vertex.ts +436 -0
  34. package/src/providers/google.ts +381 -0
  35. package/src/providers/kimi.ts +151 -0
  36. package/src/providers/openai-codex/constants.ts +43 -0
  37. package/src/providers/openai-codex/request-transformer.ts +158 -0
  38. package/src/providers/openai-codex/response-handler.ts +81 -0
  39. package/src/providers/openai-codex-responses.ts +2337 -0
  40. package/src/providers/openai-completions-compat.ts +155 -0
  41. package/src/providers/openai-completions.ts +1161 -0
  42. package/src/providers/openai-responses-shared.ts +452 -0
  43. package/src/providers/openai-responses.ts +515 -0
  44. package/src/providers/register-builtins.ts +329 -0
  45. package/src/providers/synthetic.ts +154 -0
  46. package/src/providers/transform-messages.ts +228 -0
  47. package/src/rate-limit-utils.ts +84 -0
  48. package/src/stream.ts +728 -0
  49. package/src/types.ts +542 -0
  50. package/src/usage/claude.ts +337 -0
  51. package/src/usage/gemini.ts +248 -0
  52. package/src/usage/github-copilot.ts +427 -0
  53. package/src/usage/google-antigravity.ts +200 -0
  54. package/src/usage/kimi.ts +286 -0
  55. package/src/usage/minimax-code.ts +31 -0
  56. package/src/usage/openai-codex.ts +387 -0
  57. package/src/usage/zai.ts +247 -0
  58. package/src/usage.ts +130 -0
  59. package/src/utils/abort.ts +36 -0
  60. package/src/utils/anthropic-auth.ts +212 -0
  61. package/src/utils/discovery/antigravity.ts +261 -0
  62. package/src/utils/discovery/codex.ts +371 -0
  63. package/src/utils/discovery/cursor.ts +306 -0
  64. package/src/utils/discovery/gemini.ts +248 -0
  65. package/src/utils/discovery/index.ts +5 -0
  66. package/src/utils/discovery/openai-compatible.ts +224 -0
  67. package/src/utils/event-stream.ts +209 -0
  68. package/src/utils/http-inspector.ts +78 -0
  69. package/src/utils/idle-iterator.ts +176 -0
  70. package/src/utils/json-parse.ts +28 -0
  71. package/src/utils/oauth/alibaba-coding-plan.ts +59 -0
  72. package/src/utils/oauth/anthropic.ts +134 -0
  73. package/src/utils/oauth/api-key-validation.ts +92 -0
  74. package/src/utils/oauth/callback-server.ts +276 -0
  75. package/src/utils/oauth/cerebras.ts +59 -0
  76. package/src/utils/oauth/cloudflare-ai-gateway.ts +48 -0
  77. package/src/utils/oauth/cursor.ts +157 -0
  78. package/src/utils/oauth/github-copilot.ts +345 -0
  79. package/src/utils/oauth/gitlab-duo.ts +123 -0
  80. package/src/utils/oauth/google-antigravity.ts +275 -0
  81. package/src/utils/oauth/google-gemini-cli.ts +334 -0
  82. package/src/utils/oauth/huggingface.ts +62 -0
  83. package/src/utils/oauth/index.ts +504 -0
  84. package/src/utils/oauth/kagi.ts +47 -0
  85. package/src/utils/oauth/kilo.ts +87 -0
  86. package/src/utils/oauth/kimi.ts +251 -0
  87. package/src/utils/oauth/litellm.ts +47 -0
  88. package/src/utils/oauth/lm-studio.ts +40 -0
  89. package/src/utils/oauth/minimax-code.ts +78 -0
  90. package/src/utils/oauth/moonshot.ts +59 -0
  91. package/src/utils/oauth/nanogpt.ts +51 -0
  92. package/src/utils/oauth/nvidia.ts +70 -0
  93. package/src/utils/oauth/oauth.html +199 -0
  94. package/src/utils/oauth/ollama.ts +47 -0
  95. package/src/utils/oauth/openai-codex.ts +190 -0
  96. package/src/utils/oauth/opencode.ts +49 -0
  97. package/src/utils/oauth/parallel.ts +46 -0
  98. package/src/utils/oauth/perplexity.ts +200 -0
  99. package/src/utils/oauth/pkce.ts +18 -0
  100. package/src/utils/oauth/qianfan.ts +58 -0
  101. package/src/utils/oauth/qwen-portal.ts +60 -0
  102. package/src/utils/oauth/synthetic.ts +60 -0
  103. package/src/utils/oauth/tavily.ts +46 -0
  104. package/src/utils/oauth/together.ts +59 -0
  105. package/src/utils/oauth/types.ts +89 -0
  106. package/src/utils/oauth/venice.ts +59 -0
  107. package/src/utils/oauth/vercel-ai-gateway.ts +47 -0
  108. package/src/utils/oauth/vllm.ts +40 -0
  109. package/src/utils/oauth/xiaomi.ts +88 -0
  110. package/src/utils/oauth/zai.ts +60 -0
  111. package/src/utils/oauth/zenmux.ts +51 -0
  112. package/src/utils/overflow.ts +132 -0
  113. package/src/utils/retry-after.ts +110 -0
  114. package/src/utils/retry.ts +93 -0
  115. package/src/utils/schema/CONSTRAINTS.md +160 -0
  116. package/src/utils/schema/adapt.ts +20 -0
  117. package/src/utils/schema/compatibility.ts +397 -0
  118. package/src/utils/schema/dereference.ts +93 -0
  119. package/src/utils/schema/equality.ts +93 -0
  120. package/src/utils/schema/fields.ts +147 -0
  121. package/src/utils/schema/index.ts +9 -0
  122. package/src/utils/schema/normalize-cca.ts +479 -0
  123. package/src/utils/schema/sanitize-google.ts +212 -0
  124. package/src/utils/schema/strict-mode.ts +353 -0
  125. package/src/utils/schema/types.ts +5 -0
  126. package/src/utils/tool-choice.ts +81 -0
  127. package/src/utils/validation.ts +664 -0
  128. package/src/utils.ts +147 -0
package/CHANGELOG.md ADDED
@@ -0,0 +1,1991 @@
1
+ # Changelog
2
+
3
+ ## [Unreleased]
4
+
5
+ ## [14.0.0] - 2026-04-08
6
+ ### Breaking Changes
7
+
8
+ - Removed `coerceNullStrings` function and its automatic null-string coercion behavior from JSON parsing
9
+
10
+ ### Added
11
+
12
+ - Added support for OpenRouter provider with strict mode detection
13
+ - Added automatic cleaning of literal escape sequences (`\n`, `\t`, `\r`) in JSON parsing to handle LLM encoding confusion
14
+ - Added support for healing JSON with trailing junk after balanced containers (e.g., `]\n</invoke>`)
15
+ - Added `CODEX_STARTUP_EVENT_CHANNEL` constant and `CodexStartupEvent` type for monitoring Codex provider initialization status
16
+ - Added automatic healing of malformed JSON with single-character bracket errors at the end of strings, improving LLM tool argument parsing robustness
17
+
18
+ ## [13.19.0] - 2026-04-05
19
+
20
+ ### Fixed
21
+
22
+ - Fixed GitHub Copilot model context window detection by correcting fallback priority for maxContextWindowTokens and maxPromptTokens
23
+ - Fixed Gemini 2.5 Pro context window detection in GitHub Copilot model limits test
24
+ - Fixed Claude Opus 4.6 context window detection in GitHub Copilot model limits test
25
+ - Fixed Anthropic streaming to suppress transient SDK console errors for malformed SSE keep-alive frames so the TUI only shows surfaced provider errors
26
+
27
+ - Added environment-based credential fallback for the OpenAI Codex provider.
28
+ ## [13.17.6] - 2026-04-01
29
+
30
+ ### Fixed
31
+
32
+ - Fixed Anthropic first-event timeouts to exclude stream connection setup from the watchdog, preserve timeout-specific retry classification after local aborts, and reset retry state cleanly between attempts
33
+
34
+ ## [13.17.5] - 2026-04-01
35
+ ### Changed
36
+
37
+ - Increased default first-event timeout from 15s to 45s to better accommodate longer request setup times
38
+ - Modified first-event watchdog to inherit idle timeout when it exceeds the default, ensuring consistent timeout behavior across different configurations
39
+
40
+ ### Fixed
41
+
42
+ - Fixed first-event watchdog initialization timing so it no longer starts before the actual stream request is created, preventing premature timeouts during request setup
43
+ - Fixed first-event watchdog timing so OpenAI-family providers no longer count slow request setup against the first streamed event timeout, and raised the default first-event timeout to avoid false aborts after long tool turns
44
+
45
+ ## [13.17.2] - 2026-04-01
46
+
47
+ ### Fixed
48
+
49
+ - Fixed OpenAI-family first-event timeouts to preserve provider-specific timeout errors for retry classification instead of flattening them to generic aborts ([#591](https://github.com/can1357/oh-my-pi/issues/591))
50
+
51
+ ## [13.17.1] - 2026-04-01
52
+
53
+ ### Added
54
+
55
+ - Added `thinkingSignature` field to thinking content blocks to preserve the original reasoning field name (e.g., `reasoning_text`, `reasoning_content`) for accurate follow-up requests
56
+ - Added first-event timeout detection for streaming responses to abort stuck requests before user-visible content arrives
57
+ - Added `PI_STREAM_FIRST_EVENT_TIMEOUT_MS` environment variable to configure first-event timeout (defaults to 15 seconds or idle timeout, whichever is lower)
58
+
59
+ ### Changed
60
+
61
+ - Changed thinking block handling to track and distinguish between different reasoning field types, enabling proper field name preservation across multiple turns
62
+
63
+ ### Fixed
64
+
65
+ - Fixed Anthropic stream timeout errors to be properly retried by recognizing first-event timeout messages
66
+ - Fixed stream stall detection to distinguish between first-event timeouts and idle timeouts, enabling faster recovery for stuck connections
67
+
68
+ ### Added
69
+
70
+ - Added Vercel AI Gateway to `/login` providers for interactive API key setup
71
+
72
+ ### Fixed
73
+ - Fixed `omp commit` failing with HTTP 400 errors when using reasoning-enabled models on OpenAI-compatible endpoints that don't support the `developer` role (e.g., GitHub Copilot, custom proxies). Now falls back to `system` role when `developer` is unsupported.
74
+
75
+ ## [13.17.0] - 2026-03-30
76
+
77
+ ### Changed
78
+
79
+ - Bumped zai provider default model from glm-4.6 to glm-5.1
80
+
81
+ ## [13.16.5] - 2026-03-29
82
+
83
+ ### Added
84
+
85
+ - Added Gemma 3 27B model support for Google Generative AI
86
+
87
+ ### Changed
88
+
89
+ - Updated Kwaipilot KAT-Coder-Pro V2 model display name and pricing information
90
+ - Updated Kwaipilot KAT-Coder-Pro V2 context window from 222,222 to 256,000 tokens and max tokens from 8,888 to 80,000
91
+
92
+ ### Fixed
93
+
94
+ - Fixed normalizeAnthropicBaseUrl returning empty string instead of undefined when baseUrl is empty
95
+
96
+ ## [13.16.4] - 2026-03-28
97
+ ### Added
98
+
99
+ - Added support for Groq Compound and Compound Mini models with extended context window (131K tokens) and configurable thinking levels
100
+ - Added support for OpenAI GPT-OSS-Safeguard-20B model with reasoning capabilities across multiple providers
101
+ - Added support for Kwaipilot KAT-Coder-Pro V2 model across Kilo, NanoGPT, and OpenRouter providers
102
+ - Added support for GLM-5.1 model with extended context window (200K tokens) and max output of 131K tokens
103
+ - Added support for Qwen3.5-27B-Musica-v1 model
104
+ - Added support for zai-org/glm-5.1 model with reasoning capabilities
105
+ - Added support for Sapiens AI Agnes-1.5-Lite model with multimodal input (text and image) and reasoning
106
+ - Added support for Venice openai-gpt-54-mini model
107
+
108
+ ### Changed
109
+
110
+ - Updated Qwen QwQ 32B max tokens from 16,384 to 40,960 across multiple providers
111
+ - Updated OpenAI GPT-OSS-Safeguard-20B model name to 'Safety GPT OSS 20B' and enabled reasoning capabilities
112
+ - Updated OpenAI GPT-OSS-Safeguard-20B context window from 222,222 to 131,072 tokens and max tokens from 8,888 to 65,536
113
+ - Updated OpenRouter Qwen QwQ 32B pricing: input from 0.2 to 0.19, output from 1.17 to 1.15, cache read from 0.1 to 0.095
114
+ - Updated OpenRouter Claude 3.5 Sonnet pricing: input from 0.45 to 0.42, cache read from 0.225 to 0.21
115
+
116
+ ## [13.16.3] - 2026-03-28
117
+ ### Changed
118
+
119
+ - Modified OAuth credential saving to preserve unrelated identities instead of replacing all credentials for a provider
120
+ - Updated credential identity resolution to use provider context for more accurate email deduplication
121
+
122
+ ### Fixed
123
+
124
+ - Fixed OAuth credential updates to replace matching credentials in-place rather than creating disabled rows, preventing unbounded accumulation of soft-deleted credentials
125
+
126
+ ## [13.15.0] - 2026-03-23
127
+
128
+ ### Added
129
+
130
+ - Added `isUsageLimitError()` to `rate-limit-utils` as a single source of truth for detecting usage/quota limit errors across all providers
131
+
132
+ ### Fixed
133
+
134
+ - Fixed lazy stream forwarding to properly handle final results from source streams with `result()` methods
135
+ - Fixed lazy stream error handling to convert iterator failures into terminal error results instead of silently failing
136
+ - Fixed `parseRateLimitReason` to recognize "usage limit" in error messages and correctly classify them as `QUOTA_EXHAUSTED`
137
+ - Fixed Codex `fetchWithRetry` so it no longer retries 429 responses for `usage_limit_reached` errors for up to 5 minutes; it now returns immediately so credential switching can occur
138
+ - Removed `usage.?limit` from `TRANSIENT_MESSAGE_PATTERN` in retry utils since usage limits are not transient and require credential rotation
139
+ - Fixed `parseRateLimitReason` not recognizing "usage limit" in Codex error messages, causing incorrect fallback to `UNKNOWN` classification instead of `QUOTA_EXHAUSTED`
140
+
141
+ ## [13.14.2] - 2026-03-21
142
+ ### Changed
143
+
144
+ - Updated thinking configuration format from `levels` array to `minLevel` and `maxLevel` properties for improved clarity
145
+ - Corrected context window from 400000 to 272000 tokens for GPT-5.4 mini and nano variants on Codex transport
146
+ - Normalized GPT-5.4 variant priority handling to use parsed variant instead of special-casing raw model IDs
147
+ - Added support for `mini` variant in OpenAI model parsing regex
148
+
149
+ ### Fixed
150
+
151
+ - Fixed inconsistent thinking level configuration across multiple model definitions
152
+
153
+ ## [13.14.0] - 2026-03-20
154
+
155
+ ### Fixed
156
+
157
+ - Fixed resumed OpenAI Responses sessions to avoid replaying stale same-provider native history on the first follow-up after process restart ([#488](https://github.com/can1357/oh-my-pi/issues/488))
158
+
159
+ ### Added
160
+
161
+ - Added bundled GPT-5.4 mini model metadata for OpenAI, OpenAI Codex, and GitHub Copilot, including low-to-xhigh thinking support and GitHub Copilot premium multiplier metadata
162
+ - Added bundled GPT-5.4 nano model metadata for OpenAI and OpenAI Codex, including low-to-xhigh thinking support
163
+
164
+
165
+ ## [13.13.2] - 2026-03-18
166
+ ### Changed
167
+
168
+ - Modified tool result handling for aborted assistant messages to preserve existing tool results when already recorded, instead of always replacing them with synthetic 'aborted' results
169
+
170
+ ## [13.13.0] - 2026-03-18
171
+ ### Changed
172
+
173
+ - Changed tool argument validation to always normalize optional null values before type coercion, ensuring consistent handling of LLM-generated 'null' strings
174
+
175
+ ### Fixed
176
+
177
+ - Fixed tool argument validation to properly handle string 'null' values from LLMs on optional fields by stripping them during normalization
178
+ - Improved type safety of `validateToolCall` and `validateToolArguments` functions by returning properly typed `ToolCall["arguments"]` instead of `any`
179
+
180
+ ## [13.12.9] - 2026-03-17
181
+ ### Changed
182
+
183
+ - Extracted OpenAI compatibility detection and resolution logic into dedicated `openai-completions-compat` module for improved maintainability and reusability
184
+
185
+ ### Fixed
186
+
187
+ - Fixed `openai-responses` manual history replay to strip replay-only item IDs and preserve normalized tool `call_id` values for GitHub Copilot follow-up turns ([#457](https://github.com/can1357/oh-my-pi/issues/457))
188
+
189
+ ## [13.12.0] - 2026-03-14
190
+
191
+ ### Added
192
+
193
+ - Added support for `qwen-chat-template` thinking format to enable reasoning via `chat_template_kwargs.enable_thinking`
194
+ - Added `reasoningEffortMap` option to `OpenAICompat` for mapping pi-ai reasoning levels to provider-specific `reasoning_effort` values
195
+ - Added `extraBody` to `OpenAICompat` to support provider-specific request body routing fields in OpenAI-completions requests
196
+ - Added support for reading token usage from choice-level `usage` field as fallback when root-level usage is unavailable
197
+ - Added new models: DeepSeek-V3.2 (Bedrock), Llama 3.1 405B Instruct, Magistral Small 1.2, Ministral 3 3B, Mistral Large 3, Pixtral Large (25.02), NVIDIA Nemotron Nano 3 30B, and Qwen3-5-9b
198
+ - Added `close()` method to `AuthStorage` for properly closing the underlying credential store
199
+ - Added `initiatorOverride` option in OpenAI and Anthropic providers to customize message attribution
200
+
201
+ ### Changed
202
+
203
+ - Changed assistant message content serialization to always use plain string format instead of text block arrays to prevent recursive nesting in OpenAI-compatible backends
204
+ - Kept Bedrock Opus 4.6 context window at 1M and added a max tokens limit of 128K
205
+ - Changed OpenCode Zen/Go Sonnet 4.0/4.5 context window from 1M to 200K
206
+ - Changed GitHub Copilot context windows from 200K to 128K for both gpt-4o and gpt-4o-mini
207
+ - Changed Claude 3.5 Sonnet (Anthropic API) pricing: input from $0.5 to $0.25, output from $3 to $1.5, cache read from $0.05 to $0.025, cache write from $0 to $1
208
+ - Changed Devstral 2 model name from '135B' to '123B'
209
+ - Changed ByteDance Seed 2.0-Lite to support reasoning with effort-based thinking mode and image inputs
210
+ - Changed Qwen3-32b (Groq) reasoning effort mapping to normalize all levels to 'default'
211
+ - Changed finish_reason 'end' to map to 'stop' for improved compatibility with additional providers
212
+ - Changed Anthropic reference model merging to prioritize bundled metadata for known models while using models.dev for newly discovered IDs
213
+
214
+ ### Fixed
215
+
216
+ - Fixed reasoning_effort parameter handling to use provider-specific mappings instead of raw effort values
217
+ - Fixed assistant content serialization for GitHub Copilot and other OpenAI-compatible backends that mirror array payloads
218
+ - Fixed token usage calculation to properly extract cached tokens from both root and nested `prompt_tokens_details` fields
219
+ - Fixed stop reason mapping to handle string values and unknown finish reasons gracefully
220
+ - Fixed resource cleanup in `AuthCredentialStore.close()` to properly finalize all prepared statements before closing the database
221
+
222
+ ## [13.11.1] - 2026-03-13
223
+
224
+ ### Fixed
225
+
226
+ - Added `llama.cpp` as local provider
227
+ - Fixed auth schema V0-to-V1 migration crash when the V0 table lacks a `disabled` column
228
+
229
+ ## [13.11.0] - 2026-03-12
230
+ ### Added
231
+
232
+ - Added support for Parallel AI provider with API key authentication
233
+ - Added `PARALLEL_API_KEY` environment variable support for Parallel provider configuration
234
+ - Added automatic websocket reconnection handling for connection limit errors, with fallback to SSE replay when content has already been emitted
235
+
236
+ ### Changed
237
+
238
+ - Enhanced `CodexProviderStreamError` to include an optional error code field for better error categorization and handling
239
+
240
+ ### Fixed
241
+
242
+ - Improved retry logic to handle HTTP/2 stream errors and internal_error responses from Anthropic API
243
+
244
+ ## [13.9.16] - 2026-03-10
245
+ ### Added
246
+
247
+ - Support for `onPayload` callback to replace provider request payloads before sending, enabling request interception and modification
248
+ - Support for structured text signature metadata with phase information (commentary/final_answer) in OpenAI and Azure OpenAI Responses providers
249
+ - Support for OpenAI Codex Spark model selection with plan-based account prioritization
250
+ - Added `modelId` option to `getApiKey()` to enable model-specific credential ranking
251
+
252
+ ### Changed
253
+
254
+ - Enhanced `onPayload` callback signature to accept model parameter and support async payload replacement
255
+ - Improved error messages for `response.failed` events to include detailed error codes, messages, and incomplete reasons
256
+ - Refactored OpenAI Codex response streaming to improve code organization and maintainability with extracted helper functions and type definitions
257
+ - Enhanced websocket fallback logic to safely replay buffered output over SSE when websocket connections fail mid-stream
258
+ - Improved error recovery for websocket streams by distinguishing between fatal connection errors and retryable stream errors
259
+ - Updated credential ranking strategy to prioritize Pro plan accounts when requesting OpenAI Codex Spark models
260
+
261
+ ### Fixed
262
+
263
+ - Fixed websocket stream recovery to properly reset output state and clear buffered items when falling back to SSE after partial output
264
+ - Fixed handling of malformed JSON messages in websocket streams to trigger immediate fallback to SSE without retry attempts
265
+
266
+ ## [13.9.13] - 2026-03-10
267
+ ### Added
268
+
269
+ - Added `isSpecialServiceTier` utility function to validate OpenAI service tier values
270
+
271
+ ## [13.9.12] - 2026-03-09
272
+ ### Added
273
+
274
+ - Added Tavily web search provider support with API key authentication
275
+
276
+ ### Fixed
277
+
278
+ - Fixed OpenAI-family streaming transports to fail with an explicit idle-timeout error instead of hanging indefinitely when the provider stops sending events mid-response
279
+ - Fixed OpenAI Codex OAuth refresh and usage-limit lookups to respect request timeouts instead of waiting indefinitely during account selection or rotation
280
+ - Fixed OpenAI Codex prewarmed websocket requests to fall back quickly when the socket connects but never starts the response stream
281
+
282
+ ## [13.9.10] - 2026-03-08
283
+
284
+ ### Added
285
+
286
+ - Added `identity_key` column to auth credentials storage for improved credential deduplication
287
+ - Added schema versioning system to auth credentials database for safer migrations
288
+ - Added automatic backfilling of identity keys during database schema migrations
289
+
290
+ ### Changed
291
+
292
+ - Changed credential deduplication logic to use single identity key instead of multiple identifiers for better performance
293
+ - Changed database schema to store normalized identity keys alongside credentials
294
+ - Changed auth schema migration to support upgrading from legacy database versions with automatic data backfill
295
+
296
+ ### Fixed
297
+
298
+ - Fixed API key credential matching to correctly identify when the same key is re-stored, preventing unnecessary row duplication on re-login
299
+ - Fixed credential deduplication to correctly handle OAuth accounts with matching emails but different account IDs
300
+ - Fixed API key replacement to reuse existing stored rows instead of accumulating disabled duplicates
301
+ - Fixed auth storage to preserve newer recorded schema versions when opened by older binaries
302
+
303
+ ## [13.9.8] - 2026-03-08
304
+ ### Fixed
305
+
306
+ - Fixed WebSocket stream fallback logic to safely replay buffered output over SSE when WebSocket fails after partial content has been streamed
307
+
308
+ ## [13.9.4] - 2026-03-07
309
+ ### Changed
310
+
311
+ - Simplified API key credential storage to always replace existing credentials on re-login instead of accumulating multiple keys
312
+ - Updated Kagi API key placeholder from `kagi_...` to `KG_...` to match current API key format
313
+ - Updated Kagi login instructions to clarify Search API access is beta-only and provide support contact
314
+ - Disabled usage reporting in streaming responses for Cerebras models due to compatibility issues
315
+
316
+ ### Fixed
317
+
318
+ - Fixed Cerebras model compatibility by preventing `stream_options` usage requests in chat completions
319
+
320
+ ## [13.9.3] - 2026-03-07
321
+ ### Breaking Changes
322
+
323
+ - Changed `reasoning` parameter from `ThinkingLevel | undefined` to `Effort | undefined` in `SimpleStreamOptions`; 'off' is no longer valid (omit the field instead)
324
+ - Removed `supportsXhigh()` function; check `model.thinking?.maxLevel` instead
325
+ - Removed `ThinkingLevel` and `ThinkingEffort` types; use `Effort` enum
326
+ - Removed `getAvailableThinkingLevels()` and `getAvailableThinkingEfforts()` functions
327
+ - Changed `transformRequestBody()` signature to require `Model` parameter as second argument for effort validation
328
+ - Removed `thinking.ts` module export; import from `model-thinking.ts` instead
329
+
330
+ ### Added
331
+
332
+ - Added `incremental` flag to `OpenAIResponsesHistoryPayload` to support building conversation history from multiple assistant messages instead of replacing it
333
+ - Added `dt` flag to `OpenAIResponsesHistoryPayload` for transport-level metadata
334
+ - Added `ThinkingConfig` interface to models for canonical thinking transport metadata with min/max effort levels and provider-specific mode
335
+ - Added `thinking` field to `Model` type containing per-model thinking capabilities used to clamp and map user-facing effort levels
336
+ - Added `Effort` enum (minimal, low, medium, high, xhigh) as canonical user-facing thinking levels replacing `ThinkingLevel`
337
+ - Added `enrichModelThinking()` function to automatically populate thinking metadata on models based on their capabilities
338
+ - Added `mapEffortToAnthropicAdaptiveEffort()` function to map user effort levels to Anthropic adaptive thinking effort
339
+ - Added `mapEffortToGoogleThinkingLevel()` function to map user effort levels to Google thinking levels
340
+ - Added `requireSupportedEffort()` function to validate and clamp effort levels per model, throwing errors for unsupported combinations
341
+ - Added `clampThinkingLevelForModel()` function to clamp thinking levels to model-supported range
342
+ - Added `applyGeneratedModelPolicies()` and `linkSparkPromotionTargets()` exports from model-thinking module
343
+ - Added `serviceTier` option to control OpenAI processing priority and cost (auto, default, flex, scale, priority)
344
+ - Added `providerPayload` field to messages and responses for reconstructing transport-native history
345
+ - Added Gemini usage provider for tracking quota and tier information
346
+ - Added `getCodexAccountId()` utility to extract account ID from Codex JWT tokens
347
+ - Added email extraction from OpenAI Codex OAuth tokens for credential deduplication
348
+
349
+ ### Changed
350
+
351
+ - Changed credential disabling mechanism from boolean `disabled` flag to `disabled_cause` text field for tracking why credentials were disabled
352
+ - Changed `deleteAuthCredential()` and `deleteAuthCredentialsForProvider()` methods to require a `disabledCause` parameter explaining the reason for disabling
353
+ - Changed Gemini model parsing to strip `-preview` suffix for consistent model identification
354
+ - Changed OpenAI Codex websocket error handling to detect fatal connection errors and immediately fall back to SSE without retrying
355
+ - Changed OpenAI Codex to always use websockets v2 protocol (removed v1 support)
356
+ - Changed `reasoning` parameter type from `ThinkingLevel` to `Effort` in `SimpleStreamOptions`, removing 'off' value (callers should omit the field instead)
357
+ - Changed thinking configuration to use model-specific metadata instead of hardcoded provider logic for effort mapping
358
+ - Changed OpenAI Codex request transformer to accept `Model` parameter for effort validation instead of string model ID
359
+ - Changed Anthropic provider to use model thinking metadata for determining adaptive thinking support instead of model ID pattern matching
360
+ - Changed Google Vertex and Google providers to use shorter variable names for thinking config construction
361
+ - Moved thinking-related utilities from `thinking.ts` to new `model-thinking.ts` module with expanded functionality
362
+ - Moved model policy functions from `provider-models/model-policies.ts` to `model-thinking.ts`
363
+ - Moved `googleGeminiCliUsageProvider` from `providers/google-gemini-cli-usage.ts` to `usage/gemini.ts`
364
+ - Changed default OpenAI model from gpt-5.1-codex to gpt-5.4 across all providers
365
+ - Changed `UsageFetchContext` to remove cache and now() dependencies—usage fetchers now use Date.now() directly
366
+ - Removed `resetInMs` field from usage windows; consumers should calculate from `resetsAt` timestamp
367
+ - Changed OpenAI Codex credential ranking to deduplicate by email when accountId matches
368
+ - Improved OpenAI Codex error handling with retryable error detection
369
+
370
+ ### Removed
371
+
372
+ - Removed `thinking.ts` module; use `model-thinking.ts` instead
373
+ - Removed `provider-models/model-policies.ts` module; functionality moved to `model-thinking.ts`
374
+ - Removed `supportsXhigh()` function from models.ts; use model.thinking metadata instead
375
+ - Removed `ThinkingLevel` and `ThinkingEffort` types; use `Effort` enum instead
376
+ - Removed `getAvailableThinkingLevels()` and `getAvailableThinkingEfforts()` functions
377
+ - Removed `model-policies` export from `provider-models/index.ts`
378
+ - Removed hardcoded thinking level clamping logic from OpenAI Codex request transformer; now uses model metadata
379
+ - Removed `UsageCache` and `UsageCacheEntry` interfaces—caching is now handled internally by AuthStorage
380
+ - Removed `google-gemini-cli-usage` export; use new `gemini` usage provider instead
381
+ - Removed `resetInMs` computation from all usage providers
382
+ - Removed cache TTL constants and cache management from usage fetchers (claude, github-copilot, google-antigravity, kimi, openai-codex, zai)
383
+
384
+ ### Fixed
385
+
386
+ - Fixed credential purging to respect disabled credentials when deduplicating by email, preventing re-enablement of intentionally disabled credentials
387
+ - Fixed OpenAI Codex websocket error reporting to include detailed error messages from error events
388
+ - Fixed conversation history reconstruction to support incremental updates from multiple assistant messages while maintaining backward compatibility with full-snapshot payloads
389
+ - Fixed OpenAI Codex to reject unsupported effort levels instead of silently clamping them, providing clear error messages about supported efforts
390
+ - Fixed model cache normalization to properly apply thinking enrichment when loading cached models
391
+ - Fixed dynamic model merging to apply thinking enrichment to merged model results
392
+ - Fixed OpenAI Codex streaming to properly include service_tier in SSE payloads
393
+ - Fixed type safety in OpenAI responses by removing unsafe type casts on image content blocks
394
+ - Fixed credential purging to respect disabled credentials when deduplicating by email
395
+ - Fixed API-key provider re-login to replace the active stored key instead of appending stale credentials that were still selected first
396
+ - Fixed Kagi login guidance to use the correct `KG_...` key format and mention Search API beta access requirements
397
+
398
+ ## [13.9.2] - 2026-03-05
399
+
400
+ ### Added
401
+
402
+ - Support for redacted thinking blocks in Anthropic messages, enabling secure handling of encrypted reasoning content
403
+ - Preservation of latest Anthropic thinking blocks and redacted thinking content during message transformation, even when switching between Anthropic models
404
+
405
+ ### Changed
406
+
407
+ - Assistant message content now includes `RedactedThinkingContent` type alongside existing text, thinking, and tool call blocks
408
+ - Message transformation logic now preserves signed thinking blocks and redacted thinking for the latest assistant message in Anthropic conversations
409
+
410
+ ### Fixed
411
+
412
+ - Fixed Unicode normalization to consistently apply `toWellFormed()` to all text content, including thinking blocks, ensuring proper handling of malformed UTF-16 sequences
413
+
414
+ ## [13.9.1] - 2026-03-05
415
+
416
+ ### Breaking Changes
417
+
418
+ - Removed `THINKING_LEVELS`, `ALL_THINKING_LEVELS`, `ALL_THINKING_MODES`, `THINKING_MODE_DESCRIPTIONS`, and `THINKING_MODE_LABELS` exports
419
+ - Renamed `formatThinking()` to `getThinkingMetadata()` with changed return type from string to `ThinkingMetadata` object
420
+ - Renamed `getAvailableThinkingLevel()` to `getAvailableThinkingLevels()` and added default parameter
421
+ - Renamed `getAvailableEffort()` to `getAvailableEfforts()` and added default parameter
422
+
423
+ ### Added
424
+
425
+ - Added `ThinkingMetadata` type to provide structured access to thinking mode information (value, label, description)
426
+
427
+ ## [13.9.0] - 2026-03-05
428
+
429
+ ### Added
430
+
431
+ - Exported new thinking module with `Effort`, `ThinkingLevel`, and `ThinkingMode` types for managing reasoning effort levels
432
+ - Added `getAvailableEffort()` function to determine supported thinking effort levels based on model capabilities
433
+ - Added `parseEffort()`, `parseThinkingLevel()`, and `parseThinkingMode()` functions for parsing thinking configuration strings
434
+ - Added `THINKING_LEVELS`, `ALL_THINKING_LEVELS`, and `ALL_THINKING_MODES` constants for iterating over available thinking options
435
+ - Added `THINKING_MODE_DESCRIPTIONS` and `THINKING_MODE_LABELS` for displaying thinking modes in user interfaces
436
+ - Added `formatThinking()` function to format thinking modes as compact display labels
437
+
438
+ ### Changed
439
+
440
+ - Refactored thinking level handling to distinguish between `Effort` (provider-level, no "off") and `ThinkingLevel` (user-facing, includes "off")
441
+ - Updated `ThinkingBudgets` type to use `Effort` instead of `ThinkingLevel` for more precise token budget configuration
442
+ - Improved reasoning option handling to explicitly support "off" value for disabling reasoning across all providers
443
+ - Simplified thinking effort mapping logic by centralizing provider-specific clamping behavior
444
+
445
+ ## [13.7.8] - 2026-03-04
446
+
447
+ ### Added
448
+
449
+ - Added ZenMux provider support with mixed API routing: Anthropic-owned models discovered from `https://zenmux.ai/api/v1/models` now use the Anthropic transport (`https://zenmux.ai/api/anthropic`), while other ZenMux models use the OpenAI-compatible transport.
450
+
451
+ ## [13.7.7] - 2026-03-04
452
+
453
+ ### Changed
454
+
455
+ - Modified response ID normalization to preserve existing item ID prefixes when truncating oversized IDs
456
+ - Updated tool call ID normalization to use `fc_` prefix for generated item IDs instead of `item_` prefix
457
+
458
+ ### Fixed
459
+
460
+ - Fixed handling of reasoning item IDs to remain untouched during response normalization while function call IDs are properly normalized
461
+
462
+ ## [13.7.2] - 2026-03-04
463
+
464
+ ### Added
465
+
466
+ - Added support for Kagi API key authentication via `login kagi` command
467
+ - Added Kagi to the list of available OAuth providers
468
+
469
+ ### Fixed
470
+
471
+ - MCP tool schemas with `$ref`/`$defs` are now dereferenced before being sent to LLM providers, fixing dangling references that left models without type definitions
472
+ - Ajv schema validation no longer emits `console.warn()` for non-standard format keywords (e.g. `"uint"`) from MCP servers, preventing TUI corruption
473
+ - Tool schema compilation is now cached per schema identity, eliminating redundant recompilation on every tool call
474
+
475
+ ## [13.6.0] - 2026-03-03
476
+
477
+ ### Added
478
+
479
+ - Added Anthropic Foundry gateway mode controlled by `CLAUDE_CODE_USE_FOUNDRY`, with support for `FOUNDRY_BASE_URL`, `ANTHROPIC_FOUNDRY_API_KEY`, `ANTHROPIC_CUSTOM_HEADERS`, and optional mTLS material (`CLAUDE_CODE_CLIENT_CERT`, `CLAUDE_CODE_CLIENT_KEY`, `NODE_EXTRA_CA_CERTS`)
480
+ - Added LM Studio provider support with OpenAI-compatible model discovery and OAuth login.
481
+ - Added support for `LM_STUDIO_API_KEY` and `LM_STUDIO_BASE_URL` environment variables for authentication and custom host configuration.
482
+
483
+ ### Changed
484
+
485
+ - Anthropic key resolution now prefers `ANTHROPIC_FOUNDRY_API_KEY` over `ANTHROPIC_OAUTH_TOKEN` and `ANTHROPIC_API_KEY` when Foundry mode is enabled
486
+ - Anthropic auth base-URL fallback now prefers `FOUNDRY_BASE_URL` when `CLAUDE_CODE_USE_FOUNDRY` is enabled
487
+
488
+ ## [13.5.8] - 2026-03-02
489
+
490
+ ### Fixed
491
+
492
+ - Fixed schema compatibility issue where patternProperties in tool parameters caused failures when converting to legacy Antigravity format
493
+
494
+ ## [13.5.5] - 2026-03-01
495
+
496
+ ### Changed
497
+
498
+ - Anthropic Claude system-block cloaking now leaves the agent identity block uncached and applies `cache_control: { type: "ephemeral" }` to injected user system blocks without forcing `ttl: "1h"`
499
+
500
+ ### Fixed
501
+
502
+ - Anthropic request payload construction now enforces a maximum of 4 `cache_control` breakpoints (tools/system/messages priority order) before dispatch
503
+ - Anthropic cache-control normalization now removes later `ttl: "1h"` entries when a default/5m block has already appeared earlier in evaluation order
504
+
505
+ ## [13.5.3] - 2026-03-01
506
+
507
+ ### Fixed
508
+
509
+ - Fixed tool argument coercion to handle malformed JSON with trailing wrapper braces by parsing leading JSON containers
510
+
511
+ ## [13.4.0] - 2026-03-01
512
+
513
+ ### Breaking Changes
514
+
515
+ - Removed `TInput` generic parameter from `ToolResultMessage` interface and removed `$normative` property
516
+
517
+ ### Added
518
+
519
+ - `hasUnrepresentableStrictObjectMap()` pre-flight check in `tryEnforceStrictSchema`: schemas with `patternProperties` or schema-valued `additionalProperties` now degrade gracefully to non-strict mode instead of throwing during enforcement
520
+ - `generateClaudeCloakingUserId()` generates structured user IDs for Anthropic OAuth metadata (`user_{hex64}_account_{uuid}_session_{uuid}`)
521
+ - `isClaudeCloakingUserId()` validates whether a string matches the cloaking user-ID format
522
+ - `mapStainlessOs()` and `mapStainlessArch()` map `process.platform`/`process.arch` to Stainless header values; X-Stainless-Os and X-Stainless-Arch in `claudeCodeHeaders` are now runtime-computed
523
+ - `buildClaudeCodeTlsFetchOptions()` attaches SNI and default TLS ciphers for direct `api.anthropic.com` connections
524
+ - `createClaudeBillingHeader()` generates the `x-anthropic-billing-header` block (SHA-256 payload fingerprint + random build hash)
525
+ - `buildAnthropicSystemBlocks()` now injects a billing header block and the Claude Agent SDK identity block with `ephemeral` 1h cache-control when `includeClaudeCodeInstruction` is set
526
+ - `resolveAnthropicMetadataUserId()` auto-generates a cloaking user ID for OAuth requests when `metadata.user_id` is absent or invalid
527
+ - `AnthropicOAuthFlow` is now exported for direct use
528
+ - OAuth callback server timeout extended from 2 min to 5 min
529
+ - `parseGeminiCliCredentials()` parses Google Cloud credential JSON with support for legacy (`{token,projectId}`), alias (`project_id`/`refresh`/`expires`), and enriched formats
530
+ - `shouldRefreshGeminiCliCredentials()` and proactive token refresh before requests for both Gemini CLI and Antigravity providers (60s pre-expiry buffer)
531
+ - `normalizeAntigravityTools()` converts `parametersJsonSchema` → `parameters` in function declarations for Antigravity compatibility
532
+ - `ANTIGRAVITY_SYSTEM_INSTRUCTION` is now exported for use by search and other consumers
533
+ - `ANTIGRAVITY_LOAD_CODE_ASSIST_METADATA` constant exported from OAuth module with `ANTIGRAVITY` ideType
534
+ - Antigravity project onboarding: `onboardProjectWithRetries()` provisions a new project via `onboardUser` LRO when `loadCodeAssist` returns no existing project (up to 5 attempts, 2s interval)
535
+ - `getOAuthApiKey` now includes `refreshToken`, `expiresAt`, `email`, and `accountId` in the Gemini/Antigravity JSON credential payload to enable proactive refresh
536
+ - Antigravity model discovery now tries the production daily endpoint first, with sandbox as fallback
537
+ - `ANTIGRAVITY_DISCOVERY_DENYLIST` filters low-quality/internal models from discovery results
538
+
539
+ ### Changed
540
+
541
+ - Replaced `sanitizeSurrogates()` utility with native `String.prototype.toWellFormed()` for handling unpaired Unicode surrogates across all providers
542
+ - Extended `ANTHROPIC_OAUTH_BETA` constant in the OpenAI-compat Anthropic route with `interleaved-thinking-2025-05-14`, `context-management-2025-06-27`, and `prompt-caching-scope-2026-01-05` beta flags
543
+ - `claudeCodeVersion` bumped to `2.1.63`; `claudeCodeSystemInstruction` updated to identify as Claude Agent SDK
544
+ - `claudeCodeHeaders`: removed `X-Stainless-Helper-Method`, updated package version to `0.74.0`, runtime version to `v24.3.0`
545
+ - `applyClaudeToolPrefix` / `stripClaudeToolPrefix` now accept an optional prefix override and skip Anthropic built-in tool names (`web_search`, `code_execution`, `text_editor`, `computer`)
546
+ - Accept-Encoding header updated to `gzip, deflate, br, zstd`
547
+ - Non-Anthropic base URLs now receive `Authorization: Bearer` regardless of OAuth status
548
+ - Prompt-caching logic now skips applying breakpoints when any block already carries `cache_control`, instead of stripping then re-applying
549
+ - `fine-grained-tool-streaming-2025-05-14` removed from default beta set
550
+ - Anthropic OAuth token URL changed from `platform.claude.com` to `api.anthropic.com`
551
+ - Anthropic OAuth scopes reduced to `org:create_api_key user:profile user:inference`
552
+ - OAuth code exchange now strips URL fragment from callback code, using the fragment as state override when present
553
+ - Claude usage headers aligned: user-agent updated to `claude-cli/2.1.63 (external, cli)`, anthropic-beta extended with full beta set
554
+ - Antigravity session ID format changed to signed decimal (negative int63 derived from SHA-256 of first user message, or random bounded int63)
555
+ - Antigravity `requestId` now uses `agent-{uuid}` format; non-Antigravity requests no longer include requestId/userAgent/requestType in the payload
556
+ - `ANTIGRAVITY_DAILY_ENDPOINT` corrected to `daily-cloudcode-pa.googleapis.com`; sandbox endpoint kept as fallback only
557
+ - Antigravity discovery: removed `recommended`/`agentModelSorts` filter; now includes all non-internal, non-denylisted models
558
+ - Antigravity discovery no longer sends `project` in the request body
559
+ - Gemini/Antigravity OAuth flows no longer use PKCE (code_challenge removed)
560
+ - Antigravity `loadCodeAssist` metadata ideType changed from `IDE_UNSPECIFIED` to `ANTIGRAVITY`
561
+ - Antigravity `discoverProject` now uses a single canonical production endpoint; falls back to project onboarding instead of a hardcoded default project ID
562
+ - `VALIDATED` tool calling config applied to Antigravity requests with Claude models
563
+ - `maxOutputTokens` removed from Antigravity generation config for non-Claude models
564
+ - System instruction injection for Antigravity scoped to Claude and `gemini-3-pro-high` models only
565
+
566
+ ### Removed
567
+
568
+ - Removed `sanitizeSurrogates()` utility function; use native `String.prototype.toWellFormed()` instead
569
+
570
+ ## [13.3.14] - 2026-02-28
571
+
572
+ ### Added
573
+
574
+ - Exported schema utilities from new `./utils/schema` module, consolidating JSON Schema handling across providers
575
+ - Added `CredentialRankingStrategy` interface for providers to implement usage-based credential selection
576
+ - Added `claudeRankingStrategy` for Anthropic OAuth credentials to enable smart multi-account selection based on usage windows
577
+ - Added `codexRankingStrategy` for OpenAI Codex OAuth credentials with priority boost for fresh 5-hour window starts
578
+ - Added `adaptSchemaForStrict()` helper for unified OpenAI strict schema enforcement across providers
579
+ - Added schema equality and merging utilities: `areJsonValuesEqual()`, `mergeCompatibleEnumSchemas()`, `mergePropertySchemas()`
580
+ - Added Cloud Code Assist schema normalization: `copySchemaWithout()`, `stripResidualCombiners()`, `prepareSchemaForCCA()`
581
+ - Added `sanitizeSchemaForGoogle()` and `sanitizeSchemaForCCA()` for provider-specific schema sanitization
582
+ - Added `StringEnum()` helper for creating string enum schemas compatible with Google and other providers
583
+ - Added `enforceStrictSchema()` and `sanitizeSchemaForStrictMode()` for OpenAI strict mode schema validation
584
+ - Added package exports for `./utils/schema` and `./utils/schema/*` subpaths
585
+ - Added `validateSchemaCompatibility()` to statically audit a JSON Schema against provider-specific rules (`openai-strict`, `google`, `cloud-code-assist-claude`) and return structured violations
586
+ - Added `validateStrictSchemaEnforcement()` to verify the strict-fail-open contract: enforced schemas pass strict validation, failed schemas return the original object identity
587
+ - Added `COMBINATOR_KEYS` (`anyOf`, `allOf`, `oneOf`) and `CCA_UNSUPPORTED_SCHEMA_FIELDS` as exported constants in `fields.ts` to eliminate duplication across modules
588
+ - Added `tryEnforceStrictSchema` result cache (`WeakMap`) to avoid redundant sanitize + enforce work for the same schema object
589
+ - Added comprehensive schema normalization test suite (`schema-normalization.test.ts`) covering strict mode, Google, and Cloud Code Assist normalization paths
590
+ - Added schema compatibility validation test suite (`schema-compatibility.test.ts`) covering all three provider targets
591
+
592
+ ### Changed
593
+
594
+ - Moved schema utilities from `./utils/typebox-helpers` to new `./utils/schema` module with expanded functionality
595
+ - Refactored OpenAI provider tool conversion to use unified `adaptSchemaForStrict()` helper across codex, completions, and responses
596
+ - Updated `AuthStorage` to support generic credential ranking via `CredentialRankingStrategy` instead of Codex-only logic
597
+ - Moved Google schema sanitization functions from `google-shared.ts` to `./utils/schema` module
598
+ - Changed export path: `./utils/typebox-helpers` → `./utils/schema` in main index
599
+ - `sanitizeSchemaForGoogle()` / `sanitizeSchemaForCCA()` now accept a parameterized `unsupportedFields` set internally, enabling code reuse between the two sanitizers
600
+ - `copySchemaWithout()` rewritten using object-rest destructuring for clarity
601
+
602
+ ### Fixed
603
+
604
+ - Fixed cycle detection: `WeakSet` guards added to all recursive schema traversals (`sanitizeSchemaForStrictMode`, `enforceStrictSchema`, `normalizeSchemaForCCA`, `normalizeNullablePropertiesForCloudCodeAssist`, `stripResidualCombiners`, `sanitizeSchemaImpl`, `hasResidualCloudCodeAssistIncompatibilities`) — circular schemas no longer cause infinite loops or stack overflows
605
+ - Fixed `hasResidualCloudCodeAssistIncompatibilities`: cycle detection now returns `false` (not `true`) for already-visited nodes, eliminating false positives that forced the CCA fallback schema on valid recursive inputs
606
+ - Fixed `stripResidualCombiners` to iterate to a fixpoint rather than making a single pass, ensuring chained combiner reductions (where one reduction enables another) are fully resolved
607
+ - Fixed `mergeObjectCombinerVariants` required-field computation: the flattened object now takes the intersection of all variants' `required` arrays (unioned with own-level required properties that exist in the merged schema), preventing required fields from being silently dropped or over-included
608
+ - Fixed `mergeCompatibleEnumSchemas` to use deep structural equality (`areJsonValuesEqual`) instead of `Object.is` when deduplicating object-valued enum members
609
+ - Fixed `sanitizeSchemaForGoogle` const-to-enum deduplication to use deep equality instead of reference equality
610
+ - Fixed `sanitizeSchemaForGoogle` type inference for `anyOf`/`oneOf`-flattened const enums: type is now derived from all variants (must agree), falling back to inference from enum values; mixed null/non-null infers the non-null type and sets `nullable`
611
+ - Fixed `sanitizeSchemaForGoogle` recursion to spread options when descending (previously only `insideProperties`, `normalizeTypeArrayToNullable`, `stripNullableKeyword` were forwarded; new fields `unsupportedFields` and `seen` were silently dropped)
612
+ - Fixed `sanitizeSchemaForGoogle` array-valued `type` filtering to exclude non-string entries before processing
613
+ - Removed incorrect `additionalProperties: false` stripping from `sanitizeSchemaForGoogle` (the field is valid in Google schemas when `false`)
614
+ - Fixed `sanitizeSchemaForStrictMode` to strip the `nullable` keyword and expand it into `anyOf: [schema, {type: "null"}]` in the output, matching what OpenAI strict mode actually expects
615
+ - Fixed `sanitizeSchemaForStrictMode` to infer `type: "array"` when `items` is present but `type` is absent
616
+ - Fixed `sanitizeSchemaForStrictMode` to infer a scalar `type` from uniform `enum` values when `type` is not explicitly set
617
+ - Fixed `sanitizeSchemaForStrictMode` const-to-enum merge to use deep equality, preventing duplicate enum entries when `const` and `enum` both exist with the same value
618
+ - Fixed `enforceStrictSchema` to drop `additionalProperties` unconditionally (previously only object-valued `additionalProperties` was recursed into; non-object values were passed through, violating strict schema requirements)
619
+ - Fixed `enforceStrictSchema` to recurse into `$defs` and `definitions` blocks so referenced sub-schemas are also made strict-compliant
620
+ - Fixed `enforceStrictSchema` to handle tuple-style `items` arrays (previously only single-schema `items` objects were recursed)
621
+ - Fixed `enforceStrictSchema` double-wrapping: optional properties already expressed as `anyOf: [..., {type: "null"}]` are not wrapped again
622
+ - Fixed `enforceStrictSchema` `Array.isArray` type-narrowing for `type` field to filter non-string entries before checking for `"object"`
623
+
624
+ ## [13.3.8] - 2026-02-28
625
+
626
+ ### Fixed
627
+
628
+ - Fixed response body reuse error when handling 429 rate limit responses with retry logic
629
+
630
+ ## [13.3.7] - 2026-02-27
631
+
632
+ ### Added
633
+
634
+ - Added `tryEnforceStrictSchema` function that gracefully downgrades to non-strict mode when schema enforcement fails, enabling better compatibility with malformed or circular schemas
635
+ - Added `sanitizeSchemaForStrictMode` function to normalize JSON schemas by stripping non-structural keywords, converting `const` to `enum`, and expanding type arrays into `anyOf` variants
636
+ - Added Kilo Gateway provider support with OpenAI-compatible model discovery, OAuth `/login kilo`, and `KILO_API_KEY` environment variable support ([#193](https://github.com/can1357/oh-my-pi/issues/193))
637
+
638
+ ### Changed
639
+
640
+ - Changed strict mode handling in OpenAI providers to use `tryEnforceStrictSchema` for safer schema enforcement with automatic fallback to non-strict mode
641
+ - Enhanced `enforceStrictSchema` to properly handle schemas with type arrays containing `object` (e.g., `type: ["object", "null"]`)
642
+
643
+ ### Fixed
644
+
645
+ - Fixed `enforceStrictSchema` to properly handle malformed object schemas with required keys but missing properties
646
+ - Fixed `enforceStrictSchema` to correctly process nested object schemas within `anyOf`, `allOf`, and `oneOf` combinators
647
+
648
+ ## [13.3.1] - 2026-02-26
649
+
650
+ ### Added
651
+
652
+ - Added `topP`, `topK`, `minP`, `presencePenalty`, and `repetitionPenalty` options to `StreamOptions` for fine-grained control over model sampling behavior
653
+
654
+ ## [13.3.0] - 2026-02-26
655
+
656
+ ### Changed
657
+
658
+ - Allowed OAuth provider logins to supply a manual authorization code handler with a default prompt when none is provided
659
+
660
+ ## [13.2.0] - 2026-02-23
661
+
662
+ ### Added
663
+
664
+ - Added support for GitHub Copilot provider in strict mode for both openai-completions and openai-responses tool schemas
665
+
666
+ ### Fixed
667
+
668
+ - Fixed tool descriptions being rejected when undefined by providing empty string fallback across all providers
669
+
670
+ ## [12.19.1] - 2026-02-22
671
+
672
+ ### Added
673
+
674
+ - Exported `isProviderRetryableError` function for detecting rate-limit and transient stream errors
675
+ - Support for retrying malformed JSON stream-envelope parse errors from Anthropic-compatible proxy endpoints
676
+
677
+ ### Changed
678
+
679
+ - Expanded retry detection to include JSON parse errors (unterminated strings, unexpected end of input) in addition to rate-limit errors
680
+
681
+ ## [12.19.0] - 2026-02-22
682
+
683
+ ### Added
684
+
685
+ - Added GitLab Duo provider with support for Claude, GPT-5, and other models via GitLab AI Gateway
686
+ - Added OAuth authentication for GitLab Duo with automatic token refresh and direct access caching
687
+ - Added 16 new GitLab Duo models including Claude Opus/Sonnet/Haiku variants and GPT-5 series models
688
+ - Added `isOAuth` option to Anthropic provider to force OAuth bearer auth mode for proxy tokens
689
+ - Added `streamGitLabDuo` function to route requests through GitLab AI Gateway with direct access tokens
690
+ - Added `getGitLabDuoModels` function to retrieve available GitLab Duo model configurations
691
+ - Added `clearGitLabDuoDirectAccessCache` function to manually clear cached direct access tokens
692
+
693
+ ### Changed
694
+
695
+ - Enhanced `getModelMapping()` to support both GitLab Duo alias IDs (e.g., `duo-chat-gpt-5-codex`) and canonical model IDs (e.g., `gpt-5-codex`) for improved model resolution flexibility
696
+ - Migrated `AuthCredentialStore` and `AuthStorage` into `@oh-my-pi/pi-ai` as shared credential primitives for downstream packages
697
+ - Moved Anthropic auth helpers (`findAnthropicAuth`, `isOAuthToken`, `buildAnthropicSearchHeaders`, `buildAnthropicUrl`) into shared AI utilities for reuse across providers
698
+ - Replaced `CliAuthStorage` with `AuthCredentialStore` for improved credential management with multiple credentials per provider
699
+ - Updated models.json pricing for Claude 3.5 Sonnet (input: 0.23→0.45, output: 3→2.2, added cache read: 0.225) and Claude 3 Opus (input: 0.3→0.95)
700
+ - Moved `mapAnthropicToolChoice` function from gitlab-duo provider to stream module for broader reusability
701
+ - Enhanced HTTP status code extraction to handle string-formatted status codes in error objects
702
+
703
+ ### Removed
704
+
705
+ - Removed `CliAuthStorage` class in favor of new `AuthCredentialStore` with enhanced functionality
706
+
707
+ ## [12.17.2] - 2026-02-21
708
+
709
+ ### Added
710
+
711
+ - Exported `getAntigravityUserAgent()` function for constructing Antigravity User-Agent headers
712
+
713
+ ### Changed
714
+
715
+ - Updated default Antigravity version from 1.15.8 to 1.18.3
716
+ - Unified User-Agent header generation across Antigravity API calls to use centralized `getAntigravityUserAgent()` function
717
+
718
+ ## [12.17.1] - 2026-02-21
719
+
720
+ ### Added
721
+
722
+ - Added new export paths for provider models via `./provider-models` and `./provider-models/*`
723
+ - Added new export paths for Cursor and OpenAI Codex providers via `./providers/cursor/gen/*` and `./providers/openai-codex/*`
724
+ - Added new export paths for usage utilities via `./usage/*`
725
+ - Added new export paths for discovery and OAuth utilities via `./utils/discovery` and `./utils/oauth` with subpath exports
726
+
727
+ ### Changed
728
+
729
+ - Simplified main export path to use wildcard pattern `./src/*.ts` for broader module access
730
+ - Updated `models.json` export to include TypeScript declaration file at `./src/models.json.d.ts`
731
+ - Reorganized package.json field ordering for improved readability
732
+
733
+ ## [12.17.0] - 2026-02-21
734
+
735
+ ### Fixed
736
+
737
+ - Cursor provider: bind `execHandlers` when passing handler methods to the exec protocol so handlers receive correct `this` context (fixes "undefined is not an object (evaluating 'this.options')" when using exec tools such as web search with Cursor)
738
+
739
+ ## [12.16.0] - 2026-02-21
740
+
741
+ ### Added
742
+
743
+ - Exported `readModelCache` and `writeModelCache` functions for direct SQLite-backed model cache access
744
+ - Added `<turn_aborted>` guidance marker as synthetic user message when assistant messages are aborted or errored, informing the model that tools may have partially executed
745
+ - Added support for Sonnet 4.6 models in adaptive thinking detection
746
+
747
+ ### Changed
748
+
749
+ - Updated model cache schema version to support improved global model fallback resolution
750
+ - Improved GitHub Copilot model resolution to prefer provider-specific model definitions over global references when context window is larger, ensuring optimal model capabilities
751
+ - Migrated model cache from per-provider JSON files to unified SQLite database (models.db) for atomic cross-process access
752
+ - Renamed `cachePath` option to `cacheDbPath` in ModelManagerOptions to reflect database-backed storage
753
+ - Improved non-authoritative cache handling with 5-minute retry backoff instead of retrying on every startup
754
+ - Modified handling of aborted/errored assistant messages to preserve tool call structure instead of converting to text summaries, with synthetic 'aborted' tool results injected
755
+ - Updated tool call tracking to use status map (Resolved/Aborted) instead of separate sets for better handling of duplicate and aborted tool results
756
+
757
+ ## [12.15.0] - 2026-02-20
758
+
759
+ ### Fixed
760
+
761
+ - Improved error messages for OAuth token refresh failures by including detailed error information from the provider
762
+ - Separated rate limit and usage limit error handling to provide distinct user-friendly messages for ChatGPT rate limits vs subscription usage limits
763
+
764
+ ### Changed
765
+
766
+ - Increased SDK retry attempts to 5 for OpenAI, Azure OpenAI, and Anthropic clients (was SDK default of 2)
767
+ - Changed 429 retry strategy for OpenAI Codex and Google Gemini CLI to use a 5-minute time budget when the server provides a retry delay, instead of a fixed attempt cap
768
+
769
+ ## [12.14.0] - 2026-02-19
770
+
771
+ ### Added
772
+
773
+ - Added `gemini-3.1-pro` model to opencode provider with text and image input support
774
+ - Added `trinity-large-preview-free` model to opencode provider
775
+ - Added `google/gemini-3.1-pro-preview` model to nanogpt provider
776
+ - Added `google/gemini-3.1-pro-preview` model to openrouter provider with text and image input support
777
+ - Added `gemini-3.1-pro` model to cursor provider
778
+ - Added optional `intent` field to `ToolCall` interface for harness-level intent metadata
779
+
780
+ ### Changed
781
+
782
+ - Changed `big-pickle` model API from `openai-completions` to `anthropic-messages`
783
+ - Changed `big-pickle` model baseUrl from `https://opencode.ai/zen/v1` to `https://opencode.ai/zen`
784
+ - Changed `minimax-m2.5-free` model API from `openai-completions` to `anthropic-messages`
785
+ - Changed `minimax-m2.5-free` model baseUrl from `https://opencode.ai/zen/v1` to `https://opencode.ai/zen`
786
+
787
+ ### Fixed
788
+
789
+ - Fixed tool argument validation to iteratively coerce nested JSON strings across multiple passes, enabling proper handling of deeply nested JSON-serialized objects and arrays
790
+
791
+ ## [12.13.0] - 2026-02-19
792
+
793
+ ### Added
794
+
795
+ - Added NanoGPT provider support with API-key login, dynamic model discovery from `https://nano-gpt.com/api/v1/models`, and text-model filtering for catalog/runtime discovery ([#111](https://github.com/can1357/oh-my-pi/issues/111))
796
+
797
+ ## [12.12.3] - 2026-02-19
798
+
799
+ ### Fixed
800
+
801
+ - Fixed retry logic to recognize 'unable to connect' errors as transient failures
802
+
803
+ ## [12.11.3] - 2026-02-19
804
+
805
+ ### Fixed
806
+
807
+ - Fixed OpenAI Codex streaming to fail truncated responses that end without a terminal completion event, preventing partial outputs from being treated as successful completions.
808
+ - Fixed Codex websocket append fallback by resetting stale turn-state/model-etag session metadata when request shape diverges from appendable history.
809
+
810
+ ## [12.11.1] - 2026-02-19
811
+
812
+ ### Added
813
+
814
+ - Added support for Claude 4.6 Opus and Sonnet models via Cursor API
815
+ - Added support for Composer 1.5 model via Cursor API
816
+ - Added support for GPT-5.1 Codex Mini and GPT-5.1 High models via Cursor API
817
+ - Added support for GPT-5.2 and GPT-5.3 Codex variants (Fast, High, Low, Extra High) via Cursor API
818
+ - Added HTTP/2 transport support for Cursor API requests (required by Cursor API)
819
+
820
+ ### Changed
821
+
822
+ - Updated pricing for Claude 3.5 Sonnet model
823
+ - Updated Claude 3.5 Sonnet context window from 262,144 to 131,072 tokens
824
+ - Simplified Cursor model display names by removing '(Cursor)' suffix
825
+ - Changed Cursor API timeout from 15 seconds to 5 seconds
826
+ - Switched Cursor API transport from HTTP/1.1 to HTTP/2
827
+
828
+ ## [12.11.0] - 2026-02-19
829
+
830
+ ### Added
831
+
832
+ - Added `priority` field to Model interface for provider-assigned model prioritization
833
+ - Added `CatalogDiscoveryConfig` interface to standardize catalog discovery configuration across providers
834
+ - Added type guards `isCatalogDescriptor()` and `allowsUnauthenticatedCatalogDiscovery()` for safer descriptor handling
835
+ - Added `DEFAULT_MODEL_PER_PROVIDER` export from descriptors module for centralized default model management
836
+ - Support for 12 new AI providers: Cloudflare AI Gateway, Hugging Face Inference, LiteLLM, Moonshot, NVIDIA, Ollama, Qianfan, Qwen Portal, Together, Venice, vLLM, and Xiaomi MiMo
837
+ - Login flows for new providers with API key validation and OAuth token support
838
+ - Extended `KnownProvider` type to include all newly supported providers
839
+ - API key environment variable mappings for all new providers in service provider map
840
+ - Model discovery and configuration for Cloudflare AI Gateway, Hugging Face, LiteLLM, Moonshot, NVIDIA, Ollama, Qianfan, Qwen Portal, Together, Venice, vLLM, and Xiaomi MiMo
841
+
842
+ ### Changed
843
+
844
+ - Refactored OAuth credential retrieval to simplify storage lifecycle management in model generation script
845
+ - Parallelized special model discovery sources (Antigravity, Codex) for improved generation performance
846
+ - Reorganized model JSON structure to place `contextWindow` and `maxTokens` before `compat` field for consistency
847
+ - Added `priority` field to OpenAI Codex models for provider-assigned model prioritization
848
+ - Refactored provider descriptors to use helper functions (`descriptor`, `catalog`, `catalogDescriptor`) for reduced code duplication
849
+ - Refactored models.dev provider descriptors to use helper functions (`simpleModelsDevDescriptor`, `openAiCompletionsDescriptor`, `anthropicMessagesDescriptor`) for improved maintainability
850
+ - Unified provider descriptors into single source of truth in `descriptors.ts` for both runtime model discovery and catalog generation, improving maintainability
851
+ - Refactored model generation script to use declarative `CatalogProviderDescriptor` interface instead of separate descriptor types, reducing code duplication
852
+ - Reorganized models.dev provider descriptors into logical groups (Bedrock, Core, Coding Plans, Specialized) for better code organization
853
+ - Simplified API resolution for OpenCode and GitHub Copilot providers using rule-based matching instead of inline conditionals
854
+ - Refactored model generation script to use declarative provider descriptors instead of inline provider-specific logic, improving maintainability and reducing code duplication
855
+ - Extracted model post-processing policies (cache pricing corrections, context window normalization) into dedicated `model-policies.ts` module for better testability and clarity
856
+ - Removed static bundled models for Ollama and vLLM from `models.json` to rely on dynamic discovery instead, reducing static catalog size
857
+ - Updated `OAuthProvider` type to include new provider identifiers
858
+ - Expanded model registry (models.json) with thousands of new model entries across all new providers
859
+ - Modified environment variable resolution to use `$pickenv` for providers with multiple possible env var names
860
+ - Updated README documentation to list all newly supported providers and their authentication requirements
861
+
862
+ ## [12.10.1] - 2026-02-18
863
+
864
+ - Added Synthetic provider
865
+ - Added API-key login helpers for Synthetic and Cerebras providers
866
+
867
+ ## [12.10.0] - 2026-02-18
868
+
869
+ ### Breaking Changes
870
+
871
+ - Renamed public API functions: `getModel()` → `getBundledModel()`, `getModels()` → `getBundledModels()`, `getProviders()` → `getBundledProviders()`
872
+
873
+ ### Added
874
+
875
+ - Exported `ModelManager` API for runtime-aware model resolution with dynamic endpoint discovery
876
+ - Exported provider-specific model manager configuration helpers for Google, OpenAI-compatible, Codex, and Cursor providers
877
+ - Exported discovery utilities for fetching models from Antigravity, Codex, Cursor, Gemini, and OpenAI-compatible endpoints
878
+ - Added `createModelManager()` function to manage bundled and dynamically discovered models with configurable refresh strategies
879
+ - Added support for on-disk model caching with TTL-based invalidation
880
+ - Added `resolveProviderModels()` function for runtime model resolution across multiple providers
881
+ - Added EU cross-region inference variants for Claude Haiku 3.5 on Bedrock
882
+ - Added Claude Sonnet 4.6 and Claude Sonnet 4.6 Thinking models to Antigravity provider
883
+ - Added GLM-5 Free model via OpenCode provider
884
+ - Added GLM-4.7-FlashX model via ZAI provider
885
+ - Added MiniMax-M2.5-highspeed model across multiple providers (minimax-code, minimax-code-cn, minimax, minimax-cn)
886
+ - Added Claude Sonnet 4.6 model to OpenRouter provider
887
+ - Added Qwen 3.5 Plus model to Vercel AI Gateway provider
888
+ - Added Claude Sonnet 4.6 model to Vercel AI Gateway provider
889
+
890
+ ### Changed
891
+
892
+ - Renamed `getModel()` to `getBundledModel()` to clarify it returns compile-time bundled models only
893
+ - Renamed `getModels()` to `getBundledModels()` for consistency
894
+ - Renamed `getProviders()` to `getBundledProviders()` for consistency
895
+ - Refactored model generation script to use modular discovery functions instead of monolithic provider-specific logic
896
+ - Updated models.json with new model entries and pricing updates across multiple providers
897
+ - Updated pricing for deepseek/deepseek-v3 model on OpenRouter
898
+ - Updated maxTokens from 65536 to 4096 for deepseek/deepseek-v3 on OpenRouter
899
+ - Updated pricing and maxTokens for mistralai/mistral-large-2411 on OpenRouter
900
+ - Updated pricing for qwen/qwen-max on Together AI
901
+ - Updated pricing for qwen/qwen-vl-plus on Together AI
902
+ - Updated pricing for qwen/qwen-plus on Together AI
903
+ - Updated pricing for qwen/qwen-turbo on Together AI
904
+ - Expanded EU cross-region inference variant support to all Claude models on Bedrock (previously limited to Haiku, Sonnet, and Opus 4.5)
905
+
906
+ ## [12.8.0] - 2026-02-16
907
+
908
+ ### Added
909
+
910
+ - Added `contextPromotionTarget` model property to specify preferred fallback model when context promotion is triggered
911
+ - Added automatic context promotion target assignment for Spark models to their base model equivalents
912
+ - Added support for Brave search provider with BRAVE_API_KEY environment variable
913
+
914
+ ### Changed
915
+
916
+ - Updated Qwen model context window and max token limits for improved accuracy
917
+
918
+ ## [12.7.0] - 2026-02-16
919
+
920
+ ### Added
921
+
922
+ - Added DeepSeek-V3.2 model support via Amazon Bedrock
923
+ - Added GLM-5 model support via OpenCode
924
+ - Added MiniMax M2.5 model support via OpenCode
925
+
926
+ ### Changed
927
+
928
+ - Updated GLM-4.5, GLM-4.5-Air, GLM-4.5-Flash, GLM-4.5V, GLM-4.6, GLM-4.6V, GLM-4.7, GLM-4.7-Flash, and GLM-5 models to use anthropic-messages API instead of openai-completions
929
+ - Updated GLM models base URL from https://api.z.ai/api/coding/paas/v4 to https://api.z.ai/api/anthropic
930
+ - Updated pricing for multiple models including Mistral, Moonshot, and Qwen variants
931
+ - Updated context window and max tokens for several models to reflect accurate specifications
932
+
933
+ ### Removed
934
+
935
+ - Removed compat field with supportsDeveloperRole and thinkingFormat properties from GLM models
936
+
937
+ ## [12.6.0] - 2026-02-16
938
+
939
+ ### Added
940
+
941
+ - Added source-scoped custom API and OAuth provider registration helpers for extension-defined providers.
942
+
943
+ ### Changed
944
+
945
+ - Expanded `Api` typing to allow extension-defined API identifiers while preserving built-in API exhaustiveness checks.
946
+
947
+ ### Fixed
948
+
949
+ - Fixed custom API registration to reject built-in API identifiers and prevent accidental provider overrides.
950
+
951
+ ## [12.2.0] - 2026-02-13
952
+
953
+ ### Added
954
+
955
+ - Added automatic retry logic for WebSocket stream closures before response completion, with configurable retry budget to improve reliability on flaky connections
956
+ - Added `providerSessionState` option to enable provider-scoped mutable state persistence across agent turns
957
+ - Added WebSocket retry logic with configurable retry budget and delay via `PI_CODEX_WEBSOCKET_RETRY_BUDGET` and `PI_CODEX_WEBSOCKET_RETRY_DELAY_MS` environment variables
958
+ - Added WebSocket idle timeout detection via `PI_CODEX_WEBSOCKET_IDLE_TIMEOUT_MS` environment variable to fail stalled connections
959
+ - Added WebSocket v2 beta header support via `PI_CODEX_WEBSOCKET_V2` environment variable for newer OpenAI API versions
960
+ - Added WebSocket handshake header capture to extract and replay session metadata (turn state, models etag, reasoning flags) across SSE fallback requests
961
+ - Added `preferWebsockets` option to enable WebSocket transport for OpenAI Codex responses when supported
962
+ - Added `prewarmOpenAICodexResponses()` function to establish and reuse WebSocket connections across multiple requests
963
+ - Added `getOpenAICodexTransportDetails()` function to inspect transport layer details including WebSocket status and fallback information
964
+ - Added `getProviderDetails()` function to retrieve formatted provider configuration and transport information
965
+ - Added automatic fallback from WebSocket to SSE when connection fails, with transparent retry logic
966
+ - Added session state management to reuse WebSocket connections and enable request appending across turns
967
+ - Added support for x-codex-turn-state header to maintain conversation state across SSE requests
968
+
969
+ ### Changed
970
+
971
+ - Changed WebSocket session state storage from global maps to provider-scoped session state for multi-agent isolation
972
+ - Changed WebSocket connection initialization to accept idle timeout configuration and handshake header callbacks
973
+ - Changed WebSocket error handling to use standardized transport error messages with `Codex websocket transport error` prefix
974
+ - Changed WebSocket retry behavior to retry transient failures before activating sticky fallback, improving reliability on flaky connections
975
+ - Changed OpenAI Codex model configuration to prefer WebSocket transport by default with `preferWebsockets: true`
976
+ - Changed header handling to use appropriate OpenAI-Beta header values for WebSocket vs SSE transports
977
+ - Perplexity OAuth token refresh now uses JWT expiry extraction instead of Socket.IO RPC, improving reliability when server is unreachable
978
+ - Removed Socket.IO client implementation for Perplexity token refresh; tokens are now validated using embedded JWT expiry claims
979
+
980
+ ### Removed
981
+
982
+ - Removed `refreshPerplexityToken` export; token refresh is now handled internally via JWT expiry detection
983
+
984
+ ### Fixed
985
+
986
+ - Fixed WebSocket stream retry logic to properly handle mid-stream connection closures and retry before falling back to SSE transport
987
+ - Fixed `preferWebsockets` option handling to correctly respect explicit `false` values when determining transport preference
988
+ - Fixed WebSocket append state not being reset after aborted requests, preventing stale state from affecting subsequent turns
989
+ - Fixed WebSocket append state not being reset after stream errors, preventing failed append attempts from blocking future requests
990
+ - Fixed Codex model context window metadata to use 272000 input tokens (instead of 400000 total budget) for non-Spark Codex variants
991
+
992
+ ## [12.0.0] - 2026-02-12
993
+
994
+ ### Added
995
+
996
+ - Added GPT-5.3 Codex Spark model with 128K context window and extended reasoning capabilities
997
+ - Added MiniMax M2.5 and M2.5 Lightning models via OpenAI-compatible API (minimax-code provider)
998
+ - Added MiniMax M2.5 and M2.5 Lightning models via OpenAI-compatible API (minimax-code-cn provider for China region)
999
+ - Added MiniMax M2.5 and M2.5 Lightning models via Anthropic API (minimax and minimax-cn providers)
1000
+ - Added Llama 3.1 8B model via Cerebras API
1001
+ - Added MiniMax M2.5 model via OpenRouter
1002
+ - Added MiniMax M2.5 model via Vercel AI Gateway
1003
+ - Added MiniMax M2.5 Free model via OpenCode
1004
+ - Added Qwen3 VL 32B Instruct multimodal model via OpenRouter
1005
+
1006
+ ### Changed
1007
+
1008
+ - Updated Z.ai GLM-5 pricing and context window configuration on OpenRouter
1009
+ - Updated Qwen3 Max Thinking max tokens from 32768 to 65536 on OpenRouter
1010
+ - Updated OpenAI GPT-5 Image Mini pricing on OpenRouter
1011
+ - Updated OpenAI GPT-5 Pro pricing and context window on OpenRouter
1012
+ - Updated OpenAI o4-mini pricing and context window on OpenRouter
1013
+ - Updated Claude Opus 4.5 Thinking model name formatting (removed parentheses)
1014
+ - Updated Claude Opus 4.6 Thinking model name formatting (removed parentheses)
1015
+ - Updated Claude Sonnet 4.5 Thinking model name formatting (removed parentheses)
1016
+ - Updated Gemini 2.5 Flash Thinking model name formatting (removed parentheses)
1017
+ - Updated Gemini 3 Pro High and Low model name formatting (removed parentheses)
1018
+ - Updated GPT-OSS 120B Medium model name formatting (removed parentheses) and context window to 131072
1019
+
1020
+ ### Removed
1021
+
1022
+ - Removed GLM-5 model from Z.ai provider
1023
+ - Removed Trinity Large Preview Free model from OpenCode provider
1024
+ - Removed MiniMax M2.1 Free model from OpenCode provider
1025
+ - Removed deprecated Anthropic model entries: `claude-3-5-haiku-latest`, `claude-3-5-haiku-20241022`, `claude-3-7-sonnet-20250219`, `claude-3-7-sonnet-latest`, `claude-3-opus-20240229`, `claude-3-sonnet-20240229` ([#33](https://github.com/can1357/oh-my-pi/issues/33))
1026
+
1027
+ ### Fixed
1028
+
1029
+ - Added deprecation filter in model generation script to prevent re-adding deprecated Anthropic models ([#33](https://github.com/can1357/oh-my-pi/issues/33))
1030
+
1031
+ ## [11.14.1] - 2026-02-12
1032
+
1033
+ ### Added
1034
+
1035
+ - Added prompt-caching-scope-2026-01-05 beta feature support
1036
+
1037
+ ### Changed
1038
+
1039
+ - Updated Claude Code version header to 2.1.39
1040
+ - Updated runtime version header to v24.13.1 and package version to 0.73.0
1041
+ - Increased request timeout from 60s to 600s
1042
+ - Reordered Accept-Encoding header values for compression preference
1043
+ - Updated OAuth authorization and token endpoints to use platform.claude.com
1044
+ - Expanded OAuth scopes to include user:sessions:claude_code and user:mcp_servers
1045
+
1046
+ ### Removed
1047
+
1048
+ - Removed claude-code-20250219 beta feature from default models
1049
+ - Removed fine-grained-tool-streaming-2025-05-14 beta feature
1050
+
1051
+ ## [11.13.1] - 2026-02-12
1052
+
1053
+ ### Added
1054
+
1055
+ - Added Perplexity (Pro/Max) OAuth login support via native macOS app extraction or email OTP authentication
1056
+ - Added `loginPerplexity` and `refreshPerplexityToken` functions for Perplexity account integration
1057
+ - Added Socket.IO v4 client implementation for authenticated WebSocket communication with Perplexity API
1058
+
1059
+ ## [11.12.0] - 2026-02-11
1060
+
1061
+ ### Changed
1062
+
1063
+ - Increased maximum retry attempts for Codex requests from 2 to 5 to improve reliability on transient failures
1064
+
1065
+ ### Fixed
1066
+
1067
+ - Fixed tool result content handling in Anthropic provider to provide fallback error message when content is empty
1068
+ - Improved retry delay calculation to parse delay values from error response bodies (e.g., 'Please try again in 225ms')
1069
+
1070
+ ## [11.11.0] - 2026-02-10
1071
+
1072
+ ### Breaking Changes
1073
+
1074
+ - Replaced `./models.generated` export with `./models.json` - update imports from `import { MODELS } from './models.generated'` to `import MODELS from './models.json' with { type: 'json' }`
1075
+
1076
+ ### Added
1077
+
1078
+ - Added TypeScript type declarations for `models.json` to enable proper type inference when importing the JSON file
1079
+
1080
+ ### Changed
1081
+
1082
+ - Updated available models in google-antigravity provider with new model variants and updated context window/token limits
1083
+ - Simplified type signatures for `getModel()` and `getModels()` functions for improved usability
1084
+ - Changed models export from TypeScript module to JSON format for improved performance and reduced bundle size
1085
+ - Updated `@anthropic-ai/sdk` dependency from ^0.72.1 to ^0.74.0
1086
+
1087
+ ## [11.10.0] - 2026-02-10
1088
+
1089
+ ### Added
1090
+
1091
+ - Added support for Kimi K2, K2 Turbo Preview, and K2.5 models with reasoning capabilities
1092
+
1093
+ ### Fixed
1094
+
1095
+ - Fixed Claude Opus 4.6 context window to 200K across all providers (was incorrectly set to 1M)
1096
+ - Fixed Claude Sonnet 4 context window to 200K across multiple providers (was incorrectly set to 1M)
1097
+
1098
+ ## [11.8.0] - 2026-02-10
1099
+
1100
+ ### Added
1101
+
1102
+ - Added `auto` model alias for OpenRouter with automatic model routing
1103
+ - Added `openrouter/aurora-alpha` model with reasoning capabilities
1104
+ - Added `qwen/qwen3-max-thinking` model with extended context window support
1105
+ - Added support for `parametersJsonSchema` in Google Gemini tool definitions for improved JSON Schema compatibility
1106
+
1107
+ ### Changed
1108
+
1109
+ - Updated Claude Sonnet 4 and 4.5 context window from 1M to 200K tokens to reflect actual limits
1110
+ - Updated Claude Opus 4.6 context window to 200K tokens across providers
1111
+ - Changed default `reasoningSummary` for OpenAI Codex from `undefined` to `auto`
1112
+ - Updated Qwen model pricing and context window specifications across multiple variants
1113
+ - Modified Google Gemini CLI system instruction to use compact format
1114
+ - Changed tool parameter handling for Claude models on Google Cloud Code Assist to use legacy `parameters` field for API translation
1115
+
1116
+ ### Removed
1117
+
1118
+ - Removed `glm-4.7-free` model from OpenCode provider
1119
+ - Removed `qwen3-coder` model from OpenCode provider
1120
+ - Removed `ai21/jamba-mini-1.7` model from OpenRouter
1121
+ - Removed `stepfun-ai/step3` model from OpenRouter
1122
+ - Removed duplicate test suite for Google Antigravity Provider with `gemini-3-pro-high`
1123
+
1124
+ ### Fixed
1125
+
1126
+ - Fixed Amazon Bedrock HTTP/1.1 handler import to use direct import instead of dynamic import
1127
+ - Fixed Qwen model context window and pricing inconsistencies across OpenRouter
1128
+ - Fixed cache read pricing for multiple Qwen models
1129
+ - Fixed OpenAI Codex reasoning effort clamping for `gpt-5.3-codex` model
1130
+
1131
+ ## [11.7.1] - 2026-02-07
1132
+
1133
+ ### Added
1134
+
1135
+ - Added Claude Opus 4.6 Thinking model for Antigravity provider
1136
+ - Added Gemini 2.5 Flash, Gemini 2.5 Flash Thinking, and Gemini 2.5 Pro models for Antigravity provider
1137
+ - Added Pony Alpha model via OpenRouter
1138
+
1139
+ ### Changed
1140
+
1141
+ - Updated Antigravity models to use free tier pricing (0 cost) across all models
1142
+ - Changed Antigravity model fetching to dynamically load from API when credentials are available, with hardcoded fallback models
1143
+ - Updated Claude Opus 4.6 context window from 200,000 to 1,000,000 tokens across Bedrock regions
1144
+ - Updated Claude Opus 4.6 cache pricing from 1.5/18.75 to 0.5/6.25 for EU and US regions
1145
+ - Updated Antigravity model pricing to free tier (0 cost) for Claude Opus 4.5 Thinking, Claude Sonnet 4.5 Thinking, Gemini 3 Flash, Gemini 3 Pro variants, and GPT-OSS 120B Medium
1146
+ - Updated GPT-OSS 120B Medium reasoning capability from false to true
1147
+ - Updated Gemini 3 Flash max tokens from 65,535 to 65,536
1148
+ - Updated Claude Opus 4.5 Thinking display name formatting to include parentheses
1149
+ - Updated various model pricing and context window parameters across OpenRouter and other providers
1150
+ - Removed Claude Opus 4.6 20260205 model from Anthropic provider
1151
+
1152
+ ### Fixed
1153
+
1154
+ - Fixed Claude Opus 4.6 model ID format by removing version suffix (:0) in Bedrock configurations
1155
+ - Fixed Llama 3.1 70B Instruct pricing and context window parameters
1156
+ - Fixed Mistral model pricing and cache read costs
1157
+ - Fixed DeepSeek and other model pricing inconsistencies
1158
+ - Fixed Qwen model pricing and token limits
1159
+ - Fixed GLM model pricing and context window specifications
1160
+
1161
+ ## [11.6.0] - 2026-02-07
1162
+
1163
+ ### Added
1164
+
1165
+ - Added Bedrock cache retention support with `PI_CACHE_RETENTION` env var and per-request `cacheRetention` option
1166
+ - Added adaptive thinking support for Bedrock Opus 4.6+ models
1167
+ - Added `AWS_BEDROCK_SKIP_AUTH` env var to support unauthenticated Bedrock proxies
1168
+ - Added `AWS_BEDROCK_FORCE_HTTP1` env var to force HTTP/1.1 for custom Bedrock endpoints
1169
+ - Re-exported `Static`, `TSchema`, and `Type` from `@sinclair/typebox`
1170
+
1171
+ ### Fixed
1172
+
1173
+ - Fixed OpenAI Responses storage disabled by default (`store: false`)
1174
+ - Fixed reasoning effort clamping for gpt-5.3 Codex models (minimal -> low)
1175
+ - Fixed Bedrock `supportsPromptCaching` to also check model cost fields
1176
+
1177
+ ## [11.5.1] - 2026-02-07
1178
+
1179
+ ### Fixed
1180
+
1181
+ - Fixed schema normalization to handle array-valued `type` fields by converting them to a single type with nullable flag for Google provider compatibility
1182
+
1183
+ ## [11.3.0] - 2026-02-06
1184
+
1185
+ ### Added
1186
+
1187
+ - Added `cacheRetention` option to control prompt cache retention preference ('none', 'short', 'long') across providers
1188
+ - Added `maxRetryDelayMs` option to cap server-requested retry delays and fail fast when delays exceed the limit
1189
+ - Added `effort` option for Anthropic Opus 4.6+ models to control adaptive thinking effort levels ('low', 'medium', 'high', 'max')
1190
+ - Added support for Anthropic Opus 4.6+ adaptive thinking mode that lets Claude decide when and how much to think
1191
+ - Added `PI_AI_ANTIGRAVITY_VERSION` environment variable to customize Antigravity sandbox endpoint version
1192
+ - Exported `convertAnthropicMessages` function for converting message formats to Anthropic API
1193
+ - Automatic fallback for Anthropic assistant-prefill requests: appends synthetic user "Continue." message when conversation ends with assistant turn to maintain API compatibility
1194
+
1195
+ ### Changed
1196
+
1197
+ - Changed `supportsXhigh()` to include GPT-5.1 Codex Max and broaden Anthropic support to all Anthropic Messages API models with budget-based thinking capability
1198
+ - Changed Anthropic thinking mode to use adaptive thinking for Opus 4.6+ models instead of budget-based thinking
1199
+ - Changed `supportsXhigh()` to support GPT-5.2/5.3 and Anthropic Opus 4.6+ models with adaptive thinking
1200
+ - Changed prompt caching to respect `cacheRetention` option and support TTL configuration for Anthropic
1201
+ - Changed OpenAI tool definitions to conditionally include `strict` field only when provider supports it
1202
+ - Changed Qwen model support to use `enable_thinking` boolean parameter instead of OpenAI-style reasoning_effort
1203
+
1204
+ ### Fixed
1205
+
1206
+ - Fixed indentation and formatting in `convertAnthropicMessages` function
1207
+ - Fixed handling of conversations ending with assistant messages on Anthropic-routed models that reject assistant prefill requests
1208
+
1209
+ ## [11.2.3] - 2026-02-05
1210
+
1211
+ ### Added
1212
+
1213
+ - Added Claude Opus 4.6 model support across multiple providers (Anthropic, Amazon Bedrock, GitHub Copilot, OpenRouter, OpenCode, Vercel AI Gateway)
1214
+ - Added GPT-5.3 Codex model support for OpenAI
1215
+ - Added `readSseJson` utility import for improved SSE stream handling in Google Gemini CLI provider
1216
+
1217
+ ### Changed
1218
+
1219
+ - Updated Google Gemini CLI provider to use `readSseJson` utility for cleaner SSE stream parsing
1220
+ - Updated pricing for Llama 3.1 405B model on Vercel AI Gateway (cache read rate adjusted)
1221
+ - Updated Llama 3.1 405B context window and max tokens on Vercel AI Gateway (256000 for both)
1222
+
1223
+ ### Removed
1224
+
1225
+ - Removed Kimi K2, Kimi K2 Turbo Preview, and Kimi K2.5 models
1226
+ - Removed Deep Cogito Cogito V2 Preview models from OpenRouter
1227
+
1228
+ ## [11.0.0] - 2026-02-05
1229
+
1230
+ ### Changed
1231
+
1232
+ - Replaced direct `Bun.env` access with `getEnv()` utility from `@oh-my-pi/pi-utils` for consistent environment variable handling across all providers
1233
+ - Updated environment variable names from `OMP_*` prefix to `PI_*` prefix for consistency (e.g., `OMP_CODING_AGENT_DIR` → `PI_CODING_AGENT_DIR`)
1234
+
1235
+ ### Removed
1236
+
1237
+ - Removed automatic environment variable migration from `PI_*` to `OMP_*` prefixes via `migrate-env.ts` module
1238
+
1239
+ ## [10.5.0] - 2026-02-04
1240
+
1241
+ ### Changed
1242
+
1243
+ - Updated @anthropic-ai/sdk to ^0.72.1
1244
+ - Updated @aws-sdk/client-bedrock-runtime to ^3.982.0
1245
+ - Updated @google/genai to ^1.39.0
1246
+ - Updated @smithy/node-http-handler to ^4.4.9
1247
+ - Updated openai to ^6.17.0
1248
+ - Updated @types/node to ^25.2.0
1249
+
1250
+ ### Removed
1251
+
1252
+ - Removed proxy-agent dependency
1253
+ - Removed undici dependency
1254
+
1255
+ ## [9.4.0] - 2026-01-31
1256
+
1257
+ ### Added
1258
+
1259
+ - Added `getEnv()` function to retrieve environment variables from Bun.env, cwd/.env, or ~/.env
1260
+ - Added support for reading .env files from home directory and current working directory
1261
+ - Added support for `exa` and `perplexity` as known providers in `getEnvApiKey()`
1262
+
1263
+ ### Changed
1264
+
1265
+ - Changed `getEnvApiKey()` to check Bun.env, cwd/.env, and ~/.env files in order of precedence
1266
+ - Refactored provider API key resolution to use a declarative service provider map
1267
+
1268
+ ## [9.2.2] - 2026-01-31
1269
+
1270
+ ### Added
1271
+
1272
+ - Added OpenCode Zen provider with API key authentication for accessing multiple AI models
1273
+ - Added 4 new free models via OpenCode: glm-4.7-free, kimi-k2.5-free, minimax-m2.1-free, trinity-large-preview-free
1274
+ - Added glm-4.7-flash model via Zai provider
1275
+ - Added Kimi Code provider with OpenAI and Anthropic API format support
1276
+ - Added prompt cache retention support with PI_CACHE_RETENTION env var
1277
+ - Added overflow patterns for Bedrock, MiniMax, Kimi; reclassified 429 as rate limiting
1278
+ - Added profile endpoint integration to resolve user emails with 24-hour caching
1279
+ - Added automatic token refresh for expired Kimi OAuth credentials
1280
+ - Added Kimi Code OAuth handler with device authorization flow
1281
+ - Added Kimi Code usage provider with quota caching
1282
+ - Added 4 new Kimi Code models (kimi-for-coding, kimi-k2, kimi-k2-turbo-preview, kimi-k2.5)
1283
+ - Added Kimi Code provider integration with OAuth and token management
1284
+ - Added tool-choice utility for mapping unified ToolChoice to provider-specific formats
1285
+ - Added ToolChoice type for controlling tool selection (auto, none, any, required, function)
1286
+
1287
+ ### Changed
1288
+
1289
+ - Updated Kimi K2.5 cache read pricing from 0.1 to 0.08
1290
+ - Updated MiniMax M2 cache read pricing from 0.1 to 0.099 (input 0.6 and output 3 unchanged; stored value is 0.09999999999999999 due to floating-point representation)
1291
+ - Updated OpenRouter DeepSeek V3.1 pricing and max tokens: input 0.6→0.5, output 3→2.8, maxTokens 262144→4096
1292
+ - Updated OpenRouter DeepSeek R1 pricing and max tokens: input 0.06→0.05, output 0.24→0.2, maxTokens 262144→4096
1293
+ - Updated Anthropic Claude 3.5 Sonnet max tokens from 256000 to 65536 on OpenRouter
1294
+ - Updated Vercel AI Gateway Claude 3.5 Sonnet cache read pricing from 0.125 to 0.13
1295
+ - Updated Vercel AI Gateway Claude 3.5 Sonnet New cache read pricing from 0.125 to 0.13
1296
+ - Updated Vercel AI Gateway GPT-5.2 cache read pricing from 0.175 to 0.18 and display name to 'GPT 5.2'
1297
+ - Updated Zai GLM-4.6 cache read pricing from 0.024999999999999998 to 0.03
1298
+ - Updated Zai Qwen QwQ max tokens from 66000 to 16384
1299
+ - Added delta event batching and throttling (50ms, 20 updates/sec max) to AssistantMessageEventStream
1300
+ - Updated MiniMax-M2 pricing: input 1.2→0.6, output 1.2→3, cacheRead 0.6→0.1
1301
+
1302
+ ### Removed
1303
+
1304
+ - Removed OpenRouter google/gemini-2.0-flash-exp:free model
1305
+ - Removed Vercel AI Gateway stealth/sonoma-dusk-alpha and stealth/sonoma-sky-alpha models
1306
+
1307
+ ### Fixed
1308
+
1309
+ - Fixed rate limit issues with Kimi models by always sending max_tokens
1310
+ - Added handling for sensitive stop reason from Anthropic API safety filters
1311
+ - Added optional chaining for safer JSON schema property access in Anthropic provider
1312
+
1313
+ ## [8.6.0] - 2026-01-27
1314
+
1315
+ ### Changed
1316
+
1317
+ - Replaced JSON5 dependency with Bun.JSON5 parsing
1318
+
1319
+ ### Fixed
1320
+
1321
+ - Filtered empty user text blocks for OpenAI-compatible completions and normalized Kimi reasoning_content for OpenRouter tool-call messages
1322
+
1323
+ ## [8.4.0] - 2026-01-25
1324
+
1325
+ ### Added
1326
+
1327
+ - Added Azure OpenAI Responses provider with deployment mapping and resource-based base URL support
1328
+
1329
+ ### Changed
1330
+
1331
+ - Added OpenRouter routing preferences for OpenAI-compatible completions
1332
+
1333
+ ### Fixed
1334
+
1335
+ - Defaulted Google tool call arguments to empty objects when providers omit args
1336
+ - Guarded Responses/Codex streaming deltas against missing content parts and handled arguments.done events
1337
+
1338
+ ## [8.2.1] - 2026-01-24
1339
+
1340
+ ### Fixed
1341
+
1342
+ - Fixed handling of streaming function call arguments in OpenAI responses to properly parse arguments when sent via `response.function_call_arguments.done` events
1343
+
1344
+ ## [8.2.0] - 2026-01-24
1345
+
1346
+ ### Changed
1347
+
1348
+ - Migrated node module imports from named to namespace imports across all packages for consistency with project guidelines
1349
+
1350
+ ## [8.0.0] - 2026-01-23
1351
+
1352
+ ### Fixed
1353
+
1354
+ - Fixed OpenAI Responses API 400 error "function_call without required reasoning item" when switching between models (same provider, different model). The fix omits the `id` field for function_calls from different models to avoid triggering OpenAI's reasoning/function_call pairing validation
1355
+ - Fixed 400 errors when reading multiple images via GitHub Copilot's Claude models. Claude requires tool_use -> tool_result adjacency with no user messages interleaved. Images from consecutive tool results are now batched into a single user message
1356
+
1357
+ ## [7.0.0] - 2026-01-21
1358
+
1359
+ ### Added
1360
+
1361
+ - Added usage tracking system with normalized schema for provider quota/limit endpoints
1362
+ - Added Claude usage provider for 5-hour and 7-day quota windows
1363
+ - Added GitHub Copilot usage provider for chat, completions, and premium requests
1364
+ - Added Google Antigravity usage provider for model quota tracking
1365
+ - Added Google Gemini CLI usage provider for tier-based quota monitoring
1366
+ - Added OpenAI Codex usage provider for primary and secondary rate limit windows
1367
+ - Added ZAI usage provider for token and request quota tracking
1368
+
1369
+ ### Changed
1370
+
1371
+ - Updated Claude usage provider to extract account identifiers from response headers
1372
+ - Updated GitHub Copilot usage provider to include account identifiers in usage reports
1373
+ - Updated Google Gemini CLI usage provider to handle missing reset time gracefully
1374
+
1375
+ ### Fixed
1376
+
1377
+ - Fixed GitHub Copilot usage provider to simplify token handling and improve reliability
1378
+ - Fixed GitHub Copilot usage provider to properly resolve account identifiers for OAuth credentials
1379
+ - Fixed API validation errors when sending empty user messages (resume with `.`) across all providers:
1380
+ - Google Cloud Code Assist (google-shared.ts)
1381
+ - OpenAI Responses API (openai-responses.ts)
1382
+ - OpenAI Codex Responses API (openai-codex-responses.ts)
1383
+ - Cursor (cursor.ts)
1384
+ - Amazon Bedrock (amazon-bedrock.ts)
1385
+ - Clamped OpenAI Codex reasoning effort "minimal" to "low" for gpt-5.2 models to avoid API errors
1386
+ - Fixed GitHub Copilot usage fallback to internal quota endpoints when billing usage is unavailable
1387
+ - Fixed GitHub Copilot usage metadata to include account identifiers for report dedupe
1388
+ - Fixed Anthropic usage metadata extraction to include account identifiers when provided by the usage endpoint
1389
+ - Fixed Gemini CLI usage windows to consistently label quota windows for display suppression
1390
+
1391
+ ## [6.9.69] - 2026-01-21
1392
+
1393
+ ### Added
1394
+
1395
+ - Added duration and time-to-first-token (ttft) metrics to all AI provider responses
1396
+ - Added performance tracking for streaming responses across all providers
1397
+
1398
+ ## [6.9.0] - 2026-01-21
1399
+
1400
+ ### Removed
1401
+
1402
+ - Removed openai-codex provider exports from main package index
1403
+ - Removed openai-codex prompt utilities and moved them inline
1404
+ - Removed vitest configuration file
1405
+
1406
+ ## [6.8.4] - 2026-01-21
1407
+
1408
+ ### Changed
1409
+
1410
+ - Updated prompt caching strategy to follow Anthropic's recommended hierarchy
1411
+ - Fixed token usage tracking to properly handle cumulative output tokens from message_delta events
1412
+ - Improved message validation to filter out empty or invalid content blocks
1413
+ - Fixed OAuth callback timeout unit: the value 120 (intended as seconds) was being passed as milliseconds; now set to 120,000 milliseconds (120 seconds)
1414
+
1415
+ ## [6.8.3] - 2026-01-21
1416
+
1417
+ ### Added
1418
+
1419
+ - Added `headers` option to all providers for custom request headers
1420
+ - Added `onPayload` hook to observe provider request payloads before sending
1421
+ - Added `strictResponsesPairing` option for Azure OpenAI Responses API compatibility
1422
+ - Added `originator` option to `loginOpenAICodex` for custom OAuth flow identification
1423
+ - Added per-request `headers` and `onPayload` hooks to `StreamOptions`
1424
+
1426
+ ### Fixed
1427
+
1428
+ - Fixed tool call ID normalization for OpenAI Responses API cross-provider handoffs
1429
+ - Skipped errored or aborted assistant messages during cross-provider transforms
1430
+ - Detected AWS ECS/IRSA credentials for Bedrock authentication checks
1431
+ - Normalized Responses API tool call IDs during handoffs and refreshed handoff tests
1433
+ - Enforced strict tool call/result pairing for Azure OpenAI Responses API
1433
+
1436
+ ### Security
1437
+
1438
+ - Enhanced AWS credential detection to support ECS task roles and IRSA web identity tokens
1439
+
1440
+ ## [6.8.2] - 2026-01-21
1441
+
1442
+ ### Fixed
1443
+
1444
+ - Improved error handling for aborted requests in Google Gemini CLI provider
1445
+ - Enhanced OAuth callback flow to handle manual input errors gracefully
1446
+ - Fixed login cancellation handling in GitHub Copilot OAuth flow
1447
+ - Removed fallback manual input from OpenAI Codex OAuth flow
1448
+
1449
+ ### Security
1450
+
1451
+ - Hardened database file permissions to prevent credential leakage
1452
+ - Set secure directory permissions (0o700) for credential storage
1453
+
1454
+ ## [6.8.0] - 2026-01-20
1455
+
1456
+ ### Added
1457
+
1458
+ - Added `logout` command to CLI for OAuth provider logout
1459
+ - Added `status` command to show logged-in providers and token expiry
1460
+ - Added persistent credential storage using SQLite database
1461
+ - Added OAuth callback server with automatic port fallback
1462
+ - Added HTML callback page with success/error states
1463
+ - Added support for Cursor OAuth provider
1464
+
1465
+ ### Changed
1466
+
1467
+ - Updated Promise.withResolvers usage for better compatibility
1468
+ - Replaced custom sleep implementations with Bun.sleep and abortableSleep
1469
+ - Simplified SSE stream parsing using readLines utility
1470
+ - Updated test framework from vitest to bun:test
1471
+ - Replaced temp directory creation with TempDir API
1472
+ - Changed credential storage from auth.json to ~/.omp/agent/agent.db
1473
+ - Changed CLI command examples from npx to bunx
1474
+ - Refactored OAuth flows to use common callback server base class
1475
+ - Updated OAuth provider interfaces to use controller pattern
1476
+
1477
+ ### Fixed
1478
+
1479
+ - Fixed OAuth callback handling with improved error states
1480
+ - Fixed token refresh for all OAuth providers
1481
+
1482
+ ## [6.7.670] - 2026-01-19
1483
+
1484
+ ### Changed
1485
+
1486
+ - Updated Claude Code compatibility headers and version
1487
+ - Improved OAuth token handling with proper state generation
1488
+ - Enhanced cache control for tool and user message blocks
1489
+ - Simplified tool name prefixing for OAuth traffic
1490
+ - Updated PKCE verifier generation for better security
1491
+
1492
+ ## [5.7.67] - 2026-01-18
1493
+
1494
+ ### Fixed
1495
+
1496
+ - Added error handling for unknown OAuth providers
1497
+
1498
+ ## [5.6.77] - 2026-01-18
1499
+
1500
+ ### Fixed
1501
+
1502
+ - Prevented duplicate tool results for errored or aborted messages when results already exist
1503
+
1504
+ ## [5.6.7] - 2026-01-18
1505
+
1506
+ ### Added
1507
+
1508
+ - Added automatic retry logic for OpenAI Codex responses with configurable delay and max retries
1509
+ - Added tool call ID sanitization for Amazon Bedrock to ensure valid characters
1510
+ - Added tool argument validation that coerces JSON-encoded strings for expected non-string types
1511
+
1512
+ ### Changed
1513
+
1514
+ - Updated environment variable prefix from `PI_*` to `OMP_*` for better consistency
1515
+ - Added automatic migration for legacy `PI_*` environment variables to their `OMP_*` equivalents
1516
+ - Adjusted Bedrock Claude thinking budgets to reserve output tokens when maxTokens is too low
1517
+
1518
+ ### Fixed
1519
+
1520
+ - Fixed orphaned tool call handling to ensure proper tool_use/tool_result pairing for all assistant messages
1521
+ - Fixed message transformation to insert synthetic tool results for errored/aborted assistant messages with tool calls
1522
+ - Fixed tool prefix handling in Claude provider to use case-insensitive comparison
1523
+ - Fixed Gemini 3 model handling to treat unsigned tool calls as context-only with anti-mimicry context
1524
+ - Fixed message transformation to filter out empty error messages from conversation history
1525
+ - Fixed OpenAI completions provider compatibility detection to use provider metadata
1526
+ - Fixed OpenAI completions provider to avoid using developer role for opencode provider
1527
+ - Fixed orphaned tool call handling to skip synthetic results for errored assistant messages
1528
+
1529
+ ## [5.5.0] - 2026-01-18
1530
+
1531
+ ### Changed
1532
+
1533
+ - Updated User-Agent header from 'opencode' to 'pi' for OpenAI Codex requests
1534
+ - Simplified Codex system prompt instructions
1535
+ - Removed bridge text override from Codex system prompt builder
1536
+
1537
+ ## [5.3.0] - 2026-01-15
1538
+
1539
+ ### Changed
1540
+
1541
+ - Replaced detailed Codex system instructions with simplified pi assistant instructions
1542
+ - Updated internal documentation references to use pi-internal:// protocol
1543
+
1544
+ ## [5.1.0] - 2026-01-14
1545
+
1546
+ ### Added
1547
+
1548
+ - Added Amazon Bedrock provider with `bedrock-converse-stream` API for Claude models via AWS
1549
+ - Added MiniMax provider with OpenAI-compatible API
1550
+ - Added EU cross-region inference model variants for Claude models on Bedrock
1551
+
1552
+ ### Fixed
1553
+
1554
+ - Fixed Gemini CLI provider retries with proper error handling, retry delays from headers, and empty stream retry logic
1555
+ - Fixed numbered list items showing "1." for all items when code blocks break list continuity (via `start` property)
1556
+
1557
+ ## [5.0.0] - 2026-01-12
1558
+
1559
+ ### Added
1560
+
1561
+ - Added support for `xhigh` thinking level in `thinkingBudgets` configuration
1562
+
1563
+ ### Changed
1564
+
1565
+ - Changed Anthropic thinking token budgets: minimal (1024→3072), low (2048→6144), medium (8192→12288), high (16384→24576)
1566
+ - Changed Google thinking token budgets: minimal (1024), low (2048→4096), medium (8192), high (16384), xhigh (24575)
1567
+ - Changed `supportsXhigh()` to return true for all Anthropic models
1568
+
1569
+ ## [4.6.0] - 2026-01-12
1570
+
1571
+ ### Fixed
1572
+
1573
+ - Fixed incorrect classification of thought signatures in Google Gemini responses—thought signatures are now correctly treated as metadata rather than thinking content indicators
1574
+ - Fixed thought signature handling in Google Gemini CLI and Vertex AI streaming to properly preserve signatures across text deltas
1575
+ - Fixed Google schema sanitization stripping property names that match schema keywords (e.g., "pattern", "format") from tool definitions
1576
+
1577
+ ## [4.4.9] - 2026-01-12
1578
+
1579
+ ### Fixed
1580
+
1581
+ - Fixed Google provider schema sanitization to strip additional unsupported JSON Schema fields (patternProperties, additionalProperties, min/max constraints, pattern, format)
1582
+
1583
+ ## [4.4.8] - 2026-01-12
1584
+
1585
+ ### Fixed
1586
+
1587
+ - Fixed Google provider schema sanitization to properly collapse `anyOf`/`oneOf` with const values into enum arrays
1588
+ - Fixed const-to-enum conversion to infer type from the const value when type is not specified
1589
+
1590
+ ## [4.4.6] - 2026-01-11
1591
+
1592
+ ### Fixed
1593
+
1594
+ - Fixed tool parameter schema sanitization to only apply Google-specific transformations for Gemini models, preserving original schemas for other model types
1595
+
1596
+ ## [4.4.5] - 2026-01-11
1597
+
1598
+ ### Changed
1599
+
1600
+ - Exported `sanitizeSchemaForGoogle` utility function for external use
1601
+
1602
+ ### Fixed
1603
+
1604
+ - Fixed Google provider schema sanitization to strip additional unsupported JSON Schema fields ($schema, $ref, $defs, format, examples, and others)
1605
+ - Fixed Google provider to ignore `additionalProperties: false` which is unsupported by the API
1606
+
1607
+ ## [4.4.4] - 2026-01-11
1608
+
1609
+ ### Fixed
1610
+
1611
+ - Fixed Cursor todo updates to bridge update_todos tool calls to the local todo_write tool
1612
+
1613
+ ## [4.3.0] - 2026-01-11
1614
+
1615
+ ### Added
1616
+
1617
+ - Added debug log filtering and display script for Cursor JSONL logs with follow mode and coalescing support
1618
+ - Added protobuf definition extractor script to reconstruct .proto files from bundled JavaScript
1619
+ - Added conversation state caching to persist context across multiple Cursor API requests in the same session
1620
+ - Added shell streaming support for real-time stdout/stderr output during command execution
1621
+ - Added JSON5 parsing for MCP tool arguments with Python-style boolean and None value normalization
1622
+ - Added Cursor provider with support for Claude, GPT, and Gemini models via Cursor's agent API
1623
+ - Added OAuth authentication flow for Cursor including login, token refresh, and expiry detection
1624
+ - Added `cursor-agent` API type with streaming support and tool execution handlers
1625
+ - Added Cursor model definitions including Claude 4.5, GPT-5.x, Gemini 3, and Grok variants
1626
+ - Added model generation script to automatically fetch and update AI model definitions from models.dev and OpenRouter APIs
1627
+
1628
+ ### Changed
1629
+
1630
+ - Changed Cursor debug logging to use structured JSONL format with automatic MCP argument decoding
1631
+ - Changed MCP tool argument decoding to use protobuf Value schema for improved type handling
1632
+ - Changed tool advertisement to filter Cursor native tools (bash, read, write, delete, ls, grep, lsp) instead of only exposing `mcp_`-prefixed tools
1633
+
1634
+ ### Fixed
1635
+
1636
+ - Fixed Cursor conversation history serialization so subagents retain task context and can call complete
1637
+
1638
+ ## [4.2.1] - 2026-01-11
1639
+
1640
+ ### Changed
1641
+
1642
+ - Updated `reasoningSummary` option to accept only `"auto"`, `"concise"`, `"detailed"`, or `null` (removed `"off"` and `"on"` values)
1643
+ - Changed default `reasoningSummary` from `"auto"` to `"detailed"`
1644
+ - OpenAI Codex: switched to bundled system prompt matching opencode, changed originator to "opencode", simplified prompt handling
1645
+
1646
+ ### Fixed
1647
+
1648
+ - Fixed Cloud Code Assist tool schema conversion to avoid unsupported `const` fields
1649
+
1650
+ ## [4.0.0] - 2026-01-10
1651
+
1652
+ ### Added
1653
+
1654
+ - Added `betas` option in `AnthropicOptions` for passing custom Anthropic beta feature flags
1655
+ - OpenCode Zen provider support with 26 models (Claude, GPT, Gemini, Grok, Kimi, GLM, Qwen, etc.). Set `OPENCODE_API_KEY` env var to use.
1656
+ - `thinkingBudgets` option in `SimpleStreamOptions` for customizing token budgets per thinking level on token-based providers
1657
+ - `sessionId` option in `StreamOptions` for providers that support session-based caching. OpenAI Codex provider uses this to set `prompt_cache_key` and routing headers.
1658
+ - `supportsUsageInStreaming` compatibility flag for OpenAI-compatible providers that reject `stream_options: { include_usage: true }`. Defaults to `true`. Set to `false` in model config for providers like gatewayz.ai.
1659
+ - `GOOGLE_APPLICATION_CREDENTIALS` env var support for Vertex AI credential detection (standard for CI/production)
1660
+ - Exported OpenAI Codex utilities: `CacheMetadata`, `getCodexInstructions`, `getModelFamily`, `ModelFamily`, `buildCodexPiBridge`, `buildCodexSystemPrompt`, `CodexSystemPrompt`
1661
+ - Headless OAuth support for all callback-server providers (Google Gemini CLI, Antigravity, OpenAI Codex): paste redirect URL when browser callback is unreachable
1662
+ - Cancellable GitHub Copilot device code polling via AbortSignal
1663
+ - Improved error messages for OpenRouter providers by including raw metadata from upstream errors
1664
+
1665
+ ### Changed
1666
+
1667
+ - Changed Anthropic provider to include Claude Code system instruction for all API key types, not just OAuth tokens (except Haiku models)
1668
+ - Changed Anthropic OAuth tool naming to use `proxy_` prefix instead of mapping to Claude Code tool names, avoiding potential name collisions
1669
+ - Changed Anthropic provider to include Claude Code headers for all requests, not just OAuth tokens
1670
+ - Anthropic provider now maps tool names to Claude Code's exact tool names (Read, Write, Edit, Bash, Grep, Glob) instead of using prefixed names
1671
+ - OpenAI Completions provider now disables strict mode on tools to allow optional parameters without null unions
1672
+
1673
+ ### Fixed
1674
+
1675
+ - Fixed Anthropic OAuth code parsing to accept full redirect URLs in addition to raw authorization codes
1676
+ - Fixed Anthropic token refresh to preserve existing refresh token when server doesn't return a new one
1677
+ - Fixed thinking mode being enabled when tool_choice forces a specific tool, which is unsupported
1678
+ - Fixed max_tokens being too low when thinking budget is set, now auto-adjusts to model's maxTokens
1679
+ - Google Cloud Code Assist OAuth for paid subscriptions: properly handles long-running operations for project provisioning, supports `GOOGLE_CLOUD_PROJECT` / `GOOGLE_CLOUD_PROJECT_ID` env vars for paid tiers
1680
+ - `os.homedir()` calls at module load time; now resolved lazily when needed
1681
+ - OpenAI Responses tool strict flag to use a boolean for LM Studio compatibility
1682
+ - Gemini CLI abort handling: detect native `AbortError` in retry catch block, cancel SSE reader when abort signal fires
1683
+ - Antigravity provider 429 errors by aligning request payload with CLIProxyAPI v6.6.89
1684
+ - Thinking block handling for cross-model conversations: thinking blocks are now converted to plain text when switching models
1685
+ - OpenAI Codex context window from 400,000 to 272,000 tokens to match Codex CLI defaults
1686
+ - Codex SSE error events to surface message, code, and status
1687
+ - Context overflow detection for `context_length_exceeded` error codes
1688
+ - Codex provider now always includes `reasoning.encrypted_content` even when custom `include` options are passed
1689
+ - Codex requests now omit the `reasoning` field entirely when thinking is off
1690
+ - Crash when pasting text with trailing whitespace exceeding terminal width
1691
+
1692
+ ## [3.37.1] - 2026-01-10
1693
+
1694
+ ### Added
1695
+
1696
+ - Added automatic type coercion for tool arguments when LLMs return JSON-encoded strings instead of native types (numbers, booleans, arrays, objects)
1697
+
1698
+ ### Changed
1699
+
1700
+ - Changed tool argument validation to attempt JSON parsing and type coercion before rejecting mismatched types
1701
+ - Changed validation error messages to include both original and normalized arguments when coercion was attempted
1702
+
1703
+ ## [3.37.0] - 2026-01-10
1704
+
1705
+ ### Changed
1706
+
1707
+ - Enabled type coercion in JSON schema validation to automatically convert compatible types
1708
+
1709
+ ## [3.35.0] - 2026-01-09
1710
+
1711
+ ### Added
1712
+
1713
+ - Enhanced error messages to include retry-after timing information from API rate limit headers
1714
+
1715
+ ## [0.42.0] - 2026-01-09
1716
+
1717
+ ### Added
1718
+
1719
+ - Added OpenCode Zen provider support with 26 models (Claude, GPT, Gemini, Grok, Kimi, GLM, Qwen, etc.). Set `OPENCODE_API_KEY` env var to use.
1720
+
1721
+ ## [0.39.0] - 2026-01-08
1722
+
1723
+ ### Fixed
1724
+
1725
+ - Fixed Gemini CLI abort handling: detect native `AbortError` in retry catch block, cancel SSE reader when abort signal fires ([#568](https://github.com/badlogic/pi-mono/pull/568) by [@tmustier](https://github.com/tmustier))
1726
+ - Fixed Antigravity provider 429 errors by aligning request payload with CLIProxyAPI v6.6.89: inject Antigravity system instruction with `role: "user"`, set `requestType: "agent"`, and use `antigravity` userAgent. Added bridge prompt to override Antigravity behavior (identity, paths, web dev guidelines) with Pi defaults. ([#571](https://github.com/badlogic/pi-mono/pull/571) by [@ben-vargas](https://github.com/ben-vargas))
1727
+ - Fixed thinking block handling for cross-model conversations: thinking blocks are now converted to plain text (no `<thinking>` tags) when switching models. Previously, `<thinking>` tags caused models to mimic the pattern and output literal tags. Also fixed empty thinking blocks causing API errors. ([#561](https://github.com/badlogic/pi-mono/issues/561))
1728
+
1729
+ ## [0.38.0] - 2026-01-08
1730
+
1731
+ ### Added
1732
+
1733
+ - `thinkingBudgets` option in `SimpleStreamOptions` for customizing token budgets per thinking level on token-based providers ([#529](https://github.com/badlogic/pi-mono/pull/529) by [@melihmucuk](https://github.com/melihmucuk))
1734
+
1735
+ ### Breaking Changes
1736
+
1737
+ - Removed OpenAI Codex model aliases (`gpt-5`, `gpt-5-mini`, `gpt-5-nano`, `codex-mini-latest`, `gpt-5-codex`, `gpt-5.1-codex`, `gpt-5.1-chat-latest`). Use canonical model IDs: `gpt-5.1`, `gpt-5.1-codex-max`, `gpt-5.1-codex-mini`, `gpt-5.2`, `gpt-5.2-codex`. ([#536](https://github.com/badlogic/pi-mono/pull/536) by [@ghoulr](https://github.com/ghoulr))
1738
+
1739
+ ### Fixed
1740
+
1741
+ - Fixed OpenAI Codex context window from 400,000 to 272,000 tokens to match Codex CLI defaults and prevent 400 errors. ([#536](https://github.com/badlogic/pi-mono/pull/536) by [@ghoulr](https://github.com/ghoulr))
1742
+ - Fixed Codex SSE error events to surface message, code, and status. ([#551](https://github.com/badlogic/pi-mono/pull/551) by [@tmustier](https://github.com/tmustier))
1743
+ - Fixed context overflow detection for `context_length_exceeded` error codes.
1744
+
1745
+ ## [0.37.6] - 2026-01-06
1746
+
1747
+ ### Added
1748
+
1749
+ - Exported OpenAI Codex utilities: `CacheMetadata`, `getCodexInstructions`, `getModelFamily`, `ModelFamily`, `buildCodexPiBridge`, `buildCodexSystemPrompt`, `CodexSystemPrompt` ([#510](https://github.com/badlogic/pi-mono/pull/510) by [@mitsuhiko](https://github.com/mitsuhiko))
1750
+
1751
+ ## [0.37.3] - 2026-01-06
1752
+
1753
+ ### Added
1754
+
1755
+ - `sessionId` option in `StreamOptions` for providers that support session-based caching. OpenAI Codex provider uses this to set `prompt_cache_key` and routing headers.
1756
+
1757
+ ## [0.37.2] - 2026-01-05
1758
+
1759
+ ### Fixed
1760
+
1761
+ - Codex provider now always includes `reasoning.encrypted_content` even when custom `include` options are passed ([#484](https://github.com/badlogic/pi-mono/pull/484) by [@kim0](https://github.com/kim0))
1762
+
1763
+ ## [0.37.0] - 2026-01-05
1764
+
1765
+ ### Breaking Changes
1766
+
1767
+ - OpenAI Codex models no longer have per-thinking-level variants (e.g., `gpt-5.2-codex-high`). Use the base model ID and set thinking level separately. The Codex provider clamps reasoning effort to what each model supports internally. (initial implementation by [@ben-vargas](https://github.com/ben-vargas) in [#472](https://github.com/badlogic/pi-mono/pull/472))
1768
+
1769
+ ### Added
1770
+
1771
+ - Headless OAuth support for all callback-server providers (Google Gemini CLI, Antigravity, OpenAI Codex): paste redirect URL when browser callback is unreachable ([#428](https://github.com/badlogic/pi-mono/pull/428) by [@ben-vargas](https://github.com/ben-vargas), [#468](https://github.com/badlogic/pi-mono/pull/468) by [@crcatala](https://github.com/crcatala))
1772
+ - Cancellable GitHub Copilot device code polling via AbortSignal
1773
+
1774
+ ### Fixed
1775
+
1776
+ - Codex requests now omit the `reasoning` field entirely when thinking is off, letting the backend use its default instead of forcing a value. ([#472](https://github.com/badlogic/pi-mono/pull/472))
1777
+
1778
+ ## [0.36.0] - 2026-01-05
1779
+
1780
+ ### Added
1781
+
1782
+ - OpenAI Codex OAuth provider with Responses API streaming support: `openai-codex-responses` streaming provider with SSE parsing, tool-call handling, usage/cost tracking, and PKCE OAuth flow ([#451](https://github.com/badlogic/pi-mono/pull/451) by [@kim0](https://github.com/kim0))
1783
+
1784
+ ### Fixed
1785
+
1786
+ - Vertex AI dummy value for `getEnvApiKey()`: Returns `"<authenticated>"` when Application Default Credentials are configured (`~/.config/gcloud/application_default_credentials.json` exists) and both `GOOGLE_CLOUD_PROJECT` (or `GCLOUD_PROJECT`) and `GOOGLE_CLOUD_LOCATION` are set. This allows `streamSimple()` to work with Vertex AI without explicit `apiKey` option. The ADC credentials file existence check is cached per-process to avoid repeated filesystem access.
1787
+
1788
+ ## [0.32.3] - 2026-01-03
1789
+
1790
+ ### Fixed
1791
+
1792
+ - Google Vertex AI models no longer appear in available models list without explicit authentication. Previously, `getEnvApiKey()` returned a dummy value for `google-vertex`, causing models to show up even when Google Cloud ADC was not configured.
1793
+
1794
+ ## [0.32.0] - 2026-01-03
1795
+
1796
+ ### Added
1797
+
1798
+ - Vertex AI provider with ADC (Application Default Credentials) support. Authenticate with `gcloud auth application-default login`, set `GOOGLE_CLOUD_PROJECT` and `GOOGLE_CLOUD_LOCATION`, and access Gemini models via Vertex AI. ([#300](https://github.com/badlogic/pi-mono/pull/300) by [@default-anton](https://github.com/default-anton))
1799
+
1800
+ ### Fixed
1801
+
1802
+ - **Gemini CLI rate limit handling**: Added automatic retry with server-provided delay for 429 errors. Parses delay from error messages like "Your quota will reset after 39s" and waits accordingly. Falls back to exponential backoff for other transient errors. ([#370](https://github.com/badlogic/pi-mono/issues/370))
1803
+
1804
+ ## [0.31.0] - 2026-01-02
1805
+
1806
+ ### Breaking Changes
1807
+
1808
+ - **Agent API moved**: All agent functionality (`agentLoop`, `agentLoopContinue`, `AgentContext`, `AgentEvent`, `AgentTool`, `AgentToolResult`, etc.) has moved to `@mariozechner/pi-agent-core`. Import from that package instead of `@oh-my-pi/pi-ai`.
1809
+
1810
+ ### Added
1811
+
1812
+ - **`GoogleThinkingLevel` type**: Exported type that mirrors Google's `ThinkingLevel` enum values (`"THINKING_LEVEL_UNSPECIFIED" | "MINIMAL" | "LOW" | "MEDIUM" | "HIGH"`). Allows configuring Gemini thinking levels without importing from `@google/genai`.
1813
+ - **`ANTHROPIC_OAUTH_TOKEN` env var**: Now checked before `ANTHROPIC_API_KEY` in `getEnvApiKey()`, allowing OAuth tokens to take precedence.
1814
+ - **`event-stream.js` export**: `AssistantMessageEventStream` utility now exported from package index.
1815
+
1816
+ ### Changed
1817
+
1818
+ - **OAuth uses Web Crypto API**: PKCE generation and OAuth flows now use Web Crypto API (`crypto.subtle`) instead of Node.js `crypto` module. This improves browser compatibility while still working in Node.js 20+.
1819
+ - **Deterministic model generation**: `generate-models.ts` now sorts providers and models alphabetically for consistent output across runs. ([#332](https://github.com/badlogic/pi-mono/pull/332) by [@mrexodia](https://github.com/mrexodia))
1820
+
1821
+ ### Fixed
1822
+
1823
+ - **OpenAI completions empty content blocks**: Empty text or thinking blocks in assistant messages are now filtered out before sending to the OpenAI completions API, preventing validation errors. ([#344](https://github.com/badlogic/pi-mono/pull/344) by [@default-anton](https://github.com/default-anton))
1824
+ - **Thinking token duplication**: Fixed thinking content duplication with chutes.ai provider. The provider was returning thinking content in both `reasoning_content` and `reasoning` fields, causing each chunk to be processed twice. Now only the first non-empty reasoning field is used.
1825
+ - **zAi provider API mapping**: Fixed zAi models to use `openai-completions` API with correct base URL (`https://api.z.ai/api/coding/paas/v4`) instead of incorrect Anthropic API mapping. ([#344](https://github.com/badlogic/pi-mono/pull/344), [#358](https://github.com/badlogic/pi-mono/pull/358) by [@default-anton](https://github.com/default-anton))
1826
+
1827
+ ## [0.28.0] - 2025-12-25
1828
+
1829
+ ### Breaking Changes
1830
+
1831
+ - **OAuth storage removed** ([#296](https://github.com/badlogic/pi-mono/issues/296)): All storage functions (`loadOAuthCredentials`, `saveOAuthCredentials`, `setOAuthStorage`, etc.) removed. Callers are responsible for storing credentials.
1832
+ - **OAuth login functions**: `loginAnthropic`, `loginGitHubCopilot`, `loginGeminiCli`, `loginAntigravity` now return `OAuthCredentials` instead of saving to disk.
1833
+ - **refreshOAuthToken**: Now takes `(provider, credentials)` and returns new `OAuthCredentials` instead of saving.
1834
+ - **getOAuthApiKey**: Now takes `(provider, credentials)` and returns `{ newCredentials, apiKey }` or null.
1835
+ - **OAuthCredentials type**: No longer includes `type: "oauth"` discriminator. Callers add discriminator when storing.
1836
+ - **setApiKey, resolveApiKey**: Removed. Callers must manage their own API key storage/resolution.
1837
+ - **getApiKey**: Renamed to `getEnvApiKey`. Only checks environment variables for known providers.
1838
+
1839
+ ## [0.27.7] - 2025-12-24
1840
+
1841
+ ### Fixed
1842
+
1843
+ - **Thinking tag leakage**: Fixed Claude mimicking literal `</thinking>` tags in responses. Unsigned thinking blocks (from aborted streams) are now converted to plain text without `<thinking>` tags. The TUI still displays them as thinking blocks. ([#302](https://github.com/badlogic/pi-mono/pull/302) by [@nicobailon](https://github.com/nicobailon))
1844
+
1845
+ ## [0.25.1] - 2025-12-21
1846
+
1847
+ ### Added
1848
+
1849
+ - **xhigh thinking level support**: Added `supportsXhigh()` function to check if a model supports xhigh reasoning level. Also clamps xhigh to high for OpenAI models that don't support it. ([#236](https://github.com/badlogic/pi-mono/pull/236) by [@theBucky](https://github.com/theBucky))
1850
+
1851
+ ### Fixed
1852
+
1853
+ - **Gemini multimodal tool results**: Fixed images in tool results causing flaky/broken responses with Gemini models. For Gemini 3, images are now nested inside `functionResponse.parts` per the [docs](https://ai.google.dev/gemini-api/docs/function-calling#multimodal). For older models (which don't support multimodal function responses), images are sent in a separate user message.
1854
+
1855
+ - **Queued message steering**: When `getQueuedMessages` is provided, the agent loop now checks for queued user messages after each tool call and skips remaining tool calls in the current assistant message when a queued message arrives (emitting error tool results).
1856
+
1857
+ - **Double API version path in Google provider URL**: Fixed Gemini API calls returning 404 after baseUrl support was added. The SDK was appending its default apiVersion to baseUrl which already included the version path. ([#251](https://github.com/badlogic/pi-mono/pull/251) by [@shellfyred](https://github.com/shellfyred))
1858
+
1859
+ - **Anthropic SDK retries disabled**: Re-enabled SDK-level retries (default 2) for transient HTTP failures. ([#252](https://github.com/badlogic/pi-mono/issues/252))
1860
+
1861
+ ## [0.23.5] - 2025-12-19
1862
+
1863
+ ### Added
1864
+
1865
+ - **Gemini 3 Flash thinking support**: Extended thinking level support for Gemini 3 Flash models (MINIMAL, LOW, MEDIUM, HIGH) to match Pro models' capabilities. ([#212](https://github.com/badlogic/pi-mono/pull/212) by [@markusylisiurunen](https://github.com/markusylisiurunen))
1866
+
1867
+ - **GitHub Copilot thinking models**: Added thinking support for additional Copilot models (o3-mini, o1-mini, o1-preview). ([#234](https://github.com/badlogic/pi-mono/pull/234) by [@aadishv](https://github.com/aadishv))
1868
+
1869
+ ### Fixed
1870
+
1871
+ - **Gemini tool result format**: Fixed tool result format for Gemini 3 Flash Preview which strictly requires `{ output: value }` for success and `{ error: value }` for errors. Previous format using `{ result, isError }` was rejected by newer Gemini models. Also improved type safety by removing `as any` casts. ([#213](https://github.com/badlogic/pi-mono/issues/213), [#220](https://github.com/badlogic/pi-mono/pull/220))
1872
+
1873
+ - **Google baseUrl configuration**: Google provider now respects `baseUrl` configuration for custom endpoints or API proxies. ([#216](https://github.com/badlogic/pi-mono/issues/216), [#221](https://github.com/badlogic/pi-mono/pull/221) by [@theBucky](https://github.com/theBucky))
1874
+
1875
+ - **GitHub Copilot vision requests**: Added `Copilot-Vision-Request` header when sending images to GitHub Copilot models. ([#222](https://github.com/badlogic/pi-mono/issues/222))
1876
+
1877
+ - **GitHub Copilot X-Initiator header**: Fixed X-Initiator logic to check last message role instead of any message in history. This ensures proper billing when users send follow-up messages. ([#209](https://github.com/badlogic/pi-mono/issues/209))
1878
+
1879
+ ## [0.22.3] - 2025-12-16
1880
+
1881
+ ### Added
1882
+
1883
+ - **Image limits test suite**: Added comprehensive tests for provider-specific image limitations (max images, max size, max dimensions). Discovered actual limits: Anthropic (100 images, 5MB, 8000px), OpenAI (500 images, ≥25MB), Gemini (~2500 images, ≥40MB), Mistral (8 images, ~15MB), OpenRouter (~40 images context-limited, ~15MB). ([#120](https://github.com/badlogic/pi-mono/pull/120))
1884
+
1885
+ - **Tool result streaming**: Added `tool_execution_update` event and optional `onUpdate` callback to `AgentTool.execute()` for streaming tool output during execution. Tools can now emit partial results (e.g., bash stdout) that are forwarded to subscribers. ([#44](https://github.com/badlogic/pi-mono/issues/44))
1886
+
1887
+ - **X-Initiator header for GitHub Copilot**: Added X-Initiator header handling for GitHub Copilot provider to ensure correct call accounting (agent calls are not deducted from quota). Sets initiator based on last message role. ([#200](https://github.com/badlogic/pi-mono/pull/200) by [@kim0](https://github.com/kim0))
1888
+
1889
+ ### Changed
1890
+
1891
+ - **Normalized tool_execution_end result**: `tool_execution_end` event now always contains `AgentToolResult` (no longer `AgentToolResult | string`). Errors are wrapped in the standard result format.
1892
+
1893
+ ### Fixed
1894
+
1895
+ - **Reasoning disabled by default**: When `reasoning` option is not specified, thinking is now explicitly disabled for all providers. Previously, some providers like Gemini with "dynamic thinking" would use their default (thinking ON), causing unexpected token usage. This was the original intended behavior. ([#180](https://github.com/badlogic/pi-mono/pull/180) by [@markusylisiurunen](https://github.com/markusylisiurunen))
1896
+
1897
+ ## [0.22.2] - 2025-12-15
1898
+
1899
+ ### Added
1900
+
1901
+ - **Interleaved thinking for Anthropic**: Added `interleavedThinking` option to `AnthropicOptions`. When enabled, Claude 4 models can think between tool calls and reason after receiving tool results. Enabled by default (no extra token cost, just unlocks the capability). Set `interleavedThinking: false` to disable.
1902
+
1903
+ ## [0.22.1] - 2025-12-15
1904
+
1905
+ _Dedicated to Peter's shoulder ([@steipete](https://twitter.com/steipete))_
1906
+
1907
+ ### Added
1908
+
1909
+ - **Interleaved thinking for Anthropic**: Enabled interleaved thinking in the Anthropic provider, allowing Claude models to output thinking blocks interspersed with text responses.
1910
+
1911
+ ## [0.22.0] - 2025-12-15
1912
+
1913
+ ### Added
1914
+
1915
+ - **GitHub Copilot provider**: Added `github-copilot` as a known provider with models sourced from models.dev. Includes Claude, GPT, Gemini, Grok, and other models available through GitHub Copilot. ([#191](https://github.com/badlogic/pi-mono/pull/191) by [@cau1k](https://github.com/cau1k))
1916
+
1917
+ ### Fixed
1918
+
1919
+ - **GitHub Copilot gpt-5 models**: Fixed API selection for gpt-5 models to use `openai-responses` instead of `openai-completions` (gpt-5 models are not accessible via completions endpoint)
1920
+
1921
+ - **GitHub Copilot cross-model context handoff**: Fixed context handoff failing when switching between GitHub Copilot models using different APIs (e.g., gpt-5 to claude-sonnet-4). Tool call IDs from OpenAI Responses API were incompatible with other models. ([#198](https://github.com/badlogic/pi-mono/issues/198))
1922
+
1923
+ - **Gemini 3 Pro thinking levels**: Thinking level configuration now works correctly for Gemini 3 Pro models. Previously all levels mapped to -1 (minimal thinking). Now LOW/MEDIUM/HIGH properly control test-time computation. ([#176](https://github.com/badlogic/pi-mono/pull/176) by [@markusylisiurunen](https://github.com/markusylisiurunen))
1924
+
1925
+ ## [0.18.2] - 2025-12-11
1926
+
1927
+ ### Changed
1928
+
1929
+ - **Anthropic SDK retries disabled**: Set `maxRetries: 0` on Anthropic client to allow application-level retry handling. The SDK's built-in retries were interfering with coding-agent's retry logic. ([#157](https://github.com/badlogic/pi-mono/issues/157))
1930
+
1931
+ ## [0.18.1] - 2025-12-10
1932
+
1933
+ ### Added
1934
+
1935
+ - **Mistral provider**: Added support for Mistral AI models via the OpenAI-compatible API. Includes automatic handling of Mistral-specific requirements (tool call ID format). Set `MISTRAL_API_KEY` environment variable to use.
1936
+
1937
+ ### Fixed
1938
+
1939
+ - Fixed Mistral 400 errors after aborted assistant messages by skipping empty assistant messages (no content, no tool calls) ([#165](https://github.com/badlogic/pi-mono/issues/165))
1940
+
1941
+ - Removed synthetic assistant bridge message after tool results for Mistral (no longer required as of Dec 2025) ([#165](https://github.com/badlogic/pi-mono/issues/165))
1942
+
1943
+ - Fixed bug where `ANTHROPIC_API_KEY` environment variable was deleted globally after first OAuth token usage, causing subsequent prompts to fail ([#164](https://github.com/badlogic/pi-mono/pull/164))
1944
+
1945
+ ## [0.17.0] - 2025-12-09
1946
+
1947
+ ### Added
1948
+
1949
+ - **`agentLoopContinue` function**: Continue an agent loop from existing context without adding a new user message. Validates that the last message is `user` or `toolResult`. Useful for retry after context overflow or resuming from manually-added tool results.
1950
+
1951
+ ### Breaking Changes
1952
+
1953
+ - Removed provider-level tool argument validation. Validation now happens in `agentLoop` via `executeToolCalls`, allowing models to retry on validation errors. For manual tool execution, use `validateToolCall(tools, toolCall)` or `validateToolArguments(tool, toolCall)`.
1954
+
1955
+ ### Added
1956
+
1957
+ - Added `validateToolCall(tools, toolCall)` helper that finds the tool by name and validates arguments.
1958
+
1959
+ - **OpenAI compatibility overrides**: Added `compat` field to `Model` for `openai-completions` API, allowing explicit configuration of provider quirks (`supportsStore`, `supportsDeveloperRole`, `supportsReasoningEffort`, `maxTokensField`). Falls back to URL-based detection if not set. Useful for LiteLLM, custom proxies, and other non-standard endpoints. ([#133](https://github.com/badlogic/pi-mono/issues/133), thanks @fink-andreas for the initial idea and PR)
1960
+
1961
+ - **xhigh reasoning level**: Added `xhigh` to `ReasoningEffort` type for OpenAI codex-max models. For non-OpenAI providers (Anthropic, Google), `xhigh` is automatically mapped to `high`. ([#143](https://github.com/badlogic/pi-mono/issues/143))
1962
+
1963
+ ### Changed
1964
+
1965
+ - **Updated SDK versions**: OpenAI SDK 5.21.0 → 6.10.0, Anthropic SDK 0.61.0 → 0.71.2, Google GenAI SDK 1.30.0 → 1.31.0
1966
+
1967
+ ## [0.13.0] - 2025-12-06
1968
+
1969
+ ### Breaking Changes
1970
+
1971
+ - **Added `totalTokens` field to `Usage` type**: All code that constructs `Usage` objects must now include the `totalTokens` field. This field represents the total tokens processed by the LLM (input + output + cache). For OpenAI and Google, this uses native API values (`total_tokens`, `totalTokenCount`). For Anthropic, it's computed as `input + output + cacheRead + cacheWrite`.
1972
+
1973
+ ## [0.12.10] - 2025-12-04
1974
+
1975
+ ### Added
1976
+
1977
+ - Added `gpt-5.1-codex-max` model support
1978
+
1979
+ ### Fixed
1980
+
1981
+ - **OpenAI Token Counting**: Fixed `usage.input` to exclude cached tokens for OpenAI providers. Previously, `input` included cached tokens, causing double-counting when calculating total context size via `input + cacheRead`. Now `input` represents non-cached input tokens across all providers, making `input + output + cacheRead + cacheWrite` the correct formula for total context size.
1982
+
1983
+ - **Fixed Claude Opus 4.5 cache pricing** (was 3x too expensive)
1984
+ - Corrected cache_read: $1.50 → $0.50 per MTok
1985
+ - Corrected cache_write: $18.75 → $6.25 per MTok
1986
+ - Added manual override in `scripts/generate-models.ts` until upstream fix is merged
1987
+ - Submitted PR to models.dev: https://github.com/sst/models.dev/pull/439
1988
+
1989
+ ## [0.9.4] - 2025-11-26
1990
+
1991
+ Initial release with multi-provider LLM support.