@oh-my-pi/pi-ai 13.17.1 → 13.17.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,23 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [13.17.5] - 2026-04-01
6
+ ### Changed
7
+
8
+ - Increased default first-event timeout from 15s to 45s to better accommodate longer request setup times
9
+ - Modified the first-event watchdog to inherit the idle timeout when the idle timeout exceeds the default first-event timeout, ensuring consistent timeout behavior across different configurations
10
+
11
+ ### Fixed
12
+
13
+ - Fixed first-event watchdog initialization timing so it no longer starts before the actual stream request is created, preventing premature timeouts during request setup
14
+ - Fixed first-event watchdog timing so OpenAI-family providers no longer count slow request setup against the first streamed event timeout, and raised the default first-event timeout to avoid false aborts after long tool turns
15
+
16
+ ## [13.17.2] - 2026-04-01
17
+
18
+ ### Fixed
19
+
20
+ - Fixed OpenAI-family first-event timeouts to preserve provider-specific timeout errors for retry classification instead of flattening them to generic aborts ([#591](https://github.com/can1357/oh-my-pi/issues/591))
21
+
5
22
  ## [13.17.1] - 2026-04-01
6
23
 
7
24
  ### Added
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-ai",
4
- "version": "13.17.1",
4
+ "version": "13.17.5",
5
5
  "description": "Unified LLM API with automatic model discovery and provider configuration",
6
6
  "homepage": "https://github.com/can1357/oh-my-pi",
7
7
  "author": "Can Boluk",
@@ -41,7 +41,7 @@
41
41
  "@aws-sdk/client-bedrock-runtime": "^3",
42
42
  "@bufbuild/protobuf": "^2.11",
43
43
  "@google/genai": "^1.43",
44
- "@oh-my-pi/pi-utils": "13.17.1",
44
+ "@oh-my-pi/pi-utils": "13.17.5",
45
45
  "@sinclair/typebox": "^0.34",
46
46
  "@smithy/node-http-handler": "^4.4",
47
47
  "ajv": "^8.18",
package/src/models.json CHANGED
@@ -8117,7 +8117,7 @@
8117
8117
  },
8118
8118
  "anthropic/claude-opus-4": {
8119
8119
  "id": "anthropic/claude-opus-4",
8120
- "name": "Claude Opus 4",
8120
+ "name": "Claude Opus 4 (latest)",
8121
8121
  "api": "openai-completions",
8122
8122
  "provider": "kilo",
8123
8123
  "baseUrl": "https://api.kilo.ai/api/gateway",
@@ -8133,7 +8133,7 @@
8133
8133
  "cacheWrite": 0
8134
8134
  },
8135
8135
  "contextWindow": 200000,
8136
- "maxTokens": 64000,
8136
+ "maxTokens": 32000,
8137
8137
  "thinking": {
8138
8138
  "mode": "effort",
8139
8139
  "minLevel": "minimal",
@@ -8366,6 +8366,25 @@
8366
8366
  "contextWindow": 222222,
8367
8367
  "maxTokens": 8888
8368
8368
  },
8369
+ "arcee-ai/trinity-large-thinking": {
8370
+ "id": "arcee-ai/trinity-large-thinking",
8371
+ "name": "Arcee AI: Trinity Large Thinking",
8372
+ "api": "openai-completions",
8373
+ "provider": "kilo",
8374
+ "baseUrl": "https://api.kilo.ai/api/gateway",
8375
+ "reasoning": false,
8376
+ "input": [
8377
+ "text"
8378
+ ],
8379
+ "cost": {
8380
+ "input": 0,
8381
+ "output": 0,
8382
+ "cacheRead": 0,
8383
+ "cacheWrite": 0
8384
+ },
8385
+ "contextWindow": 222222,
8386
+ "maxTokens": 8888
8387
+ },
8369
8388
  "arcee-ai/trinity-mini": {
8370
8389
  "id": "arcee-ai/trinity-mini",
8371
8390
  "name": "Arcee AI: Trinity Mini",
@@ -9233,7 +9252,7 @@
9233
9252
  },
9234
9253
  "google/gemini-3-pro-image-preview": {
9235
9254
  "id": "google/gemini-3-pro-image-preview",
9236
- "name": "Gemini 3 Pro Image Preview",
9255
+ "name": "Google: Nano Banana Pro (Gemini 3 Pro Image Preview)",
9237
9256
  "api": "openai-completions",
9238
9257
  "provider": "kilo",
9239
9258
  "baseUrl": "https://api.kilo.ai/api/gateway",
@@ -9248,8 +9267,8 @@
9248
9267
  "cacheRead": 0,
9249
9268
  "cacheWrite": 0
9250
9269
  },
9251
- "contextWindow": 1048000,
9252
- "maxTokens": 64000,
9270
+ "contextWindow": 222222,
9271
+ "maxTokens": 8888,
9253
9272
  "thinking": {
9254
9273
  "mode": "effort",
9255
9274
  "minLevel": "low",
@@ -9485,6 +9504,44 @@
9485
9504
  "contextWindow": 128000,
9486
9505
  "maxTokens": 4096
9487
9506
  },
9507
+ "google/lyria-3-clip-preview": {
9508
+ "id": "google/lyria-3-clip-preview",
9509
+ "name": "Google: Lyria 3 Clip Preview",
9510
+ "api": "openai-completions",
9511
+ "provider": "kilo",
9512
+ "baseUrl": "https://api.kilo.ai/api/gateway",
9513
+ "reasoning": false,
9514
+ "input": [
9515
+ "text"
9516
+ ],
9517
+ "cost": {
9518
+ "input": 0,
9519
+ "output": 0,
9520
+ "cacheRead": 0,
9521
+ "cacheWrite": 0
9522
+ },
9523
+ "contextWindow": 222222,
9524
+ "maxTokens": 8888
9525
+ },
9526
+ "google/lyria-3-pro-preview": {
9527
+ "id": "google/lyria-3-pro-preview",
9528
+ "name": "Google: Lyria 3 Pro Preview",
9529
+ "api": "openai-completions",
9530
+ "provider": "kilo",
9531
+ "baseUrl": "https://api.kilo.ai/api/gateway",
9532
+ "reasoning": false,
9533
+ "input": [
9534
+ "text"
9535
+ ],
9536
+ "cost": {
9537
+ "input": 0,
9538
+ "output": 0,
9539
+ "cacheRead": 0,
9540
+ "cacheWrite": 0
9541
+ },
9542
+ "contextWindow": 222222,
9543
+ "maxTokens": 8888
9544
+ },
9488
9545
  "gryphe/mythomax-l2-13b": {
9489
9546
  "id": "gryphe/mythomax-l2-13b",
9490
9547
  "name": "MythoMax 13B",
@@ -13813,6 +13870,25 @@
13813
13870
  "contextWindow": 222222,
13814
13871
  "maxTokens": 8888
13815
13872
  },
13873
+ "qwen/qwen3.6-plus-preview:free": {
13874
+ "id": "qwen/qwen3.6-plus-preview:free",
13875
+ "name": "Qwen: Qwen3.6 Plus Preview (free)",
13876
+ "api": "openai-completions",
13877
+ "provider": "kilo",
13878
+ "baseUrl": "https://api.kilo.ai/api/gateway",
13879
+ "reasoning": false,
13880
+ "input": [
13881
+ "text"
13882
+ ],
13883
+ "cost": {
13884
+ "input": 0,
13885
+ "output": 0,
13886
+ "cacheRead": 0,
13887
+ "cacheWrite": 0
13888
+ },
13889
+ "contextWindow": 222222,
13890
+ "maxTokens": 8888
13891
+ },
13816
13892
  "qwen/qwq-32b": {
13817
13893
  "id": "qwen/qwq-32b",
13818
13894
  "name": "Qwen: QwQ 32B",
@@ -13870,6 +13946,44 @@
13870
13946
  "contextWindow": 222222,
13871
13947
  "maxTokens": 8888
13872
13948
  },
13949
+ "rekaai/reka-edge": {
13950
+ "id": "rekaai/reka-edge",
13951
+ "name": "Reka Edge",
13952
+ "api": "openai-completions",
13953
+ "provider": "kilo",
13954
+ "baseUrl": "https://api.kilo.ai/api/gateway",
13955
+ "reasoning": false,
13956
+ "input": [
13957
+ "text"
13958
+ ],
13959
+ "cost": {
13960
+ "input": 0,
13961
+ "output": 0,
13962
+ "cacheRead": 0,
13963
+ "cacheWrite": 0
13964
+ },
13965
+ "contextWindow": 222222,
13966
+ "maxTokens": 8888
13967
+ },
13968
+ "rekaai/reka-flash-3": {
13969
+ "id": "rekaai/reka-flash-3",
13970
+ "name": "Reka Flash 3",
13971
+ "api": "openai-completions",
13972
+ "provider": "kilo",
13973
+ "baseUrl": "https://api.kilo.ai/api/gateway",
13974
+ "reasoning": false,
13975
+ "input": [
13976
+ "text"
13977
+ ],
13978
+ "cost": {
13979
+ "input": 0,
13980
+ "output": 0,
13981
+ "cacheRead": 0,
13982
+ "cacheWrite": 0
13983
+ },
13984
+ "contextWindow": 222222,
13985
+ "maxTokens": 8888
13986
+ },
13873
13987
  "relace/relace-apply-3": {
13874
13988
  "id": "relace/relace-apply-3",
13875
13989
  "name": "Relace: Relace Apply 3",
@@ -14382,6 +14496,25 @@
14382
14496
  "maxLevel": "xhigh"
14383
14497
  }
14384
14498
  },
14499
+ "x-ai/grok-4.20": {
14500
+ "id": "x-ai/grok-4.20",
14501
+ "name": "xAI: Grok 4.20",
14502
+ "api": "openai-completions",
14503
+ "provider": "kilo",
14504
+ "baseUrl": "https://api.kilo.ai/api/gateway",
14505
+ "reasoning": false,
14506
+ "input": [
14507
+ "text"
14508
+ ],
14509
+ "cost": {
14510
+ "input": 0,
14511
+ "output": 0,
14512
+ "cacheRead": 0,
14513
+ "cacheWrite": 0
14514
+ },
14515
+ "contextWindow": 222222,
14516
+ "maxTokens": 8888
14517
+ },
14385
14518
  "x-ai/grok-4.20-beta": {
14386
14519
  "id": "x-ai/grok-4.20-beta",
14387
14520
  "name": "xAI: Grok 4.20 Beta",
@@ -14401,6 +14534,25 @@
14401
14534
  "contextWindow": 222222,
14402
14535
  "maxTokens": 8888
14403
14536
  },
14537
+ "x-ai/grok-4.20-multi-agent": {
14538
+ "id": "x-ai/grok-4.20-multi-agent",
14539
+ "name": "xAI: Grok 4.20 Multi-Agent",
14540
+ "api": "openai-completions",
14541
+ "provider": "kilo",
14542
+ "baseUrl": "https://api.kilo.ai/api/gateway",
14543
+ "reasoning": false,
14544
+ "input": [
14545
+ "text"
14546
+ ],
14547
+ "cost": {
14548
+ "input": 0,
14549
+ "output": 0,
14550
+ "cacheRead": 0,
14551
+ "cacheWrite": 0
14552
+ },
14553
+ "contextWindow": 222222,
14554
+ "maxTokens": 8888
14555
+ },
14404
14556
  "x-ai/grok-4.20-multi-agent-beta": {
14405
14557
  "id": "x-ai/grok-4.20-multi-agent-beta",
14406
14558
  "name": "xAI: Grok 4.20 Multi-Agent Beta",
@@ -14446,7 +14598,7 @@
14446
14598
  },
14447
14599
  "x-ai/grok-code-fast-1:optimized:free": {
14448
14600
  "id": "x-ai/grok-code-fast-1:optimized:free",
14449
- "name": "xAI: Grok Code Fast 1 Optimized (experimental, free)",
14601
+ "name": "xAI: Grok Code Fast 1 Optimized (free)",
14450
14602
  "api": "openai-completions",
14451
14603
  "provider": "kilo",
14452
14604
  "baseUrl": "https://api.kilo.ai/api/gateway",
@@ -14814,6 +14966,25 @@
14814
14966
  "minLevel": "minimal",
14815
14967
  "maxLevel": "xhigh"
14816
14968
  }
14969
+ },
14970
+ "z-ai/glm-5v-turbo": {
14971
+ "id": "z-ai/glm-5v-turbo",
14972
+ "name": "Z.ai: GLM 5V Turbo",
14973
+ "api": "openai-completions",
14974
+ "provider": "kilo",
14975
+ "baseUrl": "https://api.kilo.ai/api/gateway",
14976
+ "reasoning": false,
14977
+ "input": [
14978
+ "text"
14979
+ ],
14980
+ "cost": {
14981
+ "input": 0,
14982
+ "output": 0,
14983
+ "cacheRead": 0,
14984
+ "cacheWrite": 0
14985
+ },
14986
+ "contextWindow": 222222,
14987
+ "maxTokens": 8888
14817
14988
  }
14818
14989
  },
14819
14990
  "kimi-code": {
@@ -17444,25 +17615,55 @@
17444
17615
  "contextWindow": 128000,
17445
17616
  "maxTokens": 16384
17446
17617
  },
17618
+ "mistral-small-2603": {
17619
+ "id": "mistral-small-2603",
17620
+ "name": "Mistral Small 4",
17621
+ "api": "openai-completions",
17622
+ "provider": "mistral",
17623
+ "baseUrl": "https://api.mistral.ai/v1",
17624
+ "reasoning": true,
17625
+ "input": [
17626
+ "text",
17627
+ "image"
17628
+ ],
17629
+ "cost": {
17630
+ "input": 0.15,
17631
+ "output": 0.6,
17632
+ "cacheRead": 0,
17633
+ "cacheWrite": 0
17634
+ },
17635
+ "contextWindow": 256000,
17636
+ "maxTokens": 256000,
17637
+ "thinking": {
17638
+ "mode": "effort",
17639
+ "minLevel": "minimal",
17640
+ "maxLevel": "xhigh"
17641
+ }
17642
+ },
17447
17643
  "mistral-small-latest": {
17448
17644
  "id": "mistral-small-latest",
17449
17645
  "name": "Mistral Small (latest)",
17450
17646
  "api": "openai-completions",
17451
17647
  "provider": "mistral",
17452
17648
  "baseUrl": "https://api.mistral.ai/v1",
17453
- "reasoning": false,
17649
+ "reasoning": true,
17454
17650
  "input": [
17455
17651
  "text",
17456
17652
  "image"
17457
17653
  ],
17458
17654
  "cost": {
17459
- "input": 0.1,
17460
- "output": 0.3,
17655
+ "input": 0.15,
17656
+ "output": 0.6,
17461
17657
  "cacheRead": 0,
17462
17658
  "cacheWrite": 0
17463
17659
  },
17464
- "contextWindow": 128000,
17465
- "maxTokens": 16384
17660
+ "contextWindow": 256000,
17661
+ "maxTokens": 256000,
17662
+ "thinking": {
17663
+ "mode": "effort",
17664
+ "minLevel": "minimal",
17665
+ "maxLevel": "xhigh"
17666
+ }
17466
17667
  },
17467
17668
  "open-mistral-7b": {
17468
17669
  "id": "open-mistral-7b",
@@ -17930,6 +18131,25 @@
17930
18131
  "contextWindow": 222222,
17931
18132
  "maxTokens": 8888
17932
18133
  },
18134
+ "anthropic/claude-haiku-latest": {
18135
+ "id": "anthropic/claude-haiku-latest",
18136
+ "name": "anthropic/claude-haiku-latest",
18137
+ "api": "openai-completions",
18138
+ "provider": "nanogpt",
18139
+ "baseUrl": "https://nano-gpt.com/api/v1",
18140
+ "reasoning": false,
18141
+ "input": [
18142
+ "text"
18143
+ ],
18144
+ "cost": {
18145
+ "input": 0,
18146
+ "output": 0,
18147
+ "cacheRead": 0,
18148
+ "cacheWrite": 0
18149
+ },
18150
+ "contextWindow": 222222,
18151
+ "maxTokens": 8888
18152
+ },
17933
18153
  "anthropic/claude-opus-4.6": {
17934
18154
  "id": "anthropic/claude-opus-4.6",
17935
18155
  "name": "Claude Opus 4.6",
@@ -17955,6 +18175,25 @@
17955
18175
  "maxLevel": "xhigh"
17956
18176
  }
17957
18177
  },
18178
+ "anthropic/claude-opus-latest": {
18179
+ "id": "anthropic/claude-opus-latest",
18180
+ "name": "anthropic/claude-opus-latest",
18181
+ "api": "openai-completions",
18182
+ "provider": "nanogpt",
18183
+ "baseUrl": "https://nano-gpt.com/api/v1",
18184
+ "reasoning": false,
18185
+ "input": [
18186
+ "text"
18187
+ ],
18188
+ "cost": {
18189
+ "input": 0,
18190
+ "output": 0,
18191
+ "cacheRead": 0,
18192
+ "cacheWrite": 0
18193
+ },
18194
+ "contextWindow": 222222,
18195
+ "maxTokens": 8888
18196
+ },
17958
18197
  "anthropic/claude-sonnet-4.6": {
17959
18198
  "id": "anthropic/claude-sonnet-4.6",
17960
18199
  "name": "Claude Sonnet 4.6",
@@ -17980,6 +18219,25 @@
17980
18219
  "maxLevel": "xhigh"
17981
18220
  }
17982
18221
  },
18222
+ "anthropic/claude-sonnet-latest": {
18223
+ "id": "anthropic/claude-sonnet-latest",
18224
+ "name": "anthropic/claude-sonnet-latest",
18225
+ "api": "openai-completions",
18226
+ "provider": "nanogpt",
18227
+ "baseUrl": "https://nano-gpt.com/api/v1",
18228
+ "reasoning": false,
18229
+ "input": [
18230
+ "text"
18231
+ ],
18232
+ "cost": {
18233
+ "input": 0,
18234
+ "output": 0,
18235
+ "cacheRead": 0,
18236
+ "cacheWrite": 0
18237
+ },
18238
+ "contextWindow": 222222,
18239
+ "maxTokens": 8888
18240
+ },
17983
18241
  "arcee-ai/trinity-large": {
17984
18242
  "id": "arcee-ai/trinity-large",
17985
18243
  "name": "arcee-ai/trinity-large",
@@ -21532,6 +21790,63 @@
21532
21790
  "contextWindow": 222222,
21533
21791
  "maxTokens": 8888
21534
21792
  },
21793
+ "google/gemini-flash-latest": {
21794
+ "id": "google/gemini-flash-latest",
21795
+ "name": "google/gemini-flash-latest",
21796
+ "api": "openai-completions",
21797
+ "provider": "nanogpt",
21798
+ "baseUrl": "https://nano-gpt.com/api/v1",
21799
+ "reasoning": false,
21800
+ "input": [
21801
+ "text"
21802
+ ],
21803
+ "cost": {
21804
+ "input": 0,
21805
+ "output": 0,
21806
+ "cacheRead": 0,
21807
+ "cacheWrite": 0
21808
+ },
21809
+ "contextWindow": 222222,
21810
+ "maxTokens": 8888
21811
+ },
21812
+ "google/gemini-flash-lite-latest": {
21813
+ "id": "google/gemini-flash-lite-latest",
21814
+ "name": "google/gemini-flash-lite-latest",
21815
+ "api": "openai-completions",
21816
+ "provider": "nanogpt",
21817
+ "baseUrl": "https://nano-gpt.com/api/v1",
21818
+ "reasoning": false,
21819
+ "input": [
21820
+ "text"
21821
+ ],
21822
+ "cost": {
21823
+ "input": 0,
21824
+ "output": 0,
21825
+ "cacheRead": 0,
21826
+ "cacheWrite": 0
21827
+ },
21828
+ "contextWindow": 222222,
21829
+ "maxTokens": 8888
21830
+ },
21831
+ "google/gemini-pro-latest": {
21832
+ "id": "google/gemini-pro-latest",
21833
+ "name": "google/gemini-pro-latest",
21834
+ "api": "openai-completions",
21835
+ "provider": "nanogpt",
21836
+ "baseUrl": "https://nano-gpt.com/api/v1",
21837
+ "reasoning": false,
21838
+ "input": [
21839
+ "text"
21840
+ ],
21841
+ "cost": {
21842
+ "input": 0,
21843
+ "output": 0,
21844
+ "cacheRead": 0,
21845
+ "cacheWrite": 0
21846
+ },
21847
+ "contextWindow": 222222,
21848
+ "maxTokens": 8888
21849
+ },
21535
21850
  "grok-3-beta": {
21536
21851
  "id": "grok-3-beta",
21537
21852
  "name": "grok-3-beta",
@@ -21627,6 +21942,30 @@
21627
21942
  "contextWindow": 222222,
21628
21943
  "maxTokens": 8888
21629
21944
  },
21945
+ "holo3-35b-a3b": {
21946
+ "id": "holo3-35b-a3b",
21947
+ "name": "holo3-35b-a3b",
21948
+ "api": "openai-completions",
21949
+ "provider": "nanogpt",
21950
+ "baseUrl": "https://nano-gpt.com/api/v1",
21951
+ "reasoning": true,
21952
+ "input": [
21953
+ "text"
21954
+ ],
21955
+ "cost": {
21956
+ "input": 0,
21957
+ "output": 0,
21958
+ "cacheRead": 0,
21959
+ "cacheWrite": 0
21960
+ },
21961
+ "contextWindow": 222222,
21962
+ "maxTokens": 8888,
21963
+ "thinking": {
21964
+ "mode": "effort",
21965
+ "minLevel": "minimal",
21966
+ "maxLevel": "xhigh"
21967
+ }
21968
+ },
21630
21969
  "huihui-ai/DeepSeek-R1-Distill-Llama-70B-abliterated": {
21631
21970
  "id": "huihui-ai/DeepSeek-R1-Distill-Llama-70B-abliterated",
21632
21971
  "name": "huihui-ai/DeepSeek-R1-Distill-Llama-70B-abliterated",
@@ -25170,6 +25509,25 @@
25170
25509
  "maxLevel": "xhigh"
25171
25510
  }
25172
25511
  },
25512
+ "openai/gpt-latest": {
25513
+ "id": "openai/gpt-latest",
25514
+ "name": "openai/gpt-latest",
25515
+ "api": "openai-completions",
25516
+ "provider": "nanogpt",
25517
+ "baseUrl": "https://nano-gpt.com/api/v1",
25518
+ "reasoning": false,
25519
+ "input": [
25520
+ "text"
25521
+ ],
25522
+ "cost": {
25523
+ "input": 0,
25524
+ "output": 0,
25525
+ "cacheRead": 0,
25526
+ "cacheWrite": 0
25527
+ },
25528
+ "contextWindow": 222222,
25529
+ "maxTokens": 8888
25530
+ },
25173
25531
  "openai/gpt-oss-120b": {
25174
25532
  "id": "openai/gpt-oss-120b",
25175
25533
  "name": "GPT OSS 120B",
@@ -26429,6 +26787,44 @@
26429
26787
  "maxLevel": "high"
26430
26788
  }
26431
26789
  },
26790
+ "qwen3.5-omni-flash": {
26791
+ "id": "qwen3.5-omni-flash",
26792
+ "name": "qwen3.5-omni-flash",
26793
+ "api": "openai-completions",
26794
+ "provider": "nanogpt",
26795
+ "baseUrl": "https://nano-gpt.com/api/v1",
26796
+ "reasoning": false,
26797
+ "input": [
26798
+ "text"
26799
+ ],
26800
+ "cost": {
26801
+ "input": 0,
26802
+ "output": 0,
26803
+ "cacheRead": 0,
26804
+ "cacheWrite": 0
26805
+ },
26806
+ "contextWindow": 222222,
26807
+ "maxTokens": 8888
26808
+ },
26809
+ "qwen3.5-omni-plus": {
26810
+ "id": "qwen3.5-omni-plus",
26811
+ "name": "qwen3.5-omni-plus",
26812
+ "api": "openai-completions",
26813
+ "provider": "nanogpt",
26814
+ "baseUrl": "https://nano-gpt.com/api/v1",
26815
+ "reasoning": false,
26816
+ "input": [
26817
+ "text"
26818
+ ],
26819
+ "cost": {
26820
+ "input": 0,
26821
+ "output": 0,
26822
+ "cacheRead": 0,
26823
+ "cacheWrite": 0
26824
+ },
26825
+ "contextWindow": 222222,
26826
+ "maxTokens": 8888
26827
+ },
26432
26828
  "qwq-32b": {
26433
26829
  "id": "qwq-32b",
26434
26830
  "name": "qwq-32b",
@@ -28145,6 +28541,25 @@
28145
28541
  "contextWindow": 222222,
28146
28542
  "maxTokens": 8888
28147
28543
  },
28544
+ "x-ai/grok-4.20": {
28545
+ "id": "x-ai/grok-4.20",
28546
+ "name": "x-ai/grok-4.20",
28547
+ "api": "openai-completions",
28548
+ "provider": "nanogpt",
28549
+ "baseUrl": "https://nano-gpt.com/api/v1",
28550
+ "reasoning": false,
28551
+ "input": [
28552
+ "text"
28553
+ ],
28554
+ "cost": {
28555
+ "input": 0,
28556
+ "output": 0,
28557
+ "cacheRead": 0,
28558
+ "cacheWrite": 0
28559
+ },
28560
+ "contextWindow": 222222,
28561
+ "maxTokens": 8888
28562
+ },
28148
28563
  "x-ai/grok-4.20-beta-non-reasoning": {
28149
28564
  "id": "x-ai/grok-4.20-beta-non-reasoning",
28150
28565
  "name": "x-ai/grok-4.20-beta-non-reasoning",
@@ -28183,6 +28598,25 @@
28183
28598
  "contextWindow": 222222,
28184
28599
  "maxTokens": 8888
28185
28600
  },
28601
+ "x-ai/grok-4.20-multi-agent": {
28602
+ "id": "x-ai/grok-4.20-multi-agent",
28603
+ "name": "x-ai/grok-4.20-multi-agent",
28604
+ "api": "openai-completions",
28605
+ "provider": "nanogpt",
28606
+ "baseUrl": "https://nano-gpt.com/api/v1",
28607
+ "reasoning": false,
28608
+ "input": [
28609
+ "text"
28610
+ ],
28611
+ "cost": {
28612
+ "input": 0,
28613
+ "output": 0,
28614
+ "cacheRead": 0,
28615
+ "cacheWrite": 0
28616
+ },
28617
+ "contextWindow": 222222,
28618
+ "maxTokens": 8888
28619
+ },
28186
28620
  "x-ai/grok-4.20-multi-agent-beta": {
28187
28621
  "id": "x-ai/grok-4.20-multi-agent-beta",
28188
28622
  "name": "x-ai/grok-4.20-multi-agent-beta",
@@ -30682,6 +31116,26 @@
30682
31116
  "maxLevel": "xhigh"
30683
31117
  }
30684
31118
  },
31119
+ "gpt-5.3-chat-latest": {
31120
+ "id": "gpt-5.3-chat-latest",
31121
+ "name": "GPT-5.3 Chat (latest)",
31122
+ "api": "openai-responses",
31123
+ "provider": "openai",
31124
+ "baseUrl": "https://api.openai.com/v1",
31125
+ "reasoning": false,
31126
+ "input": [
31127
+ "text",
31128
+ "image"
31129
+ ],
31130
+ "cost": {
31131
+ "input": 1.75,
31132
+ "output": 14,
31133
+ "cacheRead": 0.175,
31134
+ "cacheWrite": 0
31135
+ },
31136
+ "contextWindow": 128000,
31137
+ "maxTokens": 16384
31138
+ },
30685
31139
  "gpt-5.3-codex": {
30686
31140
  "id": "gpt-5.3-codex",
30687
31141
  "name": "GPT-5.3 Codex",
@@ -32616,6 +33070,30 @@
32616
33070
  "maxLevel": "xhigh"
32617
33071
  }
32618
33072
  },
33073
+ "qwen3.6-plus-free": {
33074
+ "id": "qwen3.6-plus-free",
33075
+ "name": "Qwen3.6 Plus Free",
33076
+ "api": "openai-completions",
33077
+ "provider": "opencode-zen",
33078
+ "baseUrl": "https://opencode.ai/zen/v1",
33079
+ "reasoning": true,
33080
+ "input": [
33081
+ "text"
33082
+ ],
33083
+ "cost": {
33084
+ "input": 0,
33085
+ "output": 0,
33086
+ "cacheRead": 0,
33087
+ "cacheWrite": 0
33088
+ },
33089
+ "contextWindow": 1048576,
33090
+ "maxTokens": 64000,
33091
+ "thinking": {
33092
+ "mode": "effort",
33093
+ "minLevel": "minimal",
33094
+ "maxLevel": "high"
33095
+ }
33096
+ },
32619
33097
  "trinity-large-preview-free": {
32620
33098
  "id": "trinity-large-preview-free",
32621
33099
  "name": "Trinity Large Preview",
@@ -32947,7 +33425,7 @@
32947
33425
  },
32948
33426
  "anthropic/claude-opus-4": {
32949
33427
  "id": "anthropic/claude-opus-4",
32950
- "name": "Claude Opus 4",
33428
+ "name": "Claude Opus 4 (latest)",
32951
33429
  "api": "openai-completions",
32952
33430
  "baseUrl": "https://openrouter.ai/api/v1",
32953
33431
  "provider": "openrouter",
@@ -32963,7 +33441,7 @@
32963
33441
  "cacheWrite": 18.75
32964
33442
  },
32965
33443
  "contextWindow": 200000,
32966
- "maxTokens": 64000,
33444
+ "maxTokens": 32000,
32967
33445
  "thinking": {
32968
33446
  "mode": "effort",
32969
33447
  "minLevel": "minimal",
@@ -33142,6 +33620,30 @@
33142
33620
  "supportsToolChoice": false
33143
33621
  }
33144
33622
  },
33623
+ "arcee-ai/trinity-large-thinking": {
33624
+ "id": "arcee-ai/trinity-large-thinking",
33625
+ "name": "Arcee AI: Trinity Large Thinking",
33626
+ "api": "openai-completions",
33627
+ "provider": "openrouter",
33628
+ "baseUrl": "https://openrouter.ai/api/v1",
33629
+ "reasoning": true,
33630
+ "input": [
33631
+ "text"
33632
+ ],
33633
+ "cost": {
33634
+ "input": 0.25,
33635
+ "output": 0.8999999999999999,
33636
+ "cacheRead": 0.06,
33637
+ "cacheWrite": 0
33638
+ },
33639
+ "contextWindow": 262144,
33640
+ "maxTokens": 80000,
33641
+ "thinking": {
33642
+ "mode": "effort",
33643
+ "minLevel": "minimal",
33644
+ "maxLevel": "high"
33645
+ }
33646
+ },
33145
33647
  "arcee-ai/trinity-mini": {
33146
33648
  "id": "arcee-ai/trinity-mini",
33147
33649
  "name": "Arcee AI: Trinity Mini",
@@ -34377,9 +34879,9 @@
34377
34879
  "text"
34378
34880
  ],
34379
34881
  "cost": {
34380
- "input": 0.19,
34381
- "output": 1.15,
34382
- "cacheRead": 0.095,
34882
+ "input": 0.12,
34883
+ "output": 1,
34884
+ "cacheRead": 0.06,
34383
34885
  "cacheWrite": 0
34384
34886
  },
34385
34887
  "contextWindow": 204800,
@@ -36211,6 +36713,44 @@
36211
36713
  "maxLevel": "xhigh"
36212
36714
  }
36213
36715
  },
36716
+ "openai/gpt-audio": {
36717
+ "id": "openai/gpt-audio",
36718
+ "name": "OpenAI: GPT Audio",
36719
+ "api": "openai-completions",
36720
+ "provider": "openrouter",
36721
+ "baseUrl": "https://openrouter.ai/api/v1",
36722
+ "reasoning": false,
36723
+ "input": [
36724
+ "text"
36725
+ ],
36726
+ "cost": {
36727
+ "input": 2.5,
36728
+ "output": 10,
36729
+ "cacheRead": 0,
36730
+ "cacheWrite": 0
36731
+ },
36732
+ "contextWindow": 128000,
36733
+ "maxTokens": 16384
36734
+ },
36735
+ "openai/gpt-audio-mini": {
36736
+ "id": "openai/gpt-audio-mini",
36737
+ "name": "OpenAI: GPT Audio Mini",
36738
+ "api": "openai-completions",
36739
+ "provider": "openrouter",
36740
+ "baseUrl": "https://openrouter.ai/api/v1",
36741
+ "reasoning": false,
36742
+ "input": [
36743
+ "text"
36744
+ ],
36745
+ "cost": {
36746
+ "input": 0.6,
36747
+ "output": 2.4,
36748
+ "cacheRead": 0,
36749
+ "cacheWrite": 0
36750
+ },
36751
+ "contextWindow": 128000,
36752
+ "maxTokens": 16384
36753
+ },
36214
36754
  "openai/gpt-oss-120b": {
36215
36755
  "id": "openai/gpt-oss-120b",
36216
36756
  "name": "GPT OSS 120B",
@@ -37658,7 +38198,7 @@
37658
38198
  "cacheWrite": 0
37659
38199
  },
37660
38200
  "contextWindow": 256000,
37661
- "maxTokens": 65536,
38201
+ "maxTokens": 32768,
37662
38202
  "thinking": {
37663
38203
  "mode": "effort",
37664
38204
  "minLevel": "minimal",
@@ -37715,6 +38255,30 @@
37715
38255
  "maxLevel": "high"
37716
38256
  }
37717
38257
  },
38258
+ "qwen/qwen3.6-plus-preview:free": {
38259
+ "id": "qwen/qwen3.6-plus-preview:free",
38260
+ "name": "Qwen: Qwen3.6 Plus Preview (free)",
38261
+ "api": "openai-completions",
38262
+ "provider": "openrouter",
38263
+ "baseUrl": "https://openrouter.ai/api/v1",
38264
+ "reasoning": true,
38265
+ "input": [
38266
+ "text"
38267
+ ],
38268
+ "cost": {
38269
+ "input": 0,
38270
+ "output": 0,
38271
+ "cacheRead": 0,
38272
+ "cacheWrite": 0
38273
+ },
38274
+ "contextWindow": 1000000,
38275
+ "maxTokens": 32000,
38276
+ "thinking": {
38277
+ "mode": "effort",
38278
+ "minLevel": "minimal",
38279
+ "maxLevel": "high"
38280
+ }
38281
+ },
37718
38282
  "qwen/qwq-32b": {
37719
38283
  "id": "qwen/qwq-32b",
37720
38284
  "name": "Qwen: QwQ 32B",
@@ -37759,6 +38323,26 @@
37759
38323
  "contextWindow": 16384,
37760
38324
  "maxTokens": 16384
37761
38325
  },
38326
+ "rekaai/reka-edge": {
38327
+ "id": "rekaai/reka-edge",
38328
+ "name": "Reka Edge",
38329
+ "api": "openai-completions",
38330
+ "provider": "openrouter",
38331
+ "baseUrl": "https://openrouter.ai/api/v1",
38332
+ "reasoning": false,
38333
+ "input": [
38334
+ "text",
38335
+ "image"
38336
+ ],
38337
+ "cost": {
38338
+ "input": 0.09999999999999999,
38339
+ "output": 0.09999999999999999,
38340
+ "cacheRead": 0,
38341
+ "cacheWrite": 0
38342
+ },
38343
+ "contextWindow": 16384,
38344
+ "maxTokens": 16384
38345
+ },
37762
38346
  "relace/relace-search": {
37763
38347
  "id": "relace/relace-search",
37764
38348
  "name": "Relace: Relace Search",
@@ -38160,6 +38744,31 @@
38160
38744
  "maxLevel": "high"
38161
38745
  }
38162
38746
  },
38747
+ "x-ai/grok-4.20": {
38748
+ "id": "x-ai/grok-4.20",
38749
+ "name": "xAI: Grok 4.20",
38750
+ "api": "openai-completions",
38751
+ "provider": "openrouter",
38752
+ "baseUrl": "https://openrouter.ai/api/v1",
38753
+ "reasoning": true,
38754
+ "input": [
38755
+ "text",
38756
+ "image"
38757
+ ],
38758
+ "cost": {
38759
+ "input": 2,
38760
+ "output": 6,
38761
+ "cacheRead": 0.19999999999999998,
38762
+ "cacheWrite": 0
38763
+ },
38764
+ "contextWindow": 2000000,
38765
+ "maxTokens": 8888,
38766
+ "thinking": {
38767
+ "mode": "effort",
38768
+ "minLevel": "minimal",
38769
+ "maxLevel": "high"
38770
+ }
38771
+ },
38163
38772
  "x-ai/grok-4.20-beta": {
38164
38773
  "id": "x-ai/grok-4.20-beta",
38165
38774
  "name": "xAI: Grok 4.20 Beta",
@@ -38562,6 +39171,31 @@
38562
39171
  "minLevel": "minimal",
38563
39172
  "maxLevel": "high"
38564
39173
  }
39174
+ },
39175
+ "z-ai/glm-5v-turbo": {
39176
+ "id": "z-ai/glm-5v-turbo",
39177
+ "name": "Z.ai: GLM 5V Turbo",
39178
+ "api": "openai-completions",
39179
+ "provider": "openrouter",
39180
+ "baseUrl": "https://openrouter.ai/api/v1",
39181
+ "reasoning": true,
39182
+ "input": [
39183
+ "text",
39184
+ "image"
39185
+ ],
39186
+ "cost": {
39187
+ "input": 1.2,
39188
+ "output": 4,
39189
+ "cacheRead": 0.24,
39190
+ "cacheWrite": 0
39191
+ },
39192
+ "contextWindow": 202752,
39193
+ "maxTokens": 131072,
39194
+ "thinking": {
39195
+ "mode": "effort",
39196
+ "minLevel": "minimal",
39197
+ "maxLevel": "high"
39198
+ }
38565
39199
  }
38566
39200
  },
38567
39201
  "qianfan": {
@@ -38968,6 +39602,25 @@
38968
39602
  },
38969
39603
  "contextWindow": 196608,
38970
39604
  "maxTokens": 8192
39605
+ },
39606
+ "hf:zai-org/GLM-5": {
39607
+ "id": "hf:zai-org/GLM-5",
39608
+ "name": "zai-org/GLM-5",
39609
+ "api": "openai-completions",
39610
+ "provider": "synthetic",
39611
+ "baseUrl": "https://api.synthetic.new/openai/v1",
39612
+ "reasoning": false,
39613
+ "input": [
39614
+ "text"
39615
+ ],
39616
+ "cost": {
39617
+ "input": 0,
39618
+ "output": 0,
39619
+ "cacheRead": 0,
39620
+ "cacheWrite": 0
39621
+ },
39622
+ "contextWindow": 196608,
39623
+ "maxTokens": 8192
38971
39624
  }
38972
39625
  },
38973
39626
  "together": {
@@ -41077,7 +41730,7 @@
41077
41730
  },
41078
41731
  "anthropic/claude-opus-4": {
41079
41732
  "id": "anthropic/claude-opus-4",
41080
- "name": "Claude Opus 4",
41733
+ "name": "Claude Opus 4 (latest)",
41081
41734
  "api": "anthropic-messages",
41082
41735
  "baseUrl": "https://ai-gateway.vercel.sh",
41083
41736
  "provider": "vercel-ai-gateway",
@@ -41093,7 +41746,7 @@
41093
41746
  "cacheWrite": 18.75
41094
41747
  },
41095
41748
  "contextWindow": 200000,
41096
- "maxTokens": 64000,
41749
+ "maxTokens": 32000,
41097
41750
  "thinking": {
41098
41751
  "mode": "budget",
41099
41752
  "minLevel": "minimal",
@@ -44620,7 +45273,7 @@
44620
45273
  "cacheWrite": 18.75
44621
45274
  },
44622
45275
  "contextWindow": 200000,
44623
- "maxTokens": 64000,
45276
+ "maxTokens": 32000,
44624
45277
  "thinking": {
44625
45278
  "mode": "budget",
44626
45279
  "minLevel": "minimal",
@@ -45434,6 +46087,25 @@
45434
46087
  "contextWindow": 256000,
45435
46088
  "maxTokens": 64000
45436
46089
  },
46090
+ "kuaishou/kat-coder-pro-v2": {
46091
+ "id": "kuaishou/kat-coder-pro-v2",
46092
+ "name": "KwaiKAT: KAT-Coder-Pro-V2",
46093
+ "api": "openai-completions",
46094
+ "provider": "zenmux",
46095
+ "baseUrl": "https://zenmux.ai/api/v1",
46096
+ "reasoning": false,
46097
+ "input": [
46098
+ "text"
46099
+ ],
46100
+ "cost": {
46101
+ "input": 0.3,
46102
+ "output": 1.2,
46103
+ "cacheRead": 0.06,
46104
+ "cacheWrite": 0
46105
+ },
46106
+ "contextWindow": 256000,
46107
+ "maxTokens": 8888
46108
+ },
45437
46109
  "meta/llama-3.3-70b-instruct": {
45438
46110
  "id": "meta/llama-3.3-70b-instruct",
45439
46111
  "name": "Llama 3.3 70b Instruct",
@@ -46531,7 +47203,7 @@
46531
47203
  "input": 0.2,
46532
47204
  "output": 1.6,
46533
47205
  "cacheRead": 0.04,
46534
- "cacheWrite": 0
47206
+ "cacheWrite": 0.25
46535
47207
  },
46536
47208
  "contextWindow": 262144,
46537
47209
  "maxTokens": 8888,
@@ -18,6 +18,7 @@ import {
18
18
  type Tool,
19
19
  type ToolChoice,
20
20
  } from "../types";
21
+ import { createAbortSourceTracker } from "../utils/abort";
21
22
  import { AssistantMessageEventStream } from "../utils/event-stream";
22
23
  import { finalizeErrorMessage, type RawHttpRequestDump } from "../utils/http-inspector";
23
24
  import {
@@ -39,6 +40,8 @@ import {
39
40
  import { transformMessages } from "./transform-messages";
40
41
 
41
42
  const DEFAULT_AZURE_API_VERSION = "v1";
43
+ const AZURE_OPENAI_RESPONSES_FIRST_EVENT_TIMEOUT_MESSAGE =
44
+ "Azure OpenAI responses stream timed out while waiting for the first event";
42
45
 
43
46
  function parseDeploymentNameMap(value: string | undefined): Map<string, string> {
44
47
  const map = new Map<string, string>();
@@ -115,6 +118,9 @@ export const streamAzureOpenAIResponses: StreamFunction<"azure-openai-responses"
115
118
  timestamp: Date.now(),
116
119
  };
117
120
  let rawRequestDump: RawHttpRequestDump | undefined;
121
+ const abortTracker = createAbortSourceTracker(options?.signal);
122
+ const firstEventTimeoutAbortError = new Error(AZURE_OPENAI_RESPONSES_FIRST_EVENT_TIMEOUT_MESSAGE);
123
+ const { requestAbortController, requestSignal } = abortTracker;
118
124
 
119
125
  try {
120
126
  // Create Azure OpenAI client
@@ -122,14 +128,7 @@ export const streamAzureOpenAIResponses: StreamFunction<"azure-openai-responses"
122
128
  const client = createClient(model, apiKey, options);
123
129
  const { baseUrl } = resolveAzureConfig(model, options);
124
130
  const params = buildParams(model, context, options, deploymentName, baseUrl);
125
- const requestAbortController = new AbortController();
126
- const requestSignal = options?.signal
127
- ? AbortSignal.any([options.signal, requestAbortController.signal])
128
- : requestAbortController.signal;
129
131
  const idleTimeoutMs = getOpenAIStreamIdleTimeoutMs();
130
- const firstEventWatchdog = createFirstEventWatchdog(getStreamFirstEventTimeoutMs(idleTimeoutMs), () =>
131
- requestAbortController.abort(),
132
- );
133
132
  options?.onPayload?.(params);
134
133
  rawRequestDump = {
135
134
  provider: model.provider,
@@ -140,6 +139,9 @@ export const streamAzureOpenAIResponses: StreamFunction<"azure-openai-responses"
140
139
  body: params,
141
140
  };
142
141
  const openaiStream = await client.responses.create(params, { signal: requestSignal });
142
+ const firstEventWatchdog = createFirstEventWatchdog(getStreamFirstEventTimeoutMs(idleTimeoutMs), () =>
143
+ abortTracker.abortLocally(firstEventTimeoutAbortError),
144
+ );
143
145
  stream.push({ type: "start", partial: output });
144
146
 
145
147
  await processResponsesStream(
@@ -158,7 +160,11 @@ export const streamAzureOpenAIResponses: StreamFunction<"azure-openai-responses"
158
160
  },
159
161
  );
160
162
 
161
- if (options?.signal?.aborted) {
163
+ const firstEventTimeoutError = abortTracker.getLocalAbortReason();
164
+ if (firstEventTimeoutError) {
165
+ throw firstEventTimeoutError;
166
+ }
167
+ if (abortTracker.wasCallerAbort()) {
162
168
  throw new Error("Request was aborted");
163
169
  }
164
170
 
@@ -172,8 +178,9 @@ export const streamAzureOpenAIResponses: StreamFunction<"azure-openai-responses"
172
178
  stream.end();
173
179
  } catch (error) {
174
180
  for (const block of output.content) delete (block as { index?: number }).index;
175
- output.stopReason = options?.signal?.aborted ? "aborted" : "error";
176
- output.errorMessage = await finalizeErrorMessage(error, rawRequestDump);
181
+ const firstEventTimeoutError = abortTracker.getLocalAbortReason();
182
+ output.stopReason = abortTracker.wasCallerAbort() ? "aborted" : "error";
183
+ output.errorMessage = firstEventTimeoutError?.message ?? (await finalizeErrorMessage(error, rawRequestDump));
177
184
  output.duration = Date.now() - startTime;
178
185
  if (firstTokenTime) output.ttft = firstTokenTime - startTime;
179
186
  stream.push({ type: "error", reason: output.stopReason, error: output });
@@ -29,6 +29,7 @@ import {
29
29
  type ToolChoice,
30
30
  type ToolResultMessage,
31
31
  } from "../types";
32
+ import { createAbortSourceTracker } from "../utils/abort";
32
33
  import { AssistantMessageEventStream } from "../utils/event-stream";
33
34
  import { finalizeErrorMessage, type RawHttpRequestDump } from "../utils/http-inspector";
34
35
  import {
@@ -162,6 +163,9 @@ function getTrailingPartialTag(text: string, tags: readonly string[]): string {
162
163
  return text.slice(-maxLength);
163
164
  }
164
165
 
166
+ const OPENAI_COMPLETIONS_FIRST_EVENT_TIMEOUT_MESSAGE =
167
+ "OpenAI completions stream timed out while waiting for the first event";
168
+
165
169
  export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
166
170
  model: Model<"openai-completions">,
167
171
  context: Context,
@@ -191,17 +195,13 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
191
195
  timestamp: Date.now(),
192
196
  };
193
197
  let rawRequestDump: RawHttpRequestDump | undefined;
198
+ const abortTracker = createAbortSourceTracker(options?.signal);
199
+ const firstEventTimeoutAbortError = new Error(OPENAI_COMPLETIONS_FIRST_EVENT_TIMEOUT_MESSAGE);
200
+ const { requestAbortController, requestSignal } = abortTracker;
194
201
 
195
202
  try {
196
203
  const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
197
- const requestAbortController = new AbortController();
198
- const requestSignal = options?.signal
199
- ? AbortSignal.any([options.signal, requestAbortController.signal])
200
- : requestAbortController.signal;
201
204
  const idleTimeoutMs = getOpenAIStreamIdleTimeoutMs();
202
- const firstEventWatchdog = createFirstEventWatchdog(getStreamFirstEventTimeoutMs(idleTimeoutMs), () =>
203
- requestAbortController.abort(),
204
- );
205
205
  const { client, copilotPremiumRequests, baseUrl } = await createClient(
206
206
  model,
207
207
  context,
@@ -220,6 +220,9 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
220
220
  body: params,
221
221
  };
222
222
  const openaiStream = await client.chat.completions.create(params, { signal: requestSignal });
223
+ const firstEventWatchdog = createFirstEventWatchdog(getStreamFirstEventTimeoutMs(idleTimeoutMs), () =>
224
+ abortTracker.abortLocally(firstEventTimeoutAbortError),
225
+ );
223
226
  if (copilotPremiumRequests !== undefined) output.usage.premiumRequests = copilotPremiumRequests;
224
227
  stream.push({ type: "start", partial: output });
225
228
 
@@ -485,7 +488,11 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
485
488
 
486
489
  finishCurrentBlock(currentBlock);
487
490
 
488
- if (options?.signal?.aborted) {
491
+ const firstEventTimeoutError = abortTracker.getLocalAbortReason();
492
+ if (firstEventTimeoutError) {
493
+ throw firstEventTimeoutError;
494
+ }
495
+ if (abortTracker.wasCallerAbort()) {
489
496
  throw new Error("Request was aborted");
490
497
  }
491
498
 
@@ -502,8 +509,9 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
502
509
  stream.end();
503
510
  } catch (error) {
504
511
  for (const block of output.content) delete (block as any).index;
505
- output.stopReason = options?.signal?.aborted ? "aborted" : "error";
506
- output.errorMessage = await finalizeErrorMessage(error, rawRequestDump);
512
+ const firstEventTimeoutError = abortTracker.getLocalAbortReason();
513
+ output.stopReason = abortTracker.wasCallerAbort() ? "aborted" : "error";
514
+ output.errorMessage = firstEventTimeoutError?.message ?? (await finalizeErrorMessage(error, rawRequestDump));
507
515
  // Some providers via OpenRouter include extra details here.
508
516
  const rawMetadata = (error as { error?: { metadata?: { raw?: string } } })?.error?.metadata?.raw;
509
517
  if (rawMetadata) output.errorMessage += `\n${rawMetadata}`;
@@ -28,6 +28,7 @@ import {
28
28
  resolveCacheRetention,
29
29
  sanitizeOpenAIResponsesHistoryItemsForReplay,
30
30
  } from "../utils";
31
+ import { createAbortSourceTracker } from "../utils/abort";
31
32
  import { AssistantMessageEventStream } from "../utils/event-stream";
32
33
  import { finalizeErrorMessage, type RawHttpRequestDump } from "../utils/http-inspector";
33
34
  import {
@@ -82,6 +83,8 @@ export interface OpenAIResponsesOptions extends StreamOptions {
82
83
  }
83
84
 
84
85
  const OPENAI_RESPONSES_PROVIDER_SESSION_STATE_PREFIX = "openai-responses:";
86
+ const OPENAI_RESPONSES_FIRST_EVENT_TIMEOUT_MESSAGE =
87
+ "OpenAI responses stream timed out while waiting for the first event";
85
88
 
86
89
  interface OpenAIResponsesProviderSessionState extends ProviderSessionState {
87
90
  nativeHistoryReplayWarmed: boolean;
@@ -161,6 +164,9 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
161
164
  timestamp: Date.now(),
162
165
  };
163
166
  let rawRequestDump: RawHttpRequestDump | undefined;
167
+ const abortTracker = createAbortSourceTracker(options?.signal);
168
+ const firstEventTimeoutAbortError = new Error(OPENAI_RESPONSES_FIRST_EVENT_TIMEOUT_MESSAGE);
169
+ const { requestAbortController, requestSignal } = abortTracker;
164
170
 
165
171
  try {
166
172
  // Create OpenAI client
@@ -174,14 +180,7 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
174
180
  );
175
181
  const providerSessionState = getOpenAIResponsesProviderSessionState(model, options?.providerSessionState);
176
182
  const { params } = buildParams(model, context, options, providerSessionState, baseUrl);
177
- const requestAbortController = new AbortController();
178
- const requestSignal = options?.signal
179
- ? AbortSignal.any([options.signal, requestAbortController.signal])
180
- : requestAbortController.signal;
181
183
  const idleTimeoutMs = getOpenAIStreamIdleTimeoutMs();
182
- const firstEventWatchdog = createFirstEventWatchdog(getStreamFirstEventTimeoutMs(idleTimeoutMs), () =>
183
- requestAbortController.abort(),
184
- );
185
184
  options?.onPayload?.(params);
186
185
  rawRequestDump = {
187
186
  provider: model.provider,
@@ -192,6 +191,9 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
192
191
  body: params,
193
192
  };
194
193
  const openaiStream = await client.responses.create(params, { signal: requestSignal });
194
+ const firstEventWatchdog = createFirstEventWatchdog(getStreamFirstEventTimeoutMs(idleTimeoutMs), () =>
195
+ abortTracker.abortLocally(firstEventTimeoutAbortError),
196
+ );
195
197
  if (copilotPremiumRequests !== undefined) output.usage.premiumRequests = copilotPremiumRequests;
196
198
  stream.push({ type: "start", partial: output });
197
199
 
@@ -216,7 +218,11 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
216
218
  );
217
219
  if (copilotPremiumRequests !== undefined) output.usage.premiumRequests = copilotPremiumRequests;
218
220
 
219
- if (options?.signal?.aborted) {
221
+ const firstEventTimeoutError = abortTracker.getLocalAbortReason();
222
+ if (firstEventTimeoutError) {
223
+ throw firstEventTimeoutError;
224
+ }
225
+ if (abortTracker.wasCallerAbort()) {
220
226
  throw new Error("Request was aborted");
221
227
  }
222
228
 
@@ -233,8 +239,9 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
233
239
  stream.end();
234
240
  } catch (error) {
235
241
  for (const block of output.content) delete (block as { index?: number }).index;
236
- output.stopReason = options?.signal?.aborted ? "aborted" : "error";
237
- output.errorMessage = await finalizeErrorMessage(error, rawRequestDump);
242
+ const firstEventTimeoutError = abortTracker.getLocalAbortReason();
243
+ output.stopReason = abortTracker.wasCallerAbort() ? "aborted" : "error";
244
+ output.errorMessage = firstEventTimeoutError?.message ?? (await finalizeErrorMessage(error, rawRequestDump));
238
245
  output.duration = Date.now() - startTime;
239
246
  if (firstTokenTime) output.ttft = firstTokenTime - startTime;
240
247
  stream.push({ type: "error", reason: output.stopReason, error: output });
@@ -0,0 +1,36 @@
1
+ export interface AbortSourceTracker {
2
+ requestAbortController: AbortController;
3
+ requestSignal: AbortSignal;
4
+ abortLocally(reason: Error): Error;
5
+ getLocalAbortReason(): Error | undefined;
6
+ wasCallerAbort(): boolean;
7
+ }
8
+
9
+ /**
10
+ * Tracks whether a merged request signal was aborted by the caller or by provider-local logic.
11
+ */
12
+ export function createAbortSourceTracker(callerSignal?: AbortSignal): AbortSourceTracker {
13
+ const requestAbortController = new AbortController();
14
+ const requestSignal = callerSignal
15
+ ? AbortSignal.any([callerSignal, requestAbortController.signal])
16
+ : requestAbortController.signal;
17
+ let localAbortReason: Error | undefined;
18
+
19
+ return {
20
+ requestAbortController,
21
+ requestSignal,
22
+ abortLocally(reason) {
23
+ localAbortReason = reason;
24
+ requestAbortController.abort(reason);
25
+ return reason;
26
+ },
27
+ getLocalAbortReason() {
28
+ if (!localAbortReason) return undefined;
29
+ return requestSignal.reason === localAbortReason ? localAbortReason : undefined;
30
+ },
31
+ wasCallerAbort() {
32
+ if (!callerSignal?.aborted) return false;
33
+ return requestSignal.reason !== localAbortReason;
34
+ },
35
+ };
36
+ }
@@ -1,7 +1,7 @@
1
1
  import { $env } from "@oh-my-pi/pi-utils";
2
2
 
3
3
  const DEFAULT_OPENAI_STREAM_IDLE_TIMEOUT_MS = 45_000;
4
- const DEFAULT_STREAM_FIRST_EVENT_TIMEOUT_MS = 15_000;
4
+ const DEFAULT_STREAM_FIRST_EVENT_TIMEOUT_MS = 45_000;
5
5
 
6
6
  function normalizeIdleTimeoutMs(value: string | undefined, fallback: number): number | undefined {
7
7
  if (value === undefined) return fallback;
@@ -22,14 +22,16 @@ export function getOpenAIStreamIdleTimeoutMs(): number | undefined {
22
22
 
23
23
  /**
24
24
  * Returns the timeout used while waiting for the first stream event.
25
+ * The first token can legitimately take longer than later inter-event gaps,
26
+ * so the default never undershoots the steady-state idle timeout.
25
27
  *
26
28
  * Set `PI_STREAM_FIRST_EVENT_TIMEOUT_MS=0` to disable the watchdog.
27
29
  */
28
30
  export function getStreamFirstEventTimeoutMs(idleTimeoutMs?: number): number | undefined {
29
- const fallback = Math.min(
30
- DEFAULT_STREAM_FIRST_EVENT_TIMEOUT_MS,
31
- idleTimeoutMs ?? DEFAULT_STREAM_FIRST_EVENT_TIMEOUT_MS,
32
- );
31
+ const fallback =
32
+ idleTimeoutMs === undefined
33
+ ? DEFAULT_STREAM_FIRST_EVENT_TIMEOUT_MS
34
+ : Math.max(DEFAULT_STREAM_FIRST_EVENT_TIMEOUT_MS, idleTimeoutMs);
33
35
  return normalizeIdleTimeoutMs($env.PI_STREAM_FIRST_EVENT_TIMEOUT_MS, fallback);
34
36
  }
35
37