lm-deluge 0.0.57__py3-none-any.whl → 0.0.59__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lm-deluge might be problematic. (The original page linked to more details here; the link was not preserved in this text export.)

@@ -10,6 +10,18 @@ ANTHROPIC_MODELS = {
10
10
  # ░███
11
11
  # █████
12
12
  #
13
+ "claude-4.5-haiku": {
14
+ "id": "claude-4.5-haiku",
15
+ "name": "claude-haiku-4-5-20251001",
16
+ "api_base": "https://api.anthropic.com/v1",
17
+ "api_key_env_var": "ANTHROPIC_API_KEY",
18
+ "supports_json": False,
19
+ "api_spec": "anthropic",
20
+ "input_cost": 1.0,
21
+ "cached_input_cost": 0.10,
22
+ "cache_write_cost": 1.25,
23
+ "output_cost": 3.0,
24
+ },
13
25
  "claude-4.5-sonnet": {
14
26
  "id": "claude-4.5-sonnet",
15
27
  "name": "claude-sonnet-4-5-20250929",
@@ -21,8 +33,6 @@ ANTHROPIC_MODELS = {
21
33
  "cached_input_cost": 0.30,
22
34
  "cache_write_cost": 3.75,
23
35
  "output_cost": 15.0,
24
- "requests_per_minute": 4_000,
25
- "tokens_per_minute": 400_000,
26
36
  },
27
37
  "claude-4.1-opus": {
28
38
  "id": "claude-4.1-opus",
@@ -35,8 +45,6 @@ ANTHROPIC_MODELS = {
35
45
  "cached_input_cost": 1.50,
36
46
  "cache_write_cost": 18.75,
37
47
  "output_cost": 75.0,
38
- "requests_per_minute": 4_000,
39
- "tokens_per_minute": 400_000,
40
48
  "reasoning_model": True,
41
49
  },
42
50
  "claude-4-opus": {
@@ -50,8 +58,6 @@ ANTHROPIC_MODELS = {
50
58
  "cached_input_cost": 1.50,
51
59
  "cache_write_cost": 18.75,
52
60
  "output_cost": 75.0,
53
- "requests_per_minute": 4_000,
54
- "tokens_per_minute": 400_000,
55
61
  "reasoning_model": True,
56
62
  },
57
63
  "claude-4-sonnet": {
@@ -65,8 +71,6 @@ ANTHROPIC_MODELS = {
65
71
  "cached_input_cost": 0.30,
66
72
  "cache_write_cost": 3.75,
67
73
  "output_cost": 15.0,
68
- "requests_per_minute": 4_000,
69
- "tokens_per_minute": 400_000,
70
74
  },
71
75
  "claude-3.7-sonnet": {
72
76
  "id": "claude-3.7-sonnet",
@@ -79,8 +83,6 @@ ANTHROPIC_MODELS = {
79
83
  "cached_input_cost": 0.30,
80
84
  "cache_write_cost": 3.75,
81
85
  "output_cost": 15.0,
82
- "requests_per_minute": 4_000,
83
- "tokens_per_minute": 400_000,
84
86
  "reasoning_model": True,
85
87
  },
86
88
  "claude-3.6-sonnet": {
@@ -94,8 +96,6 @@ ANTHROPIC_MODELS = {
94
96
  "cached_input_cost": 0.30,
95
97
  "cache_write_cost": 3.75,
96
98
  "output_cost": 15.0,
97
- "requests_per_minute": 4_000,
98
- "tokens_per_minute": 400_000,
99
99
  },
100
100
  "claude-3.5-sonnet": {
101
101
  "id": "claude-3.5-sonnet",
@@ -108,8 +108,6 @@ ANTHROPIC_MODELS = {
108
108
  "cached_input_cost": 0.30,
109
109
  "cache_write_cost": 3.75,
110
110
  "output_cost": 15.0,
111
- "requests_per_minute": 4_000,
112
- "tokens_per_minute": 400_000,
113
111
  },
114
112
  "claude-3-opus": {
115
113
  "id": "claude-3-opus",
@@ -120,8 +118,6 @@ ANTHROPIC_MODELS = {
120
118
  "api_spec": "anthropic",
121
119
  "input_cost": 15.0,
122
120
  "output_cost": 75.0,
123
- "requests_per_minute": 4_000,
124
- "tokens_per_minute": 400_000,
125
121
  },
126
122
  "claude-3.5-haiku": {
127
123
  "id": "claude-3.5-haiku",
@@ -134,8 +130,6 @@ ANTHROPIC_MODELS = {
134
130
  "cached_input_cost": 0.08,
135
131
  "cache_write_cost": 1.00,
136
132
  "output_cost": 4.00,
137
- "requests_per_minute": 20_000,
138
- "tokens_per_minute": 4_000_000, # supposed to be this but they fucked up
139
133
  },
140
134
  "claude-3-haiku": {
141
135
  "id": "claude-3-haiku",
@@ -148,7 +142,5 @@ ANTHROPIC_MODELS = {
148
142
  "cache_write_cost": 0.30,
149
143
  "cached_input_cost": 0.03,
150
144
  "output_cost": 1.25,
151
- "requests_per_minute": 10_000,
152
- "tokens_per_minute": 4_000_000, # supposed to be this but they fucked up
153
145
  },
154
146
  }
@@ -16,8 +16,6 @@ BEDROCK_MODELS = {
16
16
  "api_spec": "bedrock",
17
17
  "input_cost": 0.25,
18
18
  "output_cost": 1.25,
19
- "requests_per_minute": 4_000,
20
- "tokens_per_minute": 8_000_000,
21
19
  },
22
20
  "claude-3.5-haiku-bedrock": {
23
21
  "id": "claude-3.5-haiku-bedrock",
@@ -28,8 +26,6 @@ BEDROCK_MODELS = {
28
26
  "api_spec": "bedrock",
29
27
  "input_cost": 0.25,
30
28
  "output_cost": 1.25,
31
- "requests_per_minute": 4_000,
32
- "tokens_per_minute": 8_000_000,
33
29
  },
34
30
  "claude-3.5-sonnet-bedrock": {
35
31
  "id": "claude-3.5-sonnet-bedrock",
@@ -40,8 +36,6 @@ BEDROCK_MODELS = {
40
36
  "api_spec": "bedrock",
41
37
  "input_cost": 3.0,
42
38
  "output_cost": 15.0,
43
- "requests_per_minute": 4_000,
44
- "tokens_per_minute": 400_000,
45
39
  "reasoning_model": False,
46
40
  },
47
41
  "claude-3.6-sonnet-bedrock": {
@@ -53,8 +47,6 @@ BEDROCK_MODELS = {
53
47
  "api_spec": "bedrock",
54
48
  "input_cost": 3.0,
55
49
  "output_cost": 15.0,
56
- "requests_per_minute": 4_000,
57
- "tokens_per_minute": 400_000,
58
50
  "reasoning_model": False,
59
51
  },
60
52
  "claude-3.7-sonnet-bedrock": {
@@ -66,8 +58,6 @@ BEDROCK_MODELS = {
66
58
  "api_spec": "bedrock",
67
59
  "input_cost": 3.0,
68
60
  "output_cost": 15.0,
69
- "requests_per_minute": 4_000,
70
- "tokens_per_minute": 400_000,
71
61
  "reasoning_model": True,
72
62
  },
73
63
  "claude-4-sonnet-bedrock": {
@@ -79,8 +69,6 @@ BEDROCK_MODELS = {
79
69
  "api_spec": "bedrock",
80
70
  "input_cost": 3.0,
81
71
  "output_cost": 15.0,
82
- "requests_per_minute": 4_000,
83
- "tokens_per_minute": 400_000,
84
72
  "reasoning_model": True,
85
73
  },
86
74
  "claude-4-opus-bedrock": {
@@ -92,8 +80,6 @@ BEDROCK_MODELS = {
92
80
  "api_spec": "bedrock",
93
81
  "input_cost": 3.0,
94
82
  "output_cost": 15.0,
95
- "requests_per_minute": 4_000,
96
- "tokens_per_minute": 400_000,
97
83
  "reasoning_model": True,
98
84
  },
99
85
  # GPT-OSS on AWS Bedrock
@@ -15,8 +15,6 @@ COHERE_MODELS = {
15
15
  "api_spec": "openai",
16
16
  "input_cost": 0.5,
17
17
  "output_cost": 1.5,
18
- "requests_per_minute": 10_000,
19
- "tokens_per_minute": None,
20
18
  },
21
19
  "aya-expanse-32b": {
22
20
  "id": "aya-expanse-32b",
@@ -26,8 +24,6 @@ COHERE_MODELS = {
26
24
  "api_spec": "openai",
27
25
  "input_cost": 0.5,
28
26
  "output_cost": 1.5,
29
- "requests_per_minute": 10_000,
30
- "tokens_per_minute": None,
31
27
  },
32
28
  "aya-vision-8b": {
33
29
  "id": "aya-vision-8b",
@@ -37,8 +33,6 @@ COHERE_MODELS = {
37
33
  "api_spec": "openai",
38
34
  "input_cost": 0.5,
39
35
  "output_cost": 1.5,
40
- "requests_per_minute": 10_000,
41
- "tokens_per_minute": None,
42
36
  },
43
37
  "aya-vision-32b": {
44
38
  "id": "aya-vision-32b",
@@ -48,8 +42,6 @@ COHERE_MODELS = {
48
42
  "api_spec": "openai",
49
43
  "input_cost": 0.5,
50
44
  "output_cost": 1.5,
51
- "requests_per_minute": 10_000,
52
- "tokens_per_minute": None,
53
45
  },
54
46
  "command-a": {
55
47
  "id": "command-a",
@@ -59,8 +51,6 @@ COHERE_MODELS = {
59
51
  "api_spec": "openai",
60
52
  "input_cost": 0.5,
61
53
  "output_cost": 1.5,
62
- "requests_per_minute": 10_000,
63
- "tokens_per_minute": None,
64
54
  },
65
55
  "command-r-7b": {
66
56
  "id": "command-r-cohere",
@@ -70,8 +60,6 @@ COHERE_MODELS = {
70
60
  "api_spec": "openai",
71
61
  "input_cost": 0.5,
72
62
  "output_cost": 1.5,
73
- "requests_per_minute": 10_000,
74
- "tokens_per_minute": None,
75
63
  },
76
64
  "command-r": {
77
65
  "id": "command-r",
@@ -81,8 +69,6 @@ COHERE_MODELS = {
81
69
  "api_spec": "openai",
82
70
  "input_cost": 0.5,
83
71
  "output_cost": 1.5,
84
- "requests_per_minute": 10_000,
85
- "tokens_per_minute": None,
86
72
  },
87
73
  "command-r-plus": {
88
74
  "id": "command-r-plus",
@@ -92,7 +78,5 @@ COHERE_MODELS = {
92
78
  "api_spec": "openai",
93
79
  "input_cost": 3.0,
94
80
  "output_cost": 15.0,
95
- "requests_per_minute": 10_000,
96
- "tokens_per_minute": None,
97
81
  },
98
82
  }
@@ -20,8 +20,6 @@ GOOGLE_MODELS = {
20
20
  "input_cost": 0.1,
21
21
  "cached_input_cost": 0.025,
22
22
  "output_cost": 0.4,
23
- "requests_per_minute": 20,
24
- "tokens_per_minute": 100_000,
25
23
  "reasoning_model": False,
26
24
  },
27
25
  "gemini-2.0-flash-lite-compat": {
@@ -34,8 +32,6 @@ GOOGLE_MODELS = {
34
32
  "api_spec": "openai",
35
33
  "input_cost": 0.075,
36
34
  "output_cost": 0.3,
37
- "requests_per_minute": 20,
38
- "tokens_per_minute": 100_000,
39
35
  "reasoning_model": False,
40
36
  },
41
37
  "gemini-2.5-pro-compat": {
@@ -49,8 +45,6 @@ GOOGLE_MODELS = {
49
45
  "input_cost": 1.25,
50
46
  "cached_input_cost": 0.31,
51
47
  "output_cost": 10.0,
52
- "requests_per_minute": 20,
53
- "tokens_per_minute": 100_000,
54
48
  "reasoning_model": True,
55
49
  },
56
50
  "gemini-2.5-flash-compat": {
@@ -64,8 +58,6 @@ GOOGLE_MODELS = {
64
58
  "input_cost": 0.3,
65
59
  "cached_input_cost": 0.075,
66
60
  "output_cost": 2.5,
67
- "requests_per_minute": 20,
68
- "tokens_per_minute": 100_000,
69
61
  "reasoning_model": True,
70
62
  },
71
63
  "gemini-2.5-flash-lite-compat": {
@@ -79,8 +71,6 @@ GOOGLE_MODELS = {
79
71
  "input_cost": 0.1,
80
72
  "cached_input_cost": 0.025,
81
73
  "output_cost": 0.4,
82
- "requests_per_minute": 20,
83
- "tokens_per_minute": 100_000,
84
74
  "reasoning_model": True,
85
75
  },
86
76
  # Native Gemini API versions with file support
@@ -95,8 +85,6 @@ GOOGLE_MODELS = {
95
85
  "input_cost": 0.1,
96
86
  "cached_input_cost": 0.025,
97
87
  "output_cost": 0.4,
98
- "requests_per_minute": 20,
99
- "tokens_per_minute": 100_000,
100
88
  "reasoning_model": False,
101
89
  },
102
90
  "gemini-2.0-flash-lite": {
@@ -109,8 +97,6 @@ GOOGLE_MODELS = {
109
97
  "api_spec": "gemini",
110
98
  "input_cost": 0.075,
111
99
  "output_cost": 0.3,
112
- "requests_per_minute": 20,
113
- "tokens_per_minute": 100_000,
114
100
  "reasoning_model": False,
115
101
  },
116
102
  "gemini-2.5-pro": {
@@ -124,8 +110,6 @@ GOOGLE_MODELS = {
124
110
  "input_cost": 1.25,
125
111
  "cached_input_cost": 0.31,
126
112
  "output_cost": 10.0,
127
- "requests_per_minute": 20,
128
- "tokens_per_minute": 100_000,
129
113
  "reasoning_model": True,
130
114
  },
131
115
  "gemini-2.5-flash": {
@@ -139,8 +123,6 @@ GOOGLE_MODELS = {
139
123
  "input_cost": 0.3,
140
124
  "cached_input_cost": 0.075,
141
125
  "output_cost": 2.5,
142
- "requests_per_minute": 20,
143
- "tokens_per_minute": 100_000,
144
126
  "reasoning_model": True,
145
127
  },
146
128
  "gemini-2.5-flash-lite": {
@@ -154,8 +136,6 @@ GOOGLE_MODELS = {
154
136
  "input_cost": 0.1,
155
137
  "cached_input_cost": 0.025,
156
138
  "output_cost": 0.4,
157
- "requests_per_minute": 20,
158
- "tokens_per_minute": 100_000,
159
139
  "reasoning_model": True,
160
140
  },
161
141
  }
lm_deluge/models/grok.py CHANGED
@@ -7,6 +7,54 @@ XAI_MODELS = {
7
7
  # 888 888 888 888 888 888888K
8
8
  # Y88b d88P 888 Y88..88P 888 "88b
9
9
  # "Y8888P88 888 "Y88P" 888 888
10
+ "grok-code-fast-1": {
11
+ "id": "grok-code-fast-1",
12
+ "name": "grok-code-fast-1",
13
+ "api_base": "https://api.x.ai/v1",
14
+ "api_key_env_var": "GROK_API_KEY",
15
+ "supports_json": True,
16
+ "supports_logprobs": True,
17
+ "api_spec": "openai",
18
+ "input_cost": 0.2,
19
+ "output_cost": 1.5,
20
+ "reasoning_model": False,
21
+ },
22
+ "grok-4-fast-reasoning": {
23
+ "id": "grok-4-fast-reasoning",
24
+ "name": "grok-4-fast-reasoning",
25
+ "api_base": "https://api.x.ai/v1",
26
+ "api_key_env_var": "GROK_API_KEY",
27
+ "supports_json": True,
28
+ "supports_logprobs": True,
29
+ "api_spec": "openai",
30
+ "input_cost": 0.2,
31
+ "output_cost": 0.5,
32
+ "reasoning_model": False,
33
+ },
34
+ "grok-4-fast-non-reasoning": {
35
+ "id": "grok-4-fast-non-reasoning",
36
+ "name": "grok-4-fast-non-reasoning",
37
+ "api_base": "https://api.x.ai/v1",
38
+ "api_key_env_var": "GROK_API_KEY",
39
+ "supports_json": True,
40
+ "supports_logprobs": True,
41
+ "api_spec": "openai",
42
+ "input_cost": 0.2,
43
+ "output_cost": 0.5,
44
+ "reasoning_model": False,
45
+ },
46
+ "grok-4": {
47
+ "id": "grok-4",
48
+ "name": "grok-4-0709",
49
+ "api_base": "https://api.x.ai/v1",
50
+ "api_key_env_var": "GROK_API_KEY",
51
+ "supports_json": True,
52
+ "supports_logprobs": True,
53
+ "api_spec": "openai",
54
+ "input_cost": 2.0,
55
+ "output_cost": 8.0,
56
+ "reasoning_model": False,
57
+ },
10
58
  "grok-3": {
11
59
  "id": "grok-3",
12
60
  "name": "grok-3-latest",
@@ -17,8 +65,6 @@ XAI_MODELS = {
17
65
  "api_spec": "openai",
18
66
  "input_cost": 2.0,
19
67
  "output_cost": 8.0,
20
- "requests_per_minute": 20,
21
- "tokens_per_minute": 100_000,
22
68
  "reasoning_model": False,
23
69
  },
24
70
  "grok-3-mini": {
@@ -31,8 +77,6 @@ XAI_MODELS = {
31
77
  "api_spec": "openai",
32
78
  "input_cost": 2.0,
33
79
  "output_cost": 8.0,
34
- "requests_per_minute": 20,
35
- "tokens_per_minute": 100_000,
36
80
  "reasoning_model": True,
37
81
  },
38
82
  }
lm_deluge/models/groq.py CHANGED
@@ -41,10 +41,10 @@ GROQ_MODELS = {
41
41
  },
42
42
  "kimi-k2-groq": {
43
43
  "id": "kimi-k2-groq",
44
- "name": "moonshotai/kimi-k2-instruct",
44
+ "name": "moonshotai/kimi-k2-instruct-0905",
45
45
  "api_base": "https://api.groq.com/openai/v1",
46
46
  "api_key_env_var": "GROQ_API_KEY",
47
- "supports_json": False,
47
+ "supports_json": True,
48
48
  "api_spec": "openai",
49
49
  },
50
50
  "gpt-oss-120b-groq": {
lm_deluge/models/meta.py CHANGED
@@ -16,8 +16,6 @@ META_MODELS = {
16
16
  "api_spec": "openai",
17
17
  "input_cost": 0.0,
18
18
  "output_cost": 0.0,
19
- "requests_per_minute": 3_000,
20
- "tokens_per_minute": 1_000_000,
21
19
  "reasoning_model": False,
22
20
  },
23
21
  "llama-4-maverick": {
@@ -30,8 +28,6 @@ META_MODELS = {
30
28
  "api_spec": "openai",
31
29
  "input_cost": 0.0,
32
30
  "output_cost": 0.0,
33
- "requests_per_minute": 3_000,
34
- "tokens_per_minute": 1_000_000,
35
31
  "reasoning_model": False,
36
32
  },
37
33
  "llama-3.3-70b": {
@@ -44,8 +40,6 @@ META_MODELS = {
44
40
  "api_spec": "openai",
45
41
  "input_cost": 0.0,
46
42
  "output_cost": 0.0,
47
- "requests_per_minute": 3_000,
48
- "tokens_per_minute": 1_000_000,
49
43
  "reasoning_model": False,
50
44
  },
51
45
  "llama-3.3-8b": {
@@ -58,8 +52,6 @@ META_MODELS = {
58
52
  "api_spec": "openai",
59
53
  "input_cost": 0.0,
60
54
  "output_cost": 0.0,
61
- "requests_per_minute": 3_000,
62
- "tokens_per_minute": 1_000_000,
63
55
  "reasoning_model": False,
64
56
  },
65
57
  }
@@ -77,8 +77,6 @@ OPENAI_MODELS = {
77
77
  "api_spec": "openai",
78
78
  "input_cost": 3.0,
79
79
  "output_cost": 12.0,
80
- "requests_per_minute": 20,
81
- "tokens_per_minute": 100_000,
82
80
  "reasoning_model": False,
83
81
  },
84
82
  "o3": {
@@ -93,8 +91,6 @@ OPENAI_MODELS = {
93
91
  "input_cost": 2.0,
94
92
  "cached_input_cost": 0.50,
95
93
  "output_cost": 8.0,
96
- "requests_per_minute": 20,
97
- "tokens_per_minute": 100_000,
98
94
  "reasoning_model": True,
99
95
  },
100
96
  "o4-mini": {
@@ -109,8 +105,6 @@ OPENAI_MODELS = {
109
105
  "input_cost": 1.1,
110
106
  "cached_input_cost": 0.275,
111
107
  "output_cost": 4.4,
112
- "requests_per_minute": 20,
113
- "tokens_per_minute": 100_000,
114
108
  "reasoning_model": True,
115
109
  },
116
110
  "gpt-4.1": {
@@ -125,8 +119,6 @@ OPENAI_MODELS = {
125
119
  "input_cost": 2.0,
126
120
  "cached_input_cost": 0.50,
127
121
  "output_cost": 8.0,
128
- "requests_per_minute": 20,
129
- "tokens_per_minute": 100_000,
130
122
  "reasoning_model": False,
131
123
  },
132
124
  "gpt-4.1-mini": {
@@ -141,8 +133,6 @@ OPENAI_MODELS = {
141
133
  "input_cost": 0.4,
142
134
  "cached_input_cost": 0.10,
143
135
  "output_cost": 1.6,
144
- "requests_per_minute": 20,
145
- "tokens_per_minute": 100_000,
146
136
  "reasoning_model": False,
147
137
  },
148
138
  "gpt-4.1-nano": {
@@ -157,8 +147,6 @@ OPENAI_MODELS = {
157
147
  "input_cost": 0.1,
158
148
  "cached_input_cost": 0.025,
159
149
  "output_cost": 0.4,
160
- "requests_per_minute": 20,
161
- "tokens_per_minute": 100_000,
162
150
  "reasoning_model": False,
163
151
  },
164
152
  "gpt-4.5": {
@@ -172,8 +160,6 @@ OPENAI_MODELS = {
172
160
  "api_spec": "openai",
173
161
  "input_cost": 75.0,
174
162
  "output_cost": 150.0,
175
- "requests_per_minute": 20,
176
- "tokens_per_minute": 100_000,
177
163
  "reasoning_model": False,
178
164
  },
179
165
  "o3-mini": {
@@ -188,8 +174,6 @@ OPENAI_MODELS = {
188
174
  "input_cost": 1.1,
189
175
  "cached_input_cost": 0.55,
190
176
  "output_cost": 4.4,
191
- "requests_per_minute": 20,
192
- "tokens_per_minute": 100_000,
193
177
  "reasoning_model": True,
194
178
  },
195
179
  "o1": {
@@ -204,8 +188,6 @@ OPENAI_MODELS = {
204
188
  "input_cost": 15.0,
205
189
  "cached_input_cost": 7.50,
206
190
  "output_cost": 60.0,
207
- "requests_per_minute": 20,
208
- "tokens_per_minute": 100_000,
209
191
  "reasoning_model": True,
210
192
  },
211
193
  "o1-preview": {
@@ -219,8 +201,6 @@ OPENAI_MODELS = {
219
201
  "api_spec": "openai",
220
202
  "input_cost": 15.0,
221
203
  "output_cost": 60.0,
222
- "requests_per_minute": 20,
223
- "tokens_per_minute": 100_000,
224
204
  "reasoning_model": True,
225
205
  },
226
206
  "o1-mini": {
@@ -235,8 +215,6 @@ OPENAI_MODELS = {
235
215
  "input_cost": 1.1,
236
216
  "cached_input_cost": 0.55,
237
217
  "output_cost": 4.4,
238
- "requests_per_minute": 20,
239
- "tokens_per_minute": 100_000,
240
218
  "reasoning_model": True,
241
219
  },
242
220
  "gpt-4o": {
@@ -251,8 +229,6 @@ OPENAI_MODELS = {
251
229
  "input_cost": 2.50,
252
230
  "cached_input_cost": 1.25,
253
231
  "output_cost": 10.0,
254
- "requests_per_minute": 10_000,
255
- "tokens_per_minute": 30_000_000,
256
232
  },
257
233
  "gpt-4o-mini": {
258
234
  "id": "gpt-4o-mini",
@@ -266,8 +242,6 @@ OPENAI_MODELS = {
266
242
  "input_cost": 0.15,
267
243
  "cached_input_cost": 0.075,
268
244
  "output_cost": 0.6,
269
- "requests_per_minute": 60_000,
270
- "tokens_per_minute": 250_000_000,
271
245
  },
272
246
  "gpt-3.5-turbo": {
273
247
  "id": "gpt-3.5-turbo",
@@ -280,8 +254,6 @@ OPENAI_MODELS = {
280
254
  "api_spec": "openai",
281
255
  "input_cost": 0.5,
282
256
  "output_cost": 1.5,
283
- "requests_per_minute": 40_000,
284
- "tokens_per_minute": 75_000_000,
285
257
  },
286
258
  "gpt-4-turbo": {
287
259
  "id": "gpt-4-turbo",
@@ -294,8 +266,6 @@ OPENAI_MODELS = {
294
266
  "api_spec": "openai",
295
267
  "input_cost": 10.0,
296
268
  "output_cost": 30.0,
297
- "requests_per_minute": 10_000,
298
- "tokens_per_minute": 1_500_000,
299
269
  },
300
270
  "gpt-4": {
301
271
  "id": "gpt-4",
@@ -308,8 +278,6 @@ OPENAI_MODELS = {
308
278
  "api_spec": "openai",
309
279
  "input_cost": 30.0,
310
280
  "output_cost": 60.0,
311
- "requests_per_minute": 10_000,
312
- "tokens_per_minute": 300_000,
313
281
  },
314
282
  "gpt-4-32k": {
315
283
  "id": "gpt-4-32k",
@@ -322,7 +290,5 @@ OPENAI_MODELS = {
322
290
  "api_spec": "openai",
323
291
  "input_cost": 60.0,
324
292
  "output_cost": 120.0,
325
- "requests_per_minute": 1_000,
326
- "tokens_per_minute": 150_000,
327
293
  },
328
294
  }
@@ -1 +1,64 @@
1
- OPENROUTER_MODELS = {}
1
+ OPENROUTER_MODELS = {
2
+ "glm-4.6-openrouter": {
3
+ "id": "glm-4.6-openrouter",
4
+ "name": "z-ai/glm-4.6",
5
+ "api_base": "https://openrouter.ai/api/v1",
6
+ "api_key_env_var": "OPENROUTER_API_KEY",
7
+ "supports_json": True,
8
+ "api_spec": "openai",
9
+ "input_cost": 0.6,
10
+ "cached_input_cost": 0.11,
11
+ "cache_write_cost": 0.6,
12
+ "output_cost": 2.20,
13
+ },
14
+ "deepseek-r1-openrouter": {
15
+ "id": "deepseek-r1-openrouter",
16
+ "name": "deepseek/deepseek-r1-0528",
17
+ "api_base": "https://openrouter.ai/api/v1",
18
+ "api_key_env_var": "OPENROUTER_API_KEY",
19
+ "supports_json": True,
20
+ "api_spec": "openai",
21
+ "input_cost": 0.40,
22
+ "cached_input_cost": 0.40,
23
+ "cache_write_cost": 0.40,
24
+ "output_cost": 1.75,
25
+ },
26
+ "deepseek-3.1-openrouter": {
27
+ "id": "deepseek-3.1-openrouter",
28
+ "name": "deepseek/deepseek-v3.1-terminus",
29
+ "api_base": "https://openrouter.ai/api/v1",
30
+ "api_key_env_var": "OPENROUTER_API_KEY",
31
+ "supports_json": True,
32
+ "api_spec": "openai",
33
+ "input_cost": 0.23,
34
+ "cached_input_cost": 0.23,
35
+ "cache_write_cost": 0.23,
36
+ "output_cost": 0.9,
37
+ },
38
+ "deepseek-3.2-openrouter": {
39
+ "id": "deepseek-3.2-openrouter",
40
+ "name": "deepseek/deepseek-v3.2-exp",
41
+ "api_base": "https://openrouter.ai/api/v1",
42
+ "api_key_env_var": "OPENROUTER_API_KEY",
43
+ "supports_json": True,
44
+ "api_spec": "openai",
45
+ "input_cost": 0.27,
46
+ "cached_input_cost": 0.27,
47
+ "cache_write_cost": 0.27,
48
+ "output_cost": 0.4,
49
+ },
50
+ # "gpt-oss-20b-openrouter": {},
51
+ # "gpt-oss-120b-openrouter": {},
52
+ "kimi-k2-openrouter": {
53
+ "id": "kimi-k2-openrouter",
54
+ "name": "z-ai/glm-4.6",
55
+ "api_base": "https://openrouter.ai/api/v1",
56
+ "api_key_env_var": "OPENROUTER_API_KEY",
57
+ "supports_json": True,
58
+ "api_spec": "openai",
59
+ "input_cost": 0.6,
60
+ "cached_input_cost": 0.11,
61
+ "cache_write_cost": 0.6,
62
+ "output_cost": 2.20,
63
+ },
64
+ }