lm-deluge 0.0.56__py3-none-any.whl → 0.0.69__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. lm_deluge/__init__.py +12 -1
  2. lm_deluge/api_requests/anthropic.py +12 -1
  3. lm_deluge/api_requests/base.py +87 -5
  4. lm_deluge/api_requests/bedrock.py +3 -4
  5. lm_deluge/api_requests/chat_reasoning.py +4 -0
  6. lm_deluge/api_requests/gemini.py +7 -6
  7. lm_deluge/api_requests/mistral.py +8 -9
  8. lm_deluge/api_requests/openai.py +179 -124
  9. lm_deluge/batches.py +25 -9
  10. lm_deluge/client.py +280 -67
  11. lm_deluge/config.py +1 -1
  12. lm_deluge/file.py +382 -13
  13. lm_deluge/mock_openai.py +482 -0
  14. lm_deluge/models/__init__.py +12 -8
  15. lm_deluge/models/anthropic.py +12 -20
  16. lm_deluge/models/bedrock.py +0 -14
  17. lm_deluge/models/cohere.py +0 -16
  18. lm_deluge/models/google.py +0 -20
  19. lm_deluge/models/grok.py +48 -4
  20. lm_deluge/models/groq.py +2 -2
  21. lm_deluge/models/kimi.py +34 -0
  22. lm_deluge/models/meta.py +0 -8
  23. lm_deluge/models/minimax.py +10 -0
  24. lm_deluge/models/openai.py +28 -34
  25. lm_deluge/models/openrouter.py +64 -1
  26. lm_deluge/models/together.py +0 -16
  27. lm_deluge/prompt.py +138 -29
  28. lm_deluge/request_context.py +9 -11
  29. lm_deluge/tool.py +395 -19
  30. lm_deluge/tracker.py +11 -5
  31. lm_deluge/warnings.py +46 -0
  32. {lm_deluge-0.0.56.dist-info → lm_deluge-0.0.69.dist-info}/METADATA +3 -1
  33. {lm_deluge-0.0.56.dist-info → lm_deluge-0.0.69.dist-info}/RECORD +36 -33
  34. lm_deluge/agent.py +0 -0
  35. lm_deluge/gemini_limits.py +0 -65
  36. {lm_deluge-0.0.56.dist-info → lm_deluge-0.0.69.dist-info}/WHEEL +0 -0
  37. {lm_deluge-0.0.56.dist-info → lm_deluge-0.0.69.dist-info}/licenses/LICENSE +0 -0
  38. {lm_deluge-0.0.56.dist-info → lm_deluge-0.0.69.dist-info}/top_level.txt +0 -0
@@ -16,8 +16,6 @@ BEDROCK_MODELS = {
16
16
  "api_spec": "bedrock",
17
17
  "input_cost": 0.25,
18
18
  "output_cost": 1.25,
19
- "requests_per_minute": 4_000,
20
- "tokens_per_minute": 8_000_000,
21
19
  },
22
20
  "claude-3.5-haiku-bedrock": {
23
21
  "id": "claude-3.5-haiku-bedrock",
@@ -28,8 +26,6 @@ BEDROCK_MODELS = {
28
26
  "api_spec": "bedrock",
29
27
  "input_cost": 0.25,
30
28
  "output_cost": 1.25,
31
- "requests_per_minute": 4_000,
32
- "tokens_per_minute": 8_000_000,
33
29
  },
34
30
  "claude-3.5-sonnet-bedrock": {
35
31
  "id": "claude-3.5-sonnet-bedrock",
@@ -40,8 +36,6 @@ BEDROCK_MODELS = {
40
36
  "api_spec": "bedrock",
41
37
  "input_cost": 3.0,
42
38
  "output_cost": 15.0,
43
- "requests_per_minute": 4_000,
44
- "tokens_per_minute": 400_000,
45
39
  "reasoning_model": False,
46
40
  },
47
41
  "claude-3.6-sonnet-bedrock": {
@@ -53,8 +47,6 @@ BEDROCK_MODELS = {
53
47
  "api_spec": "bedrock",
54
48
  "input_cost": 3.0,
55
49
  "output_cost": 15.0,
56
- "requests_per_minute": 4_000,
57
- "tokens_per_minute": 400_000,
58
50
  "reasoning_model": False,
59
51
  },
60
52
  "claude-3.7-sonnet-bedrock": {
@@ -66,8 +58,6 @@ BEDROCK_MODELS = {
66
58
  "api_spec": "bedrock",
67
59
  "input_cost": 3.0,
68
60
  "output_cost": 15.0,
69
- "requests_per_minute": 4_000,
70
- "tokens_per_minute": 400_000,
71
61
  "reasoning_model": True,
72
62
  },
73
63
  "claude-4-sonnet-bedrock": {
@@ -79,8 +69,6 @@ BEDROCK_MODELS = {
79
69
  "api_spec": "bedrock",
80
70
  "input_cost": 3.0,
81
71
  "output_cost": 15.0,
82
- "requests_per_minute": 4_000,
83
- "tokens_per_minute": 400_000,
84
72
  "reasoning_model": True,
85
73
  },
86
74
  "claude-4-opus-bedrock": {
@@ -92,8 +80,6 @@ BEDROCK_MODELS = {
92
80
  "api_spec": "bedrock",
93
81
  "input_cost": 3.0,
94
82
  "output_cost": 15.0,
95
- "requests_per_minute": 4_000,
96
- "tokens_per_minute": 400_000,
97
83
  "reasoning_model": True,
98
84
  },
99
85
  # GPT-OSS on AWS Bedrock
@@ -15,8 +15,6 @@ COHERE_MODELS = {
15
15
  "api_spec": "openai",
16
16
  "input_cost": 0.5,
17
17
  "output_cost": 1.5,
18
- "requests_per_minute": 10_000,
19
- "tokens_per_minute": None,
20
18
  },
21
19
  "aya-expanse-32b": {
22
20
  "id": "aya-expanse-32b",
@@ -26,8 +24,6 @@ COHERE_MODELS = {
26
24
  "api_spec": "openai",
27
25
  "input_cost": 0.5,
28
26
  "output_cost": 1.5,
29
- "requests_per_minute": 10_000,
30
- "tokens_per_minute": None,
31
27
  },
32
28
  "aya-vision-8b": {
33
29
  "id": "aya-vision-8b",
@@ -37,8 +33,6 @@ COHERE_MODELS = {
37
33
  "api_spec": "openai",
38
34
  "input_cost": 0.5,
39
35
  "output_cost": 1.5,
40
- "requests_per_minute": 10_000,
41
- "tokens_per_minute": None,
42
36
  },
43
37
  "aya-vision-32b": {
44
38
  "id": "aya-vision-32b",
@@ -48,8 +42,6 @@ COHERE_MODELS = {
48
42
  "api_spec": "openai",
49
43
  "input_cost": 0.5,
50
44
  "output_cost": 1.5,
51
- "requests_per_minute": 10_000,
52
- "tokens_per_minute": None,
53
45
  },
54
46
  "command-a": {
55
47
  "id": "command-a",
@@ -59,8 +51,6 @@ COHERE_MODELS = {
59
51
  "api_spec": "openai",
60
52
  "input_cost": 0.5,
61
53
  "output_cost": 1.5,
62
- "requests_per_minute": 10_000,
63
- "tokens_per_minute": None,
64
54
  },
65
55
  "command-r-7b": {
66
56
  "id": "command-r-cohere",
@@ -70,8 +60,6 @@ COHERE_MODELS = {
70
60
  "api_spec": "openai",
71
61
  "input_cost": 0.5,
72
62
  "output_cost": 1.5,
73
- "requests_per_minute": 10_000,
74
- "tokens_per_minute": None,
75
63
  },
76
64
  "command-r": {
77
65
  "id": "command-r",
@@ -81,8 +69,6 @@ COHERE_MODELS = {
81
69
  "api_spec": "openai",
82
70
  "input_cost": 0.5,
83
71
  "output_cost": 1.5,
84
- "requests_per_minute": 10_000,
85
- "tokens_per_minute": None,
86
72
  },
87
73
  "command-r-plus": {
88
74
  "id": "command-r-plus",
@@ -92,7 +78,5 @@ COHERE_MODELS = {
92
78
  "api_spec": "openai",
93
79
  "input_cost": 3.0,
94
80
  "output_cost": 15.0,
95
- "requests_per_minute": 10_000,
96
- "tokens_per_minute": None,
97
81
  },
98
82
  }
@@ -20,8 +20,6 @@ GOOGLE_MODELS = {
20
20
  "input_cost": 0.1,
21
21
  "cached_input_cost": 0.025,
22
22
  "output_cost": 0.4,
23
- "requests_per_minute": 20,
24
- "tokens_per_minute": 100_000,
25
23
  "reasoning_model": False,
26
24
  },
27
25
  "gemini-2.0-flash-lite-compat": {
@@ -34,8 +32,6 @@ GOOGLE_MODELS = {
34
32
  "api_spec": "openai",
35
33
  "input_cost": 0.075,
36
34
  "output_cost": 0.3,
37
- "requests_per_minute": 20,
38
- "tokens_per_minute": 100_000,
39
35
  "reasoning_model": False,
40
36
  },
41
37
  "gemini-2.5-pro-compat": {
@@ -49,8 +45,6 @@ GOOGLE_MODELS = {
49
45
  "input_cost": 1.25,
50
46
  "cached_input_cost": 0.31,
51
47
  "output_cost": 10.0,
52
- "requests_per_minute": 20,
53
- "tokens_per_minute": 100_000,
54
48
  "reasoning_model": True,
55
49
  },
56
50
  "gemini-2.5-flash-compat": {
@@ -64,8 +58,6 @@ GOOGLE_MODELS = {
64
58
  "input_cost": 0.3,
65
59
  "cached_input_cost": 0.075,
66
60
  "output_cost": 2.5,
67
- "requests_per_minute": 20,
68
- "tokens_per_minute": 100_000,
69
61
  "reasoning_model": True,
70
62
  },
71
63
  "gemini-2.5-flash-lite-compat": {
@@ -79,8 +71,6 @@ GOOGLE_MODELS = {
79
71
  "input_cost": 0.1,
80
72
  "cached_input_cost": 0.025,
81
73
  "output_cost": 0.4,
82
- "requests_per_minute": 20,
83
- "tokens_per_minute": 100_000,
84
74
  "reasoning_model": True,
85
75
  },
86
76
  # Native Gemini API versions with file support
@@ -95,8 +85,6 @@ GOOGLE_MODELS = {
95
85
  "input_cost": 0.1,
96
86
  "cached_input_cost": 0.025,
97
87
  "output_cost": 0.4,
98
- "requests_per_minute": 20,
99
- "tokens_per_minute": 100_000,
100
88
  "reasoning_model": False,
101
89
  },
102
90
  "gemini-2.0-flash-lite": {
@@ -109,8 +97,6 @@ GOOGLE_MODELS = {
109
97
  "api_spec": "gemini",
110
98
  "input_cost": 0.075,
111
99
  "output_cost": 0.3,
112
- "requests_per_minute": 20,
113
- "tokens_per_minute": 100_000,
114
100
  "reasoning_model": False,
115
101
  },
116
102
  "gemini-2.5-pro": {
@@ -124,8 +110,6 @@ GOOGLE_MODELS = {
124
110
  "input_cost": 1.25,
125
111
  "cached_input_cost": 0.31,
126
112
  "output_cost": 10.0,
127
- "requests_per_minute": 20,
128
- "tokens_per_minute": 100_000,
129
113
  "reasoning_model": True,
130
114
  },
131
115
  "gemini-2.5-flash": {
@@ -139,8 +123,6 @@ GOOGLE_MODELS = {
139
123
  "input_cost": 0.3,
140
124
  "cached_input_cost": 0.075,
141
125
  "output_cost": 2.5,
142
- "requests_per_minute": 20,
143
- "tokens_per_minute": 100_000,
144
126
  "reasoning_model": True,
145
127
  },
146
128
  "gemini-2.5-flash-lite": {
@@ -154,8 +136,6 @@ GOOGLE_MODELS = {
154
136
  "input_cost": 0.1,
155
137
  "cached_input_cost": 0.025,
156
138
  "output_cost": 0.4,
157
- "requests_per_minute": 20,
158
- "tokens_per_minute": 100_000,
159
139
  "reasoning_model": True,
160
140
  },
161
141
  }
lm_deluge/models/grok.py CHANGED
@@ -7,6 +7,54 @@ XAI_MODELS = {
7
7
  # 888 888 888 888 888 888888K
8
8
  # Y88b d88P 888 Y88..88P 888 "88b
9
9
  # "Y8888P88 888 "Y88P" 888 888
10
+ "grok-code-fast-1": {
11
+ "id": "grok-code-fast-1",
12
+ "name": "grok-code-fast-1",
13
+ "api_base": "https://api.x.ai/v1",
14
+ "api_key_env_var": "GROK_API_KEY",
15
+ "supports_json": True,
16
+ "supports_logprobs": True,
17
+ "api_spec": "openai",
18
+ "input_cost": 0.2,
19
+ "output_cost": 1.5,
20
+ "reasoning_model": False,
21
+ },
22
+ "grok-4-fast-reasoning": {
23
+ "id": "grok-4-fast-reasoning",
24
+ "name": "grok-4-fast-reasoning",
25
+ "api_base": "https://api.x.ai/v1",
26
+ "api_key_env_var": "GROK_API_KEY",
27
+ "supports_json": True,
28
+ "supports_logprobs": True,
29
+ "api_spec": "openai",
30
+ "input_cost": 0.2,
31
+ "output_cost": 0.5,
32
+ "reasoning_model": True,
33
+ },
34
+ "grok-4-fast-non-reasoning": {
35
+ "id": "grok-4-fast-non-reasoning",
36
+ "name": "grok-4-fast-non-reasoning",
37
+ "api_base": "https://api.x.ai/v1",
38
+ "api_key_env_var": "GROK_API_KEY",
39
+ "supports_json": True,
40
+ "supports_logprobs": True,
41
+ "api_spec": "openai",
42
+ "input_cost": 0.2,
43
+ "output_cost": 0.5,
44
+ "reasoning_model": False,
45
+ },
46
+ "grok-4": {
47
+ "id": "grok-4",
48
+ "name": "grok-4-0709",
49
+ "api_base": "https://api.x.ai/v1",
50
+ "api_key_env_var": "GROK_API_KEY",
51
+ "supports_json": True,
52
+ "supports_logprobs": True,
53
+ "api_spec": "openai",
54
+ "input_cost": 2.0,
55
+ "output_cost": 8.0,
56
+ "reasoning_model": True,
57
+ },
10
58
  "grok-3": {
11
59
  "id": "grok-3",
12
60
  "name": "grok-3-latest",
@@ -17,8 +65,6 @@ XAI_MODELS = {
17
65
  "api_spec": "openai",
18
66
  "input_cost": 2.0,
19
67
  "output_cost": 8.0,
20
- "requests_per_minute": 20,
21
- "tokens_per_minute": 100_000,
22
68
  "reasoning_model": False,
23
69
  },
24
70
  "grok-3-mini": {
@@ -31,8 +77,6 @@ XAI_MODELS = {
31
77
  "api_spec": "openai",
32
78
  "input_cost": 2.0,
33
79
  "output_cost": 8.0,
34
- "requests_per_minute": 20,
35
- "tokens_per_minute": 100_000,
36
80
  "reasoning_model": True,
37
81
  },
38
82
  }
lm_deluge/models/groq.py CHANGED
@@ -41,10 +41,10 @@ GROQ_MODELS = {
41
41
  },
42
42
  "kimi-k2-groq": {
43
43
  "id": "kimi-k2-groq",
44
- "name": "moonshotai/kimi-k2-instruct",
44
+ "name": "moonshotai/kimi-k2-instruct-0905",
45
45
  "api_base": "https://api.groq.com/openai/v1",
46
46
  "api_key_env_var": "GROQ_API_KEY",
47
- "supports_json": False,
47
+ "supports_json": True,
48
48
  "api_spec": "openai",
49
49
  },
50
50
  "gpt-oss-120b-groq": {
@@ -0,0 +1,34 @@
1
+ KIMI_MODELS = {
2
+ "kimi-k2": {
3
+ "id": "kimi-k2",
4
+ "name": "kimi-k2-0905-preview",
5
+ "api_base": "https://api.moonshot.ai/anthropic/v1",
6
+ "api_key_env_var": "MOONSHOT_API_KEY",
7
+ "supports_json": True,
8
+ "api_spec": "anthropic",
9
+ },
10
+ "kimi-k2-turbo": {
11
+ "id": "kimi-k2-turbo",
12
+ "name": "kimi-k2-turbo-preview",
13
+ "api_base": "https://api.moonshot.ai/anthropic/v1",
14
+ "api_key_env_var": "MOONSHOT_API_KEY",
15
+ "supports_json": True,
16
+ "api_spec": "anthropic",
17
+ },
18
+ "kimi-k2-thinking": {
19
+ "id": "kimi-k2-thinking",
20
+ "name": "kimi-k2-thinking",
21
+ "api_base": "https://api.moonshot.ai/anthropic/v1",
22
+ "api_key_env_var": "MOONSHOT_API_KEY",
23
+ "supports_json": True,
24
+ "api_spec": "anthropic",
25
+ },
26
+ "kimi-k2-thinking-turbo": {
27
+ "id": "kimi-k2-thinking-turbo",
28
+ "name": "kimi-k2-thinking-turbo",
29
+ "api_base": "https://api.moonshot.ai/anthropic/v1",
30
+ "api_key_env_var": "MOONSHOT_API_KEY",
31
+ "supports_json": True,
32
+ "api_spec": "anthropic",
33
+ },
34
+ }
lm_deluge/models/meta.py CHANGED
@@ -16,8 +16,6 @@ META_MODELS = {
16
16
  "api_spec": "openai",
17
17
  "input_cost": 0.0,
18
18
  "output_cost": 0.0,
19
- "requests_per_minute": 3_000,
20
- "tokens_per_minute": 1_000_000,
21
19
  "reasoning_model": False,
22
20
  },
23
21
  "llama-4-maverick": {
@@ -30,8 +28,6 @@ META_MODELS = {
30
28
  "api_spec": "openai",
31
29
  "input_cost": 0.0,
32
30
  "output_cost": 0.0,
33
- "requests_per_minute": 3_000,
34
- "tokens_per_minute": 1_000_000,
35
31
  "reasoning_model": False,
36
32
  },
37
33
  "llama-3.3-70b": {
@@ -44,8 +40,6 @@ META_MODELS = {
44
40
  "api_spec": "openai",
45
41
  "input_cost": 0.0,
46
42
  "output_cost": 0.0,
47
- "requests_per_minute": 3_000,
48
- "tokens_per_minute": 1_000_000,
49
43
  "reasoning_model": False,
50
44
  },
51
45
  "llama-3.3-8b": {
@@ -58,8 +52,6 @@ META_MODELS = {
58
52
  "api_spec": "openai",
59
53
  "input_cost": 0.0,
60
54
  "output_cost": 0.0,
61
- "requests_per_minute": 3_000,
62
- "tokens_per_minute": 1_000_000,
63
55
  "reasoning_model": False,
64
56
  },
65
57
  }
@@ -0,0 +1,10 @@
1
+ MINIMAX_MODELS = {
2
+ "minimax-m2": {
3
+ "id": "minimax-m2",
4
+ "name": "MiniMax-M2",
5
+ "api_base": "https://api.minimax.io/anthropic/v1",
6
+ "api_key_env_var": "MINIMAX_API_KEY",
7
+ "supports_json": False,
8
+ "api_spec": "anthropic",
9
+ }
10
+ }
@@ -10,6 +10,20 @@ OPENAI_MODELS = {
10
10
  # ░███
11
11
  # █████
12
12
  # ░░░░░
13
+ "gpt-5-codex": {
14
+ "id": "gpt-5-codex",
15
+ "name": "gpt-5-codex",
16
+ "api_base": "https://api.openai.com/v1",
17
+ "api_key_env_var": "OPENAI_API_KEY",
18
+ "supports_json": False,
19
+ "supports_logprobs": True,
20
+ "supports_responses": True,
21
+ "api_spec": "openai",
22
+ "input_cost": 1.25,
23
+ "cached_input_cost": 0.125,
24
+ "output_cost": 10.0,
25
+ "reasoning_model": True,
26
+ },
13
27
  "gpt-5": {
14
28
  "id": "gpt-5",
15
29
  "name": "gpt-5",
@@ -77,10 +91,22 @@ OPENAI_MODELS = {
77
91
  "api_spec": "openai",
78
92
  "input_cost": 3.0,
79
93
  "output_cost": 12.0,
80
- "requests_per_minute": 20,
81
- "tokens_per_minute": 100_000,
82
94
  "reasoning_model": False,
83
95
  },
96
+ "codex-mini-latest": {
97
+ "id": "codex-mini-latest",
98
+ "name": "codex-mini-latest",
99
+ "api_base": "https://api.openai.com/v1",
100
+ "api_key_env_var": "OPENAI_API_KEY",
101
+ "supports_json": True,
102
+ "supports_logprobs": False,
103
+ "supports_responses": True,
104
+ "api_spec": "openai",
105
+ "input_cost": 1.5,
106
+ "cached_input_cost": 0.375,
107
+ "output_cost": 6.0,
108
+ "reasoning_model": True,
109
+ },
84
110
  "o3": {
85
111
  "id": "o3",
86
112
  "name": "o3-2025-04-16",
@@ -93,8 +119,6 @@ OPENAI_MODELS = {
93
119
  "input_cost": 2.0,
94
120
  "cached_input_cost": 0.50,
95
121
  "output_cost": 8.0,
96
- "requests_per_minute": 20,
97
- "tokens_per_minute": 100_000,
98
122
  "reasoning_model": True,
99
123
  },
100
124
  "o4-mini": {
@@ -109,8 +133,6 @@ OPENAI_MODELS = {
109
133
  "input_cost": 1.1,
110
134
  "cached_input_cost": 0.275,
111
135
  "output_cost": 4.4,
112
- "requests_per_minute": 20,
113
- "tokens_per_minute": 100_000,
114
136
  "reasoning_model": True,
115
137
  },
116
138
  "gpt-4.1": {
@@ -125,8 +147,6 @@ OPENAI_MODELS = {
125
147
  "input_cost": 2.0,
126
148
  "cached_input_cost": 0.50,
127
149
  "output_cost": 8.0,
128
- "requests_per_minute": 20,
129
- "tokens_per_minute": 100_000,
130
150
  "reasoning_model": False,
131
151
  },
132
152
  "gpt-4.1-mini": {
@@ -141,8 +161,6 @@ OPENAI_MODELS = {
141
161
  "input_cost": 0.4,
142
162
  "cached_input_cost": 0.10,
143
163
  "output_cost": 1.6,
144
- "requests_per_minute": 20,
145
- "tokens_per_minute": 100_000,
146
164
  "reasoning_model": False,
147
165
  },
148
166
  "gpt-4.1-nano": {
@@ -157,8 +175,6 @@ OPENAI_MODELS = {
157
175
  "input_cost": 0.1,
158
176
  "cached_input_cost": 0.025,
159
177
  "output_cost": 0.4,
160
- "requests_per_minute": 20,
161
- "tokens_per_minute": 100_000,
162
178
  "reasoning_model": False,
163
179
  },
164
180
  "gpt-4.5": {
@@ -172,8 +188,6 @@ OPENAI_MODELS = {
172
188
  "api_spec": "openai",
173
189
  "input_cost": 75.0,
174
190
  "output_cost": 150.0,
175
- "requests_per_minute": 20,
176
- "tokens_per_minute": 100_000,
177
191
  "reasoning_model": False,
178
192
  },
179
193
  "o3-mini": {
@@ -188,8 +202,6 @@ OPENAI_MODELS = {
188
202
  "input_cost": 1.1,
189
203
  "cached_input_cost": 0.55,
190
204
  "output_cost": 4.4,
191
- "requests_per_minute": 20,
192
- "tokens_per_minute": 100_000,
193
205
  "reasoning_model": True,
194
206
  },
195
207
  "o1": {
@@ -204,8 +216,6 @@ OPENAI_MODELS = {
204
216
  "input_cost": 15.0,
205
217
  "cached_input_cost": 7.50,
206
218
  "output_cost": 60.0,
207
- "requests_per_minute": 20,
208
- "tokens_per_minute": 100_000,
209
219
  "reasoning_model": True,
210
220
  },
211
221
  "o1-preview": {
@@ -219,8 +229,6 @@ OPENAI_MODELS = {
219
229
  "api_spec": "openai",
220
230
  "input_cost": 15.0,
221
231
  "output_cost": 60.0,
222
- "requests_per_minute": 20,
223
- "tokens_per_minute": 100_000,
224
232
  "reasoning_model": True,
225
233
  },
226
234
  "o1-mini": {
@@ -235,8 +243,6 @@ OPENAI_MODELS = {
235
243
  "input_cost": 1.1,
236
244
  "cached_input_cost": 0.55,
237
245
  "output_cost": 4.4,
238
- "requests_per_minute": 20,
239
- "tokens_per_minute": 100_000,
240
246
  "reasoning_model": True,
241
247
  },
242
248
  "gpt-4o": {
@@ -251,8 +257,6 @@ OPENAI_MODELS = {
251
257
  "input_cost": 2.50,
252
258
  "cached_input_cost": 1.25,
253
259
  "output_cost": 10.0,
254
- "requests_per_minute": 10_000,
255
- "tokens_per_minute": 30_000_000,
256
260
  },
257
261
  "gpt-4o-mini": {
258
262
  "id": "gpt-4o-mini",
@@ -266,8 +270,6 @@ OPENAI_MODELS = {
266
270
  "input_cost": 0.15,
267
271
  "cached_input_cost": 0.075,
268
272
  "output_cost": 0.6,
269
- "requests_per_minute": 60_000,
270
- "tokens_per_minute": 250_000_000,
271
273
  },
272
274
  "gpt-3.5-turbo": {
273
275
  "id": "gpt-3.5-turbo",
@@ -280,8 +282,6 @@ OPENAI_MODELS = {
280
282
  "api_spec": "openai",
281
283
  "input_cost": 0.5,
282
284
  "output_cost": 1.5,
283
- "requests_per_minute": 40_000,
284
- "tokens_per_minute": 75_000_000,
285
285
  },
286
286
  "gpt-4-turbo": {
287
287
  "id": "gpt-4-turbo",
@@ -294,8 +294,6 @@ OPENAI_MODELS = {
294
294
  "api_spec": "openai",
295
295
  "input_cost": 10.0,
296
296
  "output_cost": 30.0,
297
- "requests_per_minute": 10_000,
298
- "tokens_per_minute": 1_500_000,
299
297
  },
300
298
  "gpt-4": {
301
299
  "id": "gpt-4",
@@ -308,8 +306,6 @@ OPENAI_MODELS = {
308
306
  "api_spec": "openai",
309
307
  "input_cost": 30.0,
310
308
  "output_cost": 60.0,
311
- "requests_per_minute": 10_000,
312
- "tokens_per_minute": 300_000,
313
309
  },
314
310
  "gpt-4-32k": {
315
311
  "id": "gpt-4-32k",
@@ -322,7 +318,5 @@ OPENAI_MODELS = {
322
318
  "api_spec": "openai",
323
319
  "input_cost": 60.0,
324
320
  "output_cost": 120.0,
325
- "requests_per_minute": 1_000,
326
- "tokens_per_minute": 150_000,
327
321
  },
328
322
  }
@@ -1 +1,64 @@
1
- OPENROUTER_MODELS = {}
1
+ OPENROUTER_MODELS = {
2
+ "glm-4.6-openrouter": {
3
+ "id": "glm-4.6-openrouter",
4
+ "name": "z-ai/glm-4.6",
5
+ "api_base": "https://openrouter.ai/api/v1",
6
+ "api_key_env_var": "OPENROUTER_API_KEY",
7
+ "supports_json": True,
8
+ "api_spec": "openai",
9
+ "input_cost": 0.6,
10
+ "cached_input_cost": 0.11,
11
+ "cache_write_cost": 0.6,
12
+ "output_cost": 2.20,
13
+ },
14
+ "deepseek-r1-openrouter": {
15
+ "id": "deepseek-r1-openrouter",
16
+ "name": "deepseek/deepseek-r1-0528",
17
+ "api_base": "https://openrouter.ai/api/v1",
18
+ "api_key_env_var": "OPENROUTER_API_KEY",
19
+ "supports_json": True,
20
+ "api_spec": "openai",
21
+ "input_cost": 0.40,
22
+ "cached_input_cost": 0.40,
23
+ "cache_write_cost": 0.40,
24
+ "output_cost": 1.75,
25
+ },
26
+ "deepseek-3.1-openrouter": {
27
+ "id": "deepseek-3.1-openrouter",
28
+ "name": "deepseek/deepseek-v3.1-terminus",
29
+ "api_base": "https://openrouter.ai/api/v1",
30
+ "api_key_env_var": "OPENROUTER_API_KEY",
31
+ "supports_json": True,
32
+ "api_spec": "openai",
33
+ "input_cost": 0.23,
34
+ "cached_input_cost": 0.23,
35
+ "cache_write_cost": 0.23,
36
+ "output_cost": 0.9,
37
+ },
38
+ "deepseek-3.2-openrouter": {
39
+ "id": "deepseek-3.2-openrouter",
40
+ "name": "deepseek/deepseek-v3.2-exp",
41
+ "api_base": "https://openrouter.ai/api/v1",
42
+ "api_key_env_var": "OPENROUTER_API_KEY",
43
+ "supports_json": True,
44
+ "api_spec": "openai",
45
+ "input_cost": 0.27,
46
+ "cached_input_cost": 0.27,
47
+ "cache_write_cost": 0.27,
48
+ "output_cost": 0.4,
49
+ },
50
+ # "gpt-oss-20b-openrouter": {},
51
+ # "gpt-oss-120b-openrouter": {},
52
+ "kimi-k2-openrouter": {
53
+ "id": "kimi-k2-openrouter",
54
+ "name": "moonshotai/kimi-k2-0905",
55
+ "api_base": "https://openrouter.ai/api/v1",
56
+ "api_key_env_var": "OPENROUTER_API_KEY",
57
+ "supports_json": True,
58
+ "api_spec": "openai",
59
+ "input_cost": 0.6,
60
+ "cached_input_cost": 0.11,
61
+ "cache_write_cost": 0.6,
62
+ "output_cost": 2.20,
63
+ },
64
+ }