langfun 0.1.2.dev202502110804__py3-none-any.whl → 0.1.2.dev202502120804__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langfun/core/__init__.py +6 -2
- langfun/core/language_model.py +365 -22
- langfun/core/language_model_test.py +123 -35
- langfun/core/llms/__init__.py +50 -57
- langfun/core/llms/anthropic.py +434 -163
- langfun/core/llms/anthropic_test.py +20 -1
- langfun/core/llms/deepseek.py +90 -51
- langfun/core/llms/deepseek_test.py +15 -16
- langfun/core/llms/fake.py +6 -0
- langfun/core/llms/gemini.py +480 -390
- langfun/core/llms/gemini_test.py +27 -7
- langfun/core/llms/google_genai.py +80 -50
- langfun/core/llms/google_genai_test.py +11 -4
- langfun/core/llms/groq.py +268 -167
- langfun/core/llms/groq_test.py +9 -3
- langfun/core/llms/openai.py +839 -328
- langfun/core/llms/openai_compatible.py +3 -18
- langfun/core/llms/openai_compatible_test.py +20 -5
- langfun/core/llms/openai_test.py +14 -4
- langfun/core/llms/rest.py +11 -6
- langfun/core/llms/vertexai.py +238 -240
- langfun/core/llms/vertexai_test.py +35 -8
- {langfun-0.1.2.dev202502110804.dist-info → langfun-0.1.2.dev202502120804.dist-info}/METADATA +1 -1
- {langfun-0.1.2.dev202502110804.dist-info → langfun-0.1.2.dev202502120804.dist-info}/RECORD +27 -27
- {langfun-0.1.2.dev202502110804.dist-info → langfun-0.1.2.dev202502120804.dist-info}/LICENSE +0 -0
- {langfun-0.1.2.dev202502110804.dist-info → langfun-0.1.2.dev202502120804.dist-info}/WHEEL +0 -0
- {langfun-0.1.2.dev202502110804.dist-info → langfun-0.1.2.dev202502120804.dist-info}/top_level.txt +0 -0
langfun/core/llms/openai.py
CHANGED
@@ -13,302 +13,878 @@

 # limitations under the License.
 """Language models from OpenAI."""

+import datetime
+import functools
 import os
-from typing import Annotated, Any
+from typing import Annotated, Any, Final

 import langfun.core as lf
 from langfun.core.llms import openai_compatible
 import pyglove as pg

The module-level settings table is removed: `_DEFAULT_RPM = 3000` and the `SUPPORTED_MODELS_AND_SETTINGS` dict of `pg.Dict(in_service=..., rpm=..., tpm=...)` entries, including long-deprecated GPT-3 models such as `'code-davinci-002'`, `'text-curie-001'` and `'text-babbage-001'`, and the still-served base models `'babbage-002'` and `'davinci-002'`. In its place the module declares a typed model registry:

+class OpenAIModelInfo(lf.ModelInfo):
+  """OpenAI model info."""
+
+  # Constants for supported MIME types.
+  INPUT_IMAGE_TYPES = [
+      'image/png',
+      'image/jpeg',
+      'image/gif',
+      'image/webp',
+  ]
+
+  LINKS = dict(
+      models='https://platform.openai.com/docs/models',
+      pricing='https://openai.com/api/pricing/',
+      rate_limits='https://platform.openai.com/docs/guides/rate-limits',
+      error_codes='https://platform.openai.com/docs/guides/error-codes',
+  )
+
+  provider: Final[str] = 'OpenAI'  # pylint: disable=invalid-name
+
+
+#
+# !!! Please sort models by model family and model_id (time descending).
+#
+
+SUPPORTED_MODELS = [
+    # o3-mini models.
+    OpenAIModelInfo(
+        model_id='o3-mini',
+        alias_for='o3-mini-2025-01-31',
+        in_service=True,
+        model_type='thinking',
+        description='GPT O3-mini model (latest).',
+        url='https://platform.openai.com/docs/models#o3-mini',
+        input_modalities=OpenAIModelInfo.INPUT_IMAGE_TYPES,
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=200_000,
+            max_output_tokens=100_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.55,
+            cost_per_1m_input_tokens=1.1,
+            cost_per_1m_output_tokens=4.4,
+        ),
+        # Tier 5 rate limits.
+        rate_limits=lf.ModelInfo.RateLimits(
+            max_requests_per_minute=10_000,
+            max_tokens_per_minute=10_000_000,
+        ),
+    ),
+    # ... 35 more OpenAIModelInfo entries, tabulated below ...
+]
+
+_SUPPORTED_MODELS_BY_MODEL_ID = {m.model_id: m for m in SUPPORTED_MODELS}

Every entry in SUPPORTED_MODELS follows the shape of the `o3-mini` entry above; the per-model data registered in this version is:

| Model id (alias for) | In service | Type | Release | Input | Max in / out tokens | $ per 1M tokens (cached / input / output) | RPM / TPM |
|---|---|---|---|---|---|---|---|
| o3-mini (o3-mini-2025-01-31) | yes | thinking | – | text + image | 200,000 / 100,000 | 0.55 / 1.10 / 4.40 | 10,000 / 10M |
| o3-mini-2025-01-31 | yes | thinking | 2025-01-31 | text + image | 200,000 / 100,000 | 0.55 / 1.10 / 4.40 | 10,000 / 10M |
| o1-mini (o1-mini-2024-09-12) | yes | thinking | – | text + image | 128,000 / 65,536 | 0.55 / 1.10 / 4.40 | 10,000 / 10M |
| o1-mini-2024-09-12 | yes | thinking | – | text + image | 128,000 / 65,536 | 0.55 / 1.10 / 4.40 | 10,000 / 10M |
| o1-preview (o1-preview-2024-09-12) | yes | thinking | – | text + image | 128,000 / 32,768 | 7.50 / 15.00 / 60.00 | 10,000 / 10M |
| o1-preview-2024-09-12 | yes | thinking | 2024-09-12 | text + image | 128,000 / 32,768 | 7.50 / 15.00 / 60.00 | 10,000 / 2M |
| o1 (o1-2024-12-17) | yes | thinking | – | text + image | 200,000 / 100,000 | 7.50 / 15.00 / 60.00 | 10,000 / 2M |
| o1-2024-12-17 | yes | thinking | 2024-12-17 | text + image | 200,000 / 100,000 | 7.50 / 15.00 / 60.00 | 10,000 / 2M |
| gpt-4o-mini (gpt-4o-mini-2024-07-18) | yes | instruction-tuned | – | text + image | 128,000 / 16,384 | 0.075 / 0.15 / 0.60 | 10,000 / 10M |
| gpt-4o-mini-2024-07-18 | yes | instruction-tuned | 2024-07-18 | text + image | 128,000 / 16,384 | 0.075 / 0.15 / 0.60 | 10,000 / 10M |
| gpt-4o (gpt-4o-2024-08-06) | yes | instruction-tuned | – | text + image | 128,000 / 16,384 | 1.25 / 2.50 / 10.00 | 10,000 / 2M |
| gpt-4o-2024-11-20 | yes | instruction-tuned | 2024-11-20 | text + image | 128,000 / 16,384 | 1.25 / 2.50 / 10.00 | 10,000 / 2M |
| gpt-4o-2024-08-06 | yes | instruction-tuned | – | text + image | 128,000 / 16,384 | 1.25 / 2.50 / 10.00 | 10,000 / 2M |
| gpt-4o-2024-05-13 | yes | instruction-tuned | 2024-05-13 | text + image | 128,000 / 16,384 | – / 5.00 / 15.00 | 10,000 / 2M |
| chatgpt-4o-latest | yes | instruction-tuned | – | text + image | 128,000 / 16,384 | – / 5.00 / 15.00 | 10,000 / 2M |
| gpt-4-turbo (gpt-4-turbo-2024-04-09) | yes | instruction-tuned | – | text + image | 128,000 / 4,096 | – / 10.00 / 30.00 | 10,000 / 800K |
| gpt-4-turbo-2024-04-09 | yes | instruction-tuned | 2024-04-09 | text + image | 128,000 / 4,096 | – / 10.00 / 30.00 | 10,000 / 800K |
| gpt-4-turbo-preview (gpt-4-0125-preview) | yes | instruction-tuned | – | text + image | 128,000 / 4,096 | – / 10.00 / 30.00 | 10,000 / 800K |
| gpt-4-0125-preview | yes | instruction-tuned | 2024-01-25 | text + image | 128,000 / 4,096 | – / 10.00 / 30.00 | 10,000 / 800K |
| gpt-4-1106-preview | yes | instruction-tuned | 2024-11-06 | text + image | 128,000 / 4,096 | – / 10.00 / 30.00 | 10,000 / 800K |
| gpt-4 (gpt-4-0613) | yes | instruction-tuned | – | text | 8,192 / 8,192 | – / 30.00 / 60.00 | 10,000 / 300K |
| gpt-4-0613 | yes | instruction-tuned | 2023-06-13 | text | 8,192 / 8,192 | – / 30.00 / 60.00 | 10,000 / 300K |
| gpt-4-0314 | no | instruction-tuned | 2023-03-14 | text | 8,192 / 8,192 | – / 30.00 / 60.00 | 10,000 / 300K |
| gpt-4-32k (gpt-4-32k-0613) | no | instruction-tuned | – | text | 32,768 / 8,192 | – / 60.00 / 120.00 | 10,000 / 300K |
| gpt-4-32k-0613 | no | instruction-tuned | 2023-06-13 | text | 32,768 / 8,192 | – / 60.00 / 120.00 | 10,000 / 300K |
| gpt-4-32k-0314 | no | instruction-tuned | 2023-03-14 | text | 32,768 / 8,192 | – / 60.00 / 120.00 | 10,000 / 300K |
| gpt-3.5-turbo (gpt-3.5-turbo-0125) | yes | instruction-tuned | – | text | 16,384 / 4,096 | – / 0.50 / 1.50 | 10,000 / 10M |
| gpt-3.5-turbo-0125 | yes | instruction-tuned | 2024-01-25 | text | 16,384 / 4,096 | – / 0.50 / 1.50 | 10,000 / 10M |
| gpt-3.5-turbo-1106 | yes | instruction-tuned | 2023-11-06 | text | 16,384 / 4,096 | – / 1.00 / 2.00 | 10,000 / 10M |
| gpt-3.5-turbo-0613 | no | instruction-tuned | 2023-06-13 | text | 16,384 / 4,096 | – / 1.50 / 2.00 | 10,000 / 10M |
| gpt-3.5-turbo-16k (gpt-3.5-turbo-16k-0613) | yes | instruction-tuned | – | text | 16,385 / 4,096 | – / 3.00 / 4.00 | 10,000 / 10M |
| gpt-3.5-turbo-16k-0613 | no | instruction-tuned | 2023-06-13 | text | 16,385 / 4,096 | – / 3.00 / 4.00 | 10,000 / 10M |
| gpt-3.5-turbo-16k-0301 | no | instruction-tuned | 2023-03-01 | text | 16,385 / 4,096 | – / 3.00 / 4.00 | 10,000 / 10M |
| text-davinci-003 | no | instruction-tuned | – | text | 16,384 / 4,096 | – / 3.00 / 3.00 | – / – |
| babbage-002 | yes | pretrained | – | text | 16,384 / 4,096 | – / 2.00 / 2.00 | 3,000 / 250K |
| davinci-002 | yes | pretrained | – | text | 16,384 / 4,096 | – / 2.00 / 2.00 | 3,000 / 250K |

 @lf.use_init_args(['model'])
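The hunk above replaces a string-keyed settings dict with a typed, id-indexed registry whose alias entries point at their dated releases. A minimal, self-contained sketch of that pattern, using an illustrative dataclass rather than langfun's actual ModelInfo classes:

    import dataclasses
    from typing import Optional

    @dataclasses.dataclass(frozen=True)
    class ModelEntry:
      """Illustrative stand-in for an OpenAIModelInfo entry."""
      model_id: str
      in_service: bool
      max_input_tokens: int
      max_output_tokens: int
      alias_for: Optional[str] = None

    SUPPORTED = [
        ModelEntry('o3-mini', True, 200_000, 100_000, alias_for='o3-mini-2025-01-31'),
        ModelEntry('o3-mini-2025-01-31', True, 200_000, 100_000),
        ModelEntry('gpt-4o', True, 128_000, 16_384, alias_for='gpt-4o-2024-08-06'),
        ModelEntry('gpt-4o-2024-08-06', True, 128_000, 16_384),
    ]
    BY_ID = {m.model_id: m for m in SUPPORTED}  # mirrors _SUPPORTED_MODELS_BY_MODEL_ID

    def resolve(model_id: str) -> ModelEntry:
      """Follows alias_for links until the canonical dated release is reached."""
      entry = BY_ID[model_id]
      while entry.alias_for is not None:
        entry = BY_ID[entry.alias_for]
      return entry

    assert resolve('gpt-4o').model_id == 'gpt-4o-2024-08-06'
    assert 'o3-mini' in [m.model_id for m in SUPPORTED if m.in_service]

Whether langfun resolves aliases this way is not shown in this diff; the registry itself only records the `alias_for` relationship.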
@@ -317,7 +893,7 @@ class OpenAI(openai_compatible.OpenAICompatible):

   model: pg.typing.Annotated[
       pg.typing.Enum(
-          pg.MISSING_VALUE, list(
+          pg.MISSING_VALUE, list(s.model_id for s in SUPPORTED_MODELS)
       ),
       'The name of the model to use.',
   ]
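The `model` field's enum values are now generated from the registry instead of being maintained by hand. A plain-Python sketch of the same guarantee (the real field relies on `pg.typing.Enum`, as shown above; the helper below is only illustrative):

    VALID_MODEL_IDS = frozenset(
        ['o3-mini', 'o1', 'gpt-4o', 'gpt-4o-mini']  # a subset of SUPPORTED_MODELS ids
    )

    def check_model(model: str) -> str:
      """Rejects model ids that are not in the registry."""
      if model not in VALID_MODEL_IDS:
        raise ValueError(f'Unsupported model: {model!r}')
      return model

    check_model('gpt-4o')      # passes
    # check_model('gpt-5')     # would raise ValueError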
@@ -355,6 +931,7 @@ class OpenAI(openai_compatible.OpenAICompatible):

     self._api_key = None
     self._organization = None
     self._project = None
+    self.__dict__.pop('model_info', None)

   def _initialize(self):
     api_key = self.api_key or os.environ.get('OPENAI_API_KEY', None)
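The added `self.__dict__.pop('model_info', None)` works because `functools.cached_property` (used for `model_info` later in this diff) stores its computed value in the instance `__dict__` under the attribute name; removing that key forces recomputation on the next access. A small standalone sketch (the `Client` class is illustrative, not langfun's):

    import functools

    class Client:

      def __init__(self, model: str):
        self.model = model

      @functools.cached_property
      def model_info(self) -> str:
        return f'info({self.model})'

    c = Client('gpt-4o')
    assert c.model_info == 'info(gpt-4o)'    # computed once, cached in c.__dict__
    c.model = 'gpt-4o-mini'
    assert c.model_info == 'info(gpt-4o)'    # still the stale cached value
    c.__dict__.pop('model_info', None)       # same reset as in the hunk above
    assert c.model_info == 'info(gpt-4o-mini)'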
@@ -380,41 +957,13 @@ class OpenAI(openai_compatible.OpenAICompatible):

       headers['OpenAI-Project'] = self._project
     return headers

Removed: the property that returned f'OpenAI({self.model})', the max_concurrency property (which read 'rpm'/'tpm' from SUPPORTED_MODELS_AND_SETTINGS and called self.rate_to_max_concurrency(requests_per_min=rpm, tokens_per_min=tpm)), and the estimate_cost() helper:

-  def estimate_cost(
-      self,
-      num_input_tokens: int,
-      num_output_tokens: int
-  ) -> float | None:
-    """Estimate the cost based on usage."""
-    cost_per_1k_input_tokens = SUPPORTED_MODELS_AND_SETTINGS[self.model].get(
-        'cost_per_1k_input_tokens', None
-    )
-    cost_per_1k_output_tokens = SUPPORTED_MODELS_AND_SETTINGS[self.model].get(
-        'cost_per_1k_output_tokens', None
-    )
-    if cost_per_1k_output_tokens is None or cost_per_1k_input_tokens is None:
-      return None
-    return (
-        cost_per_1k_input_tokens * num_input_tokens
-        + cost_per_1k_output_tokens * num_output_tokens
-    ) / 1000

Added in their place: a cached registry lookup, and dir() now filters on in_service:

+  @functools.cached_property
+  def model_info(self) -> OpenAIModelInfo:
+    return _SUPPORTED_MODELS_BY_MODEL_ID[self.model]

   @classmethod
   def dir(cls):
-    return [
+    return [s.model_id for s in SUPPORTED_MODELS if s.in_service]

   def _request_args(
       self, options: lf.LMSamplingOptions) -> dict[str, Any]:
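The removed `estimate_cost()` read per-1K-token prices from the old dict; the pricing recorded in `OpenAIModelInfo` is per 1M tokens. A sketch of the equivalent arithmetic (standalone; how langfun's base class consumes `model_info.pricing` is not shown in this diff):

    def estimate_cost(cost_per_1m_input: float, cost_per_1m_output: float,
                      num_input_tokens: int, num_output_tokens: int) -> float:
      """Returns the request cost in USD from per-1M-token prices."""
      return (
          cost_per_1m_input * num_input_tokens
          + cost_per_1m_output * num_output_tokens
      ) / 1_000_000

    # gpt-4o pricing from the table above: $2.50/M input, $10.00/M output.
    assert abs(estimate_cost(2.5, 10.0, 2_000, 500) - 0.01) < 1e-9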
@@ -426,17 +975,13 @@ class OpenAI(openai_compatible.OpenAICompatible):

 class GptO3Mini(OpenAI):
   """GPT-O3-mini."""
-
   model = 'o3-mini'
-  multimodal = True
   timeout = None


 class GptO1(OpenAI):
   """GPT-O1."""
-
   model = 'o1'
-  multimodal = True
   timeout = None

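The class-level `multimodal = True` flags dropped here (and in the GPT-4 Turbo hunk below) are presumably superseded by each registry entry's `input_modalities`. One way such a flag can be derived, purely as an illustration and not necessarily langfun's implementation:

    INPUT_IMAGE_TYPES = ['image/png', 'image/jpeg', 'image/gif', 'image/webp']
    TEXT_ONLY: list[str] = []   # placeholder for a text-only modality list

    def is_multimodal(input_modalities: list[str]) -> bool:
      """True if any declared input MIME type is not plain text."""
      return any(not m.startswith('text/') for m in input_modalities)

    assert is_multimodal(INPUT_IMAGE_TYPES)
    assert not is_multimodal(TEXT_ONLY)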
@@ -464,6 +1009,36 @@ class GptO1Mini_20240912(OpenAI):  # pylint: disable=invalid-name

   timeout = None


+class Gpt4oMini(OpenAI):
+  """GPT-4o Mini."""
+  model = 'gpt-4o-mini'
+
+
+class Gpt4oMini_20240718(OpenAI):  # pylint:disable=invalid-name
+  """GPT-4o Mini."""
+  model = 'gpt-4o-mini-2024-07-18'
+
+
+class Gpt4o(OpenAI):
+  """GPT-4o."""
+  model = 'gpt-4o'
+
+
+class Gpt4o_20241120(OpenAI):  # pylint:disable=invalid-name
+  """GPT-4o version 2024-11-20."""
+  model = 'gpt-4o-2024-11-20'
+
+
+class Gpt4o_20240806(OpenAI):  # pylint:disable=invalid-name
+  """GPT-4o version 2024-08-06."""
+  model = 'gpt-4o-2024-08-06'
+
+
+class Gpt4o_20240513(OpenAI):  # pylint:disable=invalid-name
+  """GPT-4o version 2024-05-13."""
+  model = 'gpt-4o-2024-05-13'
+
+
 class Gpt4(OpenAI):
   """GPT-4."""
   model = 'gpt-4'
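These convenience classes only pin `model` to a specific id; context length, pricing and rate limits now come from the matching `SUPPORTED_MODELS` entry. A short inspection sketch (assumes langfun is installed and that attribute access mirrors the field names in this diff; no request is sent, so no API key is used here):

    from langfun.core.llms import openai

    print(openai.OpenAI.dir())                # in-service model ids
    lm = openai.Gpt4o()                       # alias for gpt-4o-2024-08-06
    info = lm.model_info
    print(info.model_id, info.context_length.max_input_tokens)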
@@ -472,13 +1047,11 @@ class Gpt4(OpenAI):

 class Gpt4Turbo(Gpt4):
   """GPT-4 Turbo with 128K context window. Knowledge up to Dec. 2023."""
   model = 'gpt-4-turbo'
-  multimodal = True


 class Gpt4Turbo_20240409(Gpt4Turbo):  # pylint:disable=invalid-name
   """GPT-4 Turbo with 128K context window. Knowledge up to Dec. 2023."""
   model = 'gpt-4-turbo-2024-04-09'
-  multimodal = True


 class Gpt4TurboPreview(Gpt4):
@@ -496,17 +1069,6 @@ class Gpt4TurboPreview_20231106(Gpt4TurboPreview):  # pylint: disable=invalid-name

   model = 'gpt-4-1106-preview'


-class Gpt4VisionPreview(Gpt4):
-  """GPT-4 Turbo vision preview. 128k context window. Knowledge to Apr. 2023."""
-  model = 'gpt-4-vision-preview'
-  multimodal = True
-
-
-class Gpt4VisionPreview_20231106(Gpt4):  # pylint: disable=invalid-name
-  """GPT-4 Turbo vision preview. 128k context window. Knowledge to Apr. 2023."""
-  model = 'gpt-4-1106-vision-preview'
-
-
 class Gpt4_20230613(Gpt4):  # pylint:disable=invalid-name
   """GPT-4 @20230613. 8K context window. Knowledge up to 9-2021."""
   model = 'gpt-4-0613'
@@ -522,42 +1084,6 @@ class Gpt4_32K_20230613(Gpt4_32K):  # pylint:disable=invalid-name

   model = 'gpt-4-32k-0613'

Removed: the old definitions of Gpt4oMini, Gpt4oMini_20240718, Gpt4o, Gpt4o_20241120, Gpt4o_20240806 and Gpt4o_20240513 that lived here, each carrying an explicit `multimodal = True`; they are re-declared earlier in the file (see @@ -464,6 +1009,36 @@) without that flag.

 class Gpt35(OpenAI):
   """GPT-3.5. 4K max tokens, trained up on data up to Sep, 2021."""
   model = 'text-davinci-003'
@@ -593,24 +1119,9 @@ class Gpt35Turbo16K_20230613(Gpt35Turbo):  # pylint:disable=invalid-name

   model = 'gpt-3.5-turbo-16k-0613'

Removed: the legacy GPT-3 completion classes (the base class pinned to `model = 'davinci'`, plus `Gpt3Curie` with `model = 'curie'`, `Gpt3Babbage` with `model = 'babbage'`, and the class pinned to `model = 'ada'`). Added in their place: module-level registration of every supported model id against the OpenAI class.

+def _register_openai_models():
+  """Registers OpenAI models."""
+  for m in SUPPORTED_MODELS:
+    lf.LanguageModel.register(m.model_id, OpenAI)

+_register_openai_models()