langfun 0.1.2.dev202502110804__py3-none-any.whl → 0.1.2.dev202502120804__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,302 +13,878 @@
13
13
  # limitations under the License.
14
14
  """Language models from OpenAI."""
15
15
 
16
+ import datetime
17
+ import functools
16
18
  import os
17
- from typing import Annotated, Any
19
+ from typing import Annotated, Any, Final
18
20
 
19
21
  import langfun.core as lf
20
22
  from langfun.core.llms import openai_compatible
21
23
  import pyglove as pg
22
24
 
23
25
 
24
- # From https://platform.openai.com/settings/organization/limits
25
- _DEFAULT_TPM = 250000
26
- _DEFAULT_RPM = 3000
26
+ class OpenAIModelInfo(lf.ModelInfo):
27
+ """OpenAI model info."""
27
28
 
28
- SUPPORTED_MODELS_AND_SETTINGS = {
29
- # Models from https://platform.openai.com/docs/models
30
- # RPM is from https://platform.openai.com/docs/guides/rate-limits
31
- # o1 (preview) models.
32
- # Pricing in US dollars, from https://openai.com/api/pricing/
33
- # as of 2024-10-10.
34
- 'o3-mini-2025-01-31': pg.Dict(
29
+ # Constants for supported MIME types.
30
+ INPUT_IMAGE_TYPES = [
31
+ 'image/png',
32
+ 'image/jpeg',
33
+ 'image/gif',
34
+ 'image/webp',
35
+ ]
36
+
37
+ LINKS = dict(
38
+ models='https://platform.openai.com/docs/models',
39
+ pricing='https://openai.com/api/pricing/',
40
+ rate_limits='https://platform.openai.com/docs/guides/rate-limits',
41
+ error_codes='https://platform.openai.com/docs/guides/error-codes',
42
+ )
43
+
44
+ provider: Final[str] = 'OpenAI' # pylint: disable=invalid-name
45
+
46
+
47
+ #
48
+ # !!! Please sort models by model family and model_id (time descending).
49
+ #
50
+
51
+ SUPPORTED_MODELS = [
52
+ # o3-mini models.
53
+ OpenAIModelInfo(
54
+ model_id='o3-mini',
55
+ alias_for='o3-mini-2025-01-31',
35
56
  in_service=True,
36
- rpm=10000,
37
- tpm=5000000,
38
- cost_per_1k_input_tokens=0.0011,
39
- cost_per_1k_output_tokens=0.0044,
57
+ model_type='thinking',
58
+ description='GPT O3-mini model (latest).',
59
+ url='https://platform.openai.com/docs/models#o3-mini',
60
+ input_modalities=OpenAIModelInfo.INPUT_IMAGE_TYPES,
61
+ context_length=lf.ModelInfo.ContextLength(
62
+ max_input_tokens=200_000,
63
+ max_output_tokens=100_000,
64
+ ),
65
+ pricing=lf.ModelInfo.Pricing(
66
+ cost_per_1m_cached_input_tokens=0.55,
67
+ cost_per_1m_input_tokens=1.1,
68
+ cost_per_1m_output_tokens=4.4,
69
+ ),
70
+ # Tier 5 rate limits.
71
+ rate_limits=lf.ModelInfo.RateLimits(
72
+ max_requests_per_minute=10_000,
73
+ max_tokens_per_minute=10_000_000,
74
+ ),
40
75
  ),
41
- 'o3-mini': pg.Dict(
76
+ OpenAIModelInfo(
77
+ model_id='o3-mini-2025-01-31',
42
78
  in_service=True,
43
- rpm=10000,
44
- tpm=5000000,
45
- cost_per_1k_input_tokens=0.0011,
46
- cost_per_1k_output_tokens=0.0044,
79
+ model_type='thinking',
80
+ description='GPT O3-mini model (01/31/2025).',
81
+ url='https://platform.openai.com/docs/models#o3-mini',
82
+ release_date=datetime.datetime(2025, 1, 31),
83
+ input_modalities=OpenAIModelInfo.INPUT_IMAGE_TYPES,
84
+ context_length=lf.ModelInfo.ContextLength(
85
+ max_input_tokens=200_000,
86
+ max_output_tokens=100_000,
87
+ ),
88
+ pricing=lf.ModelInfo.Pricing(
89
+ cost_per_1m_cached_input_tokens=0.55,
90
+ cost_per_1m_input_tokens=1.1,
91
+ cost_per_1m_output_tokens=4.4,
92
+ ),
93
+ # Tier 5 rate limits.
94
+ rate_limits=lf.ModelInfo.RateLimits(
95
+ max_requests_per_minute=10_000,
96
+ max_tokens_per_minute=10_000_000,
97
+ ),
47
98
  ),
48
- 'o1': pg.Dict(
99
+ # o1-mini models.
100
+ OpenAIModelInfo(
101
+ model_id='o1-mini',
102
+ alias_for='o1-mini-2024-09-12',
49
103
  in_service=True,
50
- rpm=10000,
51
- tpm=5000000,
52
- cost_per_1k_input_tokens=0.015,
53
- cost_per_1k_output_tokens=0.06,
104
+ model_type='thinking',
105
+ description='GPT O1-mini model (latest).',
106
+ url='https://platform.openai.com/docs/models#o1',
107
+ input_modalities=OpenAIModelInfo.INPUT_IMAGE_TYPES,
108
+ context_length=lf.ModelInfo.ContextLength(
109
+ max_input_tokens=128_000,
110
+ max_output_tokens=65_536,
111
+ ),
112
+ pricing=lf.ModelInfo.Pricing(
113
+ cost_per_1m_cached_input_tokens=0.55,
114
+ cost_per_1m_input_tokens=1.1,
115
+ cost_per_1m_output_tokens=4.4,
116
+ ),
117
+ # Tier 5 rate limits.
118
+ rate_limits=lf.ModelInfo.RateLimits(
119
+ max_requests_per_minute=10_000,
120
+ max_tokens_per_minute=10_000_000,
121
+ ),
54
122
  ),
55
- 'o1-preview': pg.Dict(
123
+ OpenAIModelInfo(
124
+ model_id='o1-mini-2024-09-12',
56
125
  in_service=True,
57
- rpm=10000,
58
- tpm=5000000,
59
- cost_per_1k_input_tokens=0.015,
60
- cost_per_1k_output_tokens=0.06,
126
+ model_type='thinking',
127
+ description='GPT O1-mini model (09/12/2024).',
128
+ url='https://platform.openai.com/docs/models#o1',
129
+ input_modalities=OpenAIModelInfo.INPUT_IMAGE_TYPES,
130
+ context_length=lf.ModelInfo.ContextLength(
131
+ max_input_tokens=128_000,
132
+ max_output_tokens=65_536,
133
+ ),
134
+ pricing=lf.ModelInfo.Pricing(
135
+ cost_per_1m_cached_input_tokens=0.55,
136
+ cost_per_1m_input_tokens=1.1,
137
+ cost_per_1m_output_tokens=4.4,
138
+ ),
139
+ # Tier 5 rate limits.
140
+ rate_limits=lf.ModelInfo.RateLimits(
141
+ max_requests_per_minute=10_000,
142
+ max_tokens_per_minute=10_000_000,
143
+ ),
61
144
  ),
62
- 'o1-preview-2024-09-12': pg.Dict(
145
+ OpenAIModelInfo(
146
+ model_id='o1-preview',
147
+ alias_for='o1-preview-2024-09-12',
63
148
  in_service=True,
64
- rpm=10000,
65
- tpm=5000000,
66
- cost_per_1k_input_tokens=0.015,
67
- cost_per_1k_output_tokens=0.06,
149
+ model_type='thinking',
150
+ description='GPT O1-preview model (latest).',
151
+ url='https://platform.openai.com/docs/models#o1',
152
+ input_modalities=OpenAIModelInfo.INPUT_IMAGE_TYPES,
153
+ context_length=lf.ModelInfo.ContextLength(
154
+ max_input_tokens=128_000,
155
+ max_output_tokens=32_768,
156
+ ),
157
+ pricing=lf.ModelInfo.Pricing(
158
+ cost_per_1m_cached_input_tokens=7.5,
159
+ cost_per_1m_input_tokens=15.0,
160
+ cost_per_1m_output_tokens=60.0,
161
+ ),
162
+ # Tier 5 rate limits.
163
+ rate_limits=lf.ModelInfo.RateLimits(
164
+ max_requests_per_minute=10_000,
165
+ max_tokens_per_minute=10_000_000,
166
+ ),
68
167
  ),
69
- 'o1-mini': pg.Dict(
168
+ OpenAIModelInfo(
169
+ model_id='o1-preview-2024-09-12',
70
170
  in_service=True,
71
- rpm=10000,
72
- tpm=5000000,
73
- cost_per_1k_input_tokens=0.003,
74
- cost_per_1k_output_tokens=0.012,
171
+ model_type='thinking',
172
+ description='GPT O1-preview model (09/12/2024).',
173
+ url='https://platform.openai.com/docs/models#o1',
174
+ release_date=datetime.datetime(2024, 9, 12),
175
+ input_modalities=OpenAIModelInfo.INPUT_IMAGE_TYPES,
176
+ context_length=lf.ModelInfo.ContextLength(
177
+ max_input_tokens=128_000,
178
+ max_output_tokens=32_768,
179
+ ),
180
+ pricing=lf.ModelInfo.Pricing(
181
+ cost_per_1m_cached_input_tokens=7.5,
182
+ cost_per_1m_input_tokens=15.0,
183
+ cost_per_1m_output_tokens=60.0,
184
+ ),
185
+ # Tier 5 rate limits.
186
+ rate_limits=lf.ModelInfo.RateLimits(
187
+ max_requests_per_minute=10_000,
188
+ max_tokens_per_minute=2_000_000,
189
+ ),
75
190
  ),
76
- 'o1-mini-2024-09-12': pg.Dict(
191
+ # o1 models.
192
+ OpenAIModelInfo(
193
+ model_id='o1',
194
+ alias_for='o1-2024-12-17',
77
195
  in_service=True,
78
- rpm=10000,
79
- tpm=5000000,
80
- cost_per_1k_input_tokens=0.003,
81
- cost_per_1k_output_tokens=0.012,
196
+ model_type='thinking',
197
+ description='GPT O1 model (latest).',
198
+ url='https://platform.openai.com/docs/models#o1',
199
+ input_modalities=OpenAIModelInfo.INPUT_IMAGE_TYPES,
200
+ context_length=lf.ModelInfo.ContextLength(
201
+ max_input_tokens=200_000,
202
+ max_output_tokens=100_000,
203
+ ),
204
+ pricing=lf.ModelInfo.Pricing(
205
+ cost_per_1m_cached_input_tokens=7.5,
206
+ cost_per_1m_input_tokens=15.0,
207
+ cost_per_1m_output_tokens=60.0,
208
+ ),
209
+ # Tier 5 rate limits.
210
+ rate_limits=lf.ModelInfo.RateLimits(
211
+ max_requests_per_minute=10_000,
212
+ max_tokens_per_minute=2_000_000,
213
+ ),
82
214
  ),
83
- # GPT-4o models
84
- 'gpt-4o-mini': pg.Dict(
215
+ OpenAIModelInfo(
216
+ model_id='o1-2024-12-17',
85
217
  in_service=True,
86
- rpm=10000,
87
- tpm=5000000,
88
- cost_per_1k_input_tokens=0.00015,
89
- cost_per_1k_output_tokens=0.0006,
218
+ model_type='thinking',
219
+ description='GPT O1 model (12/17/2024).',
220
+ url='https://platform.openai.com/docs/models#o1',
221
+ release_date=datetime.datetime(2024, 12, 17),
222
+ input_modalities=OpenAIModelInfo.INPUT_IMAGE_TYPES,
223
+ context_length=lf.ModelInfo.ContextLength(
224
+ max_input_tokens=200_000,
225
+ max_output_tokens=100_000,
226
+ ),
227
+ pricing=lf.ModelInfo.Pricing(
228
+ cost_per_1m_cached_input_tokens=7.5,
229
+ cost_per_1m_input_tokens=15.0,
230
+ cost_per_1m_output_tokens=60.0,
231
+ ),
232
+ # Tier 5 rate limits.
233
+ rate_limits=lf.ModelInfo.RateLimits(
234
+ max_requests_per_minute=10_000,
235
+ max_tokens_per_minute=2_000_000,
236
+ ),
90
237
  ),
91
- 'gpt-4o-mini-2024-07-18': pg.Dict(
238
+ # GPT-4o-mini models
239
+ OpenAIModelInfo(
240
+ model_id='gpt-4o-mini',
241
+ alias_for='gpt-4o-mini-2024-07-18',
92
242
  in_service=True,
93
- rpm=10000,
94
- tpm=5000000,
95
- cost_per_1k_input_tokens=0.00015,
96
- cost_per_1k_output_tokens=0.0006,
243
+ model_type='instruction-tuned',
244
+ description='GPT 4o mini model (latest).',
245
+ url='https://platform.openai.com/docs/models#gpt-4o-mini',
246
+ input_modalities=OpenAIModelInfo.INPUT_IMAGE_TYPES,
247
+ context_length=lf.ModelInfo.ContextLength(
248
+ max_input_tokens=128_000,
249
+ max_output_tokens=16_384,
250
+ ),
251
+ pricing=lf.ModelInfo.Pricing(
252
+ cost_per_1m_cached_input_tokens=0.075,
253
+ cost_per_1m_input_tokens=0.15,
254
+ cost_per_1m_output_tokens=0.6,
255
+ ),
256
+ # Tier 5 rate limits.
257
+ rate_limits=lf.ModelInfo.RateLimits(
258
+ max_requests_per_minute=10_000,
259
+ max_tokens_per_minute=10_000_000,
260
+ ),
97
261
  ),
98
- 'gpt-4o': pg.Dict(
262
+ OpenAIModelInfo(
263
+ model_id='gpt-4o-mini-2024-07-18',
99
264
  in_service=True,
100
- rpm=10000,
101
- tpm=5000000,
102
- cost_per_1k_input_tokens=0.0025,
103
- cost_per_1k_output_tokens=0.01,
265
+ model_type='instruction-tuned',
266
+ description='GPT 4o mini model (07/18/2024).',
267
+ url='https://platform.openai.com/docs/models#gpt-4o-mini',
268
+ release_date=datetime.datetime(2024, 7, 18),
269
+ input_modalities=OpenAIModelInfo.INPUT_IMAGE_TYPES,
270
+ context_length=lf.ModelInfo.ContextLength(
271
+ max_input_tokens=128_000,
272
+ max_output_tokens=16_384,
273
+ ),
274
+ pricing=lf.ModelInfo.Pricing(
275
+ cost_per_1m_cached_input_tokens=0.075,
276
+ cost_per_1m_input_tokens=0.15,
277
+ cost_per_1m_output_tokens=0.6,
278
+ ),
279
+ # Tier 5 rate limits.
280
+ rate_limits=lf.ModelInfo.RateLimits(
281
+ max_requests_per_minute=10_000,
282
+ max_tokens_per_minute=10_000_000,
283
+ ),
104
284
  ),
105
- 'gpt-4o-2024-11-20': pg.Dict(
285
+ # GPT-4o models
286
+ OpenAIModelInfo(
287
+ model_id='gpt-4o',
288
+ alias_for='gpt-4o-2024-08-06',
106
289
  in_service=True,
107
- rpm=10000,
108
- tpm=5000000,
109
- cost_per_1k_input_tokens=0.0025,
110
- cost_per_1k_output_tokens=0.01,
290
+ model_type='instruction-tuned',
291
+ description='GPT 4o model (latest stable).',
292
+ url='https://platform.openai.com/docs/models#gpt-4o',
293
+ input_modalities=OpenAIModelInfo.INPUT_IMAGE_TYPES,
294
+ context_length=lf.ModelInfo.ContextLength(
295
+ max_input_tokens=128_000,
296
+ max_output_tokens=16_384,
297
+ ),
298
+ pricing=lf.ModelInfo.Pricing(
299
+ cost_per_1m_cached_input_tokens=1.25,
300
+ cost_per_1m_input_tokens=2.5,
301
+ cost_per_1m_output_tokens=10.0,
302
+ ),
303
+ # Tier 5 rate limits.
304
+ rate_limits=lf.ModelInfo.RateLimits(
305
+ max_requests_per_minute=10_000,
306
+ max_tokens_per_minute=2_000_000,
307
+ ),
111
308
  ),
112
- 'gpt-4o-2024-08-06': pg.Dict(
309
+ OpenAIModelInfo(
310
+ model_id='gpt-4o-2024-11-20',
113
311
  in_service=True,
114
- rpm=10000,
115
- tpm=5000000,
116
- cost_per_1k_input_tokens=0.0025,
117
- cost_per_1k_output_tokens=0.01,
312
+ model_type='instruction-tuned',
313
+ description='GPT 4o model (11/20/2024).',
314
+ url='https://platform.openai.com/docs/models#gpt-4o',
315
+ release_date=datetime.datetime(2024, 11, 20),
316
+ input_modalities=OpenAIModelInfo.INPUT_IMAGE_TYPES,
317
+ context_length=lf.ModelInfo.ContextLength(
318
+ max_input_tokens=128_000,
319
+ max_output_tokens=16_384,
320
+ ),
321
+ pricing=lf.ModelInfo.Pricing(
322
+ cost_per_1m_cached_input_tokens=1.25,
323
+ cost_per_1m_input_tokens=2.5,
324
+ cost_per_1m_output_tokens=10.0,
325
+ ),
326
+ # Tier 5 rate limits.
327
+ rate_limits=lf.ModelInfo.RateLimits(
328
+ max_requests_per_minute=10_000,
329
+ max_tokens_per_minute=2_000_000,
330
+ ),
118
331
  ),
119
- 'gpt-4o-2024-05-13': pg.Dict(
332
+ OpenAIModelInfo(
333
+ model_id='gpt-4o-2024-08-06',
120
334
  in_service=True,
121
- rpm=10000,
122
- tpm=5000000,
123
- cost_per_1k_input_tokens=0.005,
124
- cost_per_1k_output_tokens=0.015,
335
+ model_type='instruction-tuned',
336
+ description='GPT 4o model (08/06/2024).',
337
+ url='https://platform.openai.com/docs/models#gpt-4o',
338
+ input_modalities=OpenAIModelInfo.INPUT_IMAGE_TYPES,
339
+ context_length=lf.ModelInfo.ContextLength(
340
+ max_input_tokens=128_000,
341
+ max_output_tokens=16_384,
342
+ ),
343
+ pricing=lf.ModelInfo.Pricing(
344
+ cost_per_1m_cached_input_tokens=1.25,
345
+ cost_per_1m_input_tokens=2.5,
346
+ cost_per_1m_output_tokens=10.0,
347
+ ),
348
+ # Tier 5 rate limits.
349
+ rate_limits=lf.ModelInfo.RateLimits(
350
+ max_requests_per_minute=10_000,
351
+ max_tokens_per_minute=2_000_000,
352
+ ),
125
353
  ),
126
- # GPT-4-Turbo models
127
- 'gpt-4-turbo': pg.Dict(
354
+ OpenAIModelInfo(
355
+ model_id='gpt-4o-2024-05-13',
128
356
  in_service=True,
129
- rpm=10000,
130
- tpm=2000000,
131
- cost_per_1k_input_tokens=0.01,
132
- cost_per_1k_output_tokens=0.03,
357
+ model_type='instruction-tuned',
358
+ description='GPT 4o model (05/13/2024).',
359
+ url='https://platform.openai.com/docs/models#gpt-4o',
360
+ release_date=datetime.datetime(2024, 5, 13),
361
+ input_modalities=OpenAIModelInfo.INPUT_IMAGE_TYPES,
362
+ context_length=lf.ModelInfo.ContextLength(
363
+ max_input_tokens=128_000,
364
+ max_output_tokens=16_384,
365
+ ),
366
+ pricing=lf.ModelInfo.Pricing(
367
+ cost_per_1m_cached_input_tokens=None,
368
+ cost_per_1m_input_tokens=5.0,
369
+ cost_per_1m_output_tokens=15.0,
370
+ ),
371
+ # Tier 5 rate limits.
372
+ rate_limits=lf.ModelInfo.RateLimits(
373
+ max_requests_per_minute=10_000,
374
+ max_tokens_per_minute=2_000_000,
375
+ ),
133
376
  ),
134
- 'gpt-4-turbo-2024-04-09': pg.Dict(
377
+ OpenAIModelInfo(
378
+ model_id='chatgpt-4o-latest',
135
379
  in_service=True,
136
- rpm=10000,
137
- tpm=2000000,
138
- cost_per_1k_input_tokens=0.01,
139
- cost_per_1k_output_tokens=0.03,
380
+ model_type='instruction-tuned',
381
+ description='GPT 4o model ChatGPT version (latest).',
382
+ url='https://platform.openai.com/docs/models#gpt-4o',
383
+ input_modalities=OpenAIModelInfo.INPUT_IMAGE_TYPES,
384
+ context_length=lf.ModelInfo.ContextLength(
385
+ max_input_tokens=128_000,
386
+ max_output_tokens=16_384,
387
+ ),
388
+ pricing=lf.ModelInfo.Pricing(
389
+ cost_per_1m_cached_input_tokens=None,
390
+ cost_per_1m_input_tokens=5.0,
391
+ cost_per_1m_output_tokens=15.0,
392
+ ),
393
+ # Tier 5 rate limits.
394
+ rate_limits=lf.ModelInfo.RateLimits(
395
+ max_requests_per_minute=10_000,
396
+ max_tokens_per_minute=2_000_000,
397
+ ),
140
398
  ),
141
- 'gpt-4-turbo-preview': pg.Dict(
399
+ # GPT-4 Turbo models.
400
+ OpenAIModelInfo(
401
+ model_id='gpt-4-turbo',
402
+ alias_for='gpt-4-turbo-2024-04-09',
142
403
  in_service=True,
143
- rpm=10000,
144
- tpm=2000000,
145
- cost_per_1k_input_tokens=0.01,
146
- cost_per_1k_output_tokens=0.03,
404
+ model_type='instruction-tuned',
405
+ description='GPT 4 Turbo model (latest).',
406
+ url='https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4',
407
+ input_modalities=OpenAIModelInfo.INPUT_IMAGE_TYPES,
408
+ context_length=lf.ModelInfo.ContextLength(
409
+ max_input_tokens=128_000,
410
+ max_output_tokens=4_096,
411
+ ),
412
+ pricing=lf.ModelInfo.Pricing(
413
+ cost_per_1m_cached_input_tokens=None,
414
+ cost_per_1m_input_tokens=10.0,
415
+ cost_per_1m_output_tokens=30.0,
416
+ ),
417
+ # Tier 5 rate limits.
418
+ rate_limits=lf.ModelInfo.RateLimits(
419
+ max_requests_per_minute=10_000,
420
+ max_tokens_per_minute=800_000,
421
+ ),
147
422
  ),
148
- 'gpt-4-0125-preview': pg.Dict(
423
+ OpenAIModelInfo(
424
+ model_id='gpt-4-turbo-2024-04-09',
149
425
  in_service=True,
150
- rpm=10000,
151
- tpm=2000000,
152
- cost_per_1k_input_tokens=0.01,
153
- cost_per_1k_output_tokens=0.03,
426
+ model_type='instruction-tuned',
427
+ description='GPT 4 Turbo model (04/09/2024).',
428
+ url='https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4',
429
+ release_date=datetime.datetime(2024, 4, 9),
430
+ input_modalities=OpenAIModelInfo.INPUT_IMAGE_TYPES,
431
+ context_length=lf.ModelInfo.ContextLength(
432
+ max_input_tokens=128_000,
433
+ max_output_tokens=4_096,
434
+ ),
435
+ pricing=lf.ModelInfo.Pricing(
436
+ cost_per_1m_cached_input_tokens=None,
437
+ cost_per_1m_input_tokens=10.0,
438
+ cost_per_1m_output_tokens=30.0,
439
+ ),
440
+ # Tier 5 rate limits.
441
+ rate_limits=lf.ModelInfo.RateLimits(
442
+ max_requests_per_minute=10_000,
443
+ max_tokens_per_minute=800_000,
444
+ ),
154
445
  ),
155
- 'gpt-4-1106-preview': pg.Dict(
446
+ OpenAIModelInfo(
447
+ model_id='gpt-4-turbo-preview',
448
+ alias_for='gpt-4-0125-preview',
156
449
  in_service=True,
157
- rpm=10000,
158
- tpm=2000000,
159
- cost_per_1k_input_tokens=0.01,
160
- cost_per_1k_output_tokens=0.03,
450
+ model_type='instruction-tuned',
451
+ description='GPT 4 Turbo preview model (latest).',
452
+ url='https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4',
453
+ input_modalities=OpenAIModelInfo.INPUT_IMAGE_TYPES,
454
+ context_length=lf.ModelInfo.ContextLength(
455
+ max_input_tokens=128_000,
456
+ max_output_tokens=4_096,
457
+ ),
458
+ pricing=lf.ModelInfo.Pricing(
459
+ cost_per_1m_cached_input_tokens=None,
460
+ cost_per_1m_input_tokens=10.0,
461
+ cost_per_1m_output_tokens=30.0,
462
+ ),
463
+ # Tier 5 rate limits.
464
+ rate_limits=lf.ModelInfo.RateLimits(
465
+ max_requests_per_minute=10_000,
466
+ max_tokens_per_minute=800_000,
467
+ ),
161
468
  ),
162
- 'gpt-4-vision-preview': pg.Dict(
469
+ OpenAIModelInfo(
470
+ model_id='gpt-4-0125-preview',
163
471
  in_service=True,
164
- rpm=10000,
165
- tpm=2000000,
166
- cost_per_1k_input_tokens=0.01,
167
- cost_per_1k_output_tokens=0.03,
472
+ model_type='instruction-tuned',
473
+ description='GPT 4 Turbo preview model (01/25/2024).',
474
+ url='https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4',
475
+ release_date=datetime.datetime(2024, 1, 25),
476
+ input_modalities=OpenAIModelInfo.INPUT_IMAGE_TYPES,
477
+ context_length=lf.ModelInfo.ContextLength(
478
+ max_input_tokens=128_000,
479
+ max_output_tokens=4_096,
480
+ ),
481
+ pricing=lf.ModelInfo.Pricing(
482
+ cost_per_1m_cached_input_tokens=None,
483
+ cost_per_1m_input_tokens=10.0,
484
+ cost_per_1m_output_tokens=30.0,
485
+ ),
486
+ # Tier 5 rate limits.
487
+ rate_limits=lf.ModelInfo.RateLimits(
488
+ max_requests_per_minute=10_000,
489
+ max_tokens_per_minute=800_000,
490
+ ),
168
491
  ),
169
- 'gpt-4-1106-vision-preview': pg.Dict(
492
+ OpenAIModelInfo(
493
+ model_id='gpt-4-1106-preview',
170
494
  in_service=True,
171
- rpm=10000,
172
- tpm=2000000,
173
- cost_per_1k_input_tokens=0.01,
174
- cost_per_1k_output_tokens=0.03,
495
+ model_type='instruction-tuned',
496
+ description='GPT 4 Turbo preview model (11/06/2023).',
497
+ url='https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4',
498
+ release_date=datetime.datetime(2023, 11, 6),
499
+ input_modalities=OpenAIModelInfo.INPUT_IMAGE_TYPES,
500
+ context_length=lf.ModelInfo.ContextLength(
501
+ max_input_tokens=128_000,
502
+ max_output_tokens=4_096,
503
+ ),
504
+ pricing=lf.ModelInfo.Pricing(
505
+ cost_per_1m_cached_input_tokens=None,
506
+ cost_per_1m_input_tokens=10.0,
507
+ cost_per_1m_output_tokens=30.0,
508
+ ),
509
+ # Tier 5 rate limits.
510
+ rate_limits=lf.ModelInfo.RateLimits(
511
+ max_requests_per_minute=10_000,
512
+ max_tokens_per_minute=800_000,
513
+ ),
175
514
  ),
176
- # GPT-4 models
177
- 'gpt-4': pg.Dict(
515
+ # GPT-4 models.
516
+ OpenAIModelInfo(
517
+ model_id='gpt-4',
518
+ alias_for='gpt-4-0613',
178
519
  in_service=True,
179
- rpm=10000,
180
- tpm=300000,
181
- cost_per_1k_input_tokens=0.03,
182
- cost_per_1k_output_tokens=0.06,
183
- ),
184
- 'gpt-4-0613': pg.Dict(
185
- in_service=False,
186
- rpm=10000,
187
- tpm=300000,
188
- cost_per_1k_input_tokens=0.03,
189
- cost_per_1k_output_tokens=0.06,
190
- ),
191
- 'gpt-4-0314': pg.Dict(
192
- in_service=False,
193
- rpm=10000,
194
- tpm=300000,
195
- cost_per_1k_input_tokens=0.03,
196
- cost_per_1k_output_tokens=0.06,
520
+ model_type='instruction-tuned',
521
+ description='GPT 4 model (latest).',
522
+ url='https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4',
523
+ input_modalities=lf.ModelInfo.TEXT_INPUT_ONLY,
524
+ context_length=lf.ModelInfo.ContextLength(
525
+ max_input_tokens=8_192,
526
+ max_output_tokens=8_192,
527
+ ),
528
+ pricing=lf.ModelInfo.Pricing(
529
+ cost_per_1m_cached_input_tokens=None,
530
+ cost_per_1m_input_tokens=30.0,
531
+ cost_per_1m_output_tokens=60.0,
532
+ ),
533
+ # Tier 5 rate limits.
534
+ rate_limits=lf.ModelInfo.RateLimits(
535
+ max_requests_per_minute=10_000,
536
+ max_tokens_per_minute=300_000,
537
+ ),
197
538
  ),
198
- 'gpt-4-32k': pg.Dict(
539
+ OpenAIModelInfo(
540
+ model_id='gpt-4-0613',
199
541
  in_service=True,
200
- rpm=10000,
201
- tpm=300000,
202
- cost_per_1k_input_tokens=0.06,
203
- cost_per_1k_output_tokens=0.12,
542
+ model_type='instruction-tuned',
543
+ description='GPT 4 model (06/13/2023).',
544
+ url='https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4',
545
+ release_date=datetime.datetime(2023, 6, 13),
546
+ input_modalities=lf.ModelInfo.TEXT_INPUT_ONLY,
547
+ context_length=lf.ModelInfo.ContextLength(
548
+ max_input_tokens=8_192,
549
+ max_output_tokens=8_192,
550
+ ),
551
+ pricing=lf.ModelInfo.Pricing(
552
+ cost_per_1m_cached_input_tokens=None,
553
+ cost_per_1m_input_tokens=30.0,
554
+ cost_per_1m_output_tokens=60.0,
555
+ ),
556
+ # Tier 5 rate limits.
557
+ rate_limits=lf.ModelInfo.RateLimits(
558
+ max_requests_per_minute=10_000,
559
+ max_tokens_per_minute=300_000,
560
+ ),
204
561
  ),
205
- 'gpt-4-32k-0613': pg.Dict(
562
+ OpenAIModelInfo(
563
+ model_id='gpt-4-0314',
206
564
  in_service=False,
207
- rpm=10000,
208
- tpm=300000,
209
- cost_per_1k_input_tokens=0.06,
210
- cost_per_1k_output_tokens=0.12,
565
+ model_type='instruction-tuned',
566
+ description='GPT 4 model (03/14/2023).',
567
+ url='https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4',
568
+ release_date=datetime.datetime(2023, 3, 14),
569
+ input_modalities=lf.ModelInfo.TEXT_INPUT_ONLY,
570
+ context_length=lf.ModelInfo.ContextLength(
571
+ max_input_tokens=8_192,
572
+ max_output_tokens=8_192,
573
+ ),
574
+ pricing=lf.ModelInfo.Pricing(
575
+ cost_per_1m_cached_input_tokens=None,
576
+ cost_per_1m_input_tokens=30.0,
577
+ cost_per_1m_output_tokens=60.0,
578
+ ),
579
+ # Tier 5 rate limits.
580
+ rate_limits=lf.ModelInfo.RateLimits(
581
+ max_requests_per_minute=10_000,
582
+ max_tokens_per_minute=300_000,
583
+ ),
211
584
  ),
212
- 'gpt-4-32k-0314': pg.Dict(
585
+ # GPT-4 32K models.
586
+ OpenAIModelInfo(
587
+ model_id='gpt-4-32k',
588
+ alias_for='gpt-4-32k-0613',
213
589
  in_service=False,
214
- rpm=10000,
215
- tpm=300000,
216
- cost_per_1k_input_tokens=0.06,
217
- cost_per_1k_output_tokens=0.12,
590
+ model_type='instruction-tuned',
591
+ description='GPT 4 32K model (latest).',
592
+ url='https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4',
593
+ input_modalities=lf.ModelInfo.TEXT_INPUT_ONLY,
594
+ context_length=lf.ModelInfo.ContextLength(
595
+ max_input_tokens=32_768,
596
+ max_output_tokens=8_192,
597
+ ),
598
+ pricing=lf.ModelInfo.Pricing(
599
+ cost_per_1m_cached_input_tokens=None,
600
+ cost_per_1m_input_tokens=60.0,
601
+ cost_per_1m_output_tokens=120.0,
602
+ ),
603
+ # Tier 5 rate limits.
604
+ rate_limits=lf.ModelInfo.RateLimits(
605
+ max_requests_per_minute=10_000,
606
+ max_tokens_per_minute=300_000,
607
+ ),
218
608
  ),
219
- # GPT-3.5-Turbo models
220
- 'gpt-3.5-turbo': pg.Dict(
221
- in_service=True,
222
- rpm=10000,
223
- tpm=2000000,
224
- cost_per_1k_input_tokens=0.0005,
225
- cost_per_1k_output_tokens=0.0015,
609
+ OpenAIModelInfo(
610
+ model_id='gpt-4-32k-0613',
611
+ in_service=False,
612
+ model_type='instruction-tuned',
613
+ description='GPT 4 32K model (06/13/2023).',
614
+ url='https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4',
615
+ release_date=datetime.datetime(2023, 6, 13),
616
+ input_modalities=lf.ModelInfo.TEXT_INPUT_ONLY,
617
+ context_length=lf.ModelInfo.ContextLength(
618
+ max_input_tokens=32_768,
619
+ max_output_tokens=8_192,
620
+ ),
621
+ pricing=lf.ModelInfo.Pricing(
622
+ cost_per_1m_cached_input_tokens=None,
623
+ cost_per_1m_input_tokens=60.0,
624
+ cost_per_1m_output_tokens=120.0,
625
+ ),
626
+ # Tier 5 rate limits.
627
+ rate_limits=lf.ModelInfo.RateLimits(
628
+ max_requests_per_minute=10_000,
629
+ max_tokens_per_minute=300_000,
630
+ ),
226
631
  ),
227
- 'gpt-3.5-turbo-0125': pg.Dict(
228
- in_service=True,
229
- rpm=10000,
230
- tpm=2000000,
231
- cost_per_1k_input_tokens=0.0005,
232
- cost_per_1k_output_tokens=0.0015,
632
+ OpenAIModelInfo(
633
+ model_id='gpt-4-32k-0314',
634
+ in_service=False,
635
+ model_type='instruction-tuned',
636
+ description='GPT 4 32K model (03/14/2023).',
637
+ url='https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4',
638
+ release_date=datetime.datetime(2023, 3, 14),
639
+ input_modalities=lf.ModelInfo.TEXT_INPUT_ONLY,
640
+ context_length=lf.ModelInfo.ContextLength(
641
+ max_input_tokens=32_768,
642
+ max_output_tokens=8_192,
643
+ ),
644
+ pricing=lf.ModelInfo.Pricing(
645
+ cost_per_1m_cached_input_tokens=None,
646
+ cost_per_1m_input_tokens=60.0,
647
+ cost_per_1m_output_tokens=120.0,
648
+ ),
649
+ # Tier 5 rate limits.
650
+ rate_limits=lf.ModelInfo.RateLimits(
651
+ max_requests_per_minute=10_000,
652
+ max_tokens_per_minute=300_000,
653
+ ),
233
654
  ),
234
- 'gpt-3.5-turbo-1106': pg.Dict(
655
+ # GPT 3.5 Turbo models.
656
+ OpenAIModelInfo(
657
+ model_id='gpt-3.5-turbo',
658
+ alias_for='gpt-3.5-turbo-0125',
235
659
  in_service=True,
236
- rpm=10000,
237
- tpm=2000000,
238
- cost_per_1k_input_tokens=0.001,
239
- cost_per_1k_output_tokens=0.002,
660
+ model_type='instruction-tuned',
661
+ description='GPT 3.5 Turbo model (latest).',
662
+ url='https://platform.openai.com/docs/models#gpt-3-5-turbo',
663
+ input_modalities=lf.ModelInfo.TEXT_INPUT_ONLY,
664
+ context_length=lf.ModelInfo.ContextLength(
665
+ max_input_tokens=16_384,
666
+ max_output_tokens=4_096,
667
+ ),
668
+ pricing=lf.ModelInfo.Pricing(
669
+ cost_per_1m_cached_input_tokens=None,
670
+ cost_per_1m_input_tokens=0.5,
671
+ cost_per_1m_output_tokens=1.5,
672
+ ),
673
+ # Tier 5 rate limits.
674
+ rate_limits=lf.ModelInfo.RateLimits(
675
+ max_requests_per_minute=10_000,
676
+ max_tokens_per_minute=10_000_000,
677
+ ),
240
678
  ),
241
- 'gpt-3.5-turbo-0613': pg.Dict(
679
+ OpenAIModelInfo(
680
+ model_id='gpt-3.5-turbo-0125',
242
681
  in_service=True,
243
- rpm=10000,
244
- tpm=2000000,
245
- cost_per_1k_input_tokens=0.0015,
246
- cost_per_1k_output_tokens=0.002,
682
+ release_date=datetime.datetime(2024, 1, 25),
683
+ model_type='instruction-tuned',
684
+ description='GPT 3.5 Turbo model (01/25/2024).',
685
+ url='https://platform.openai.com/docs/models#gpt-3-5-turbo',
686
+ input_modalities=lf.ModelInfo.TEXT_INPUT_ONLY,
687
+ context_length=lf.ModelInfo.ContextLength(
688
+ max_input_tokens=16_384,
689
+ max_output_tokens=4_096,
690
+ ),
691
+ pricing=lf.ModelInfo.Pricing(
692
+ cost_per_1m_cached_input_tokens=None,
693
+ cost_per_1m_input_tokens=0.5,
694
+ cost_per_1m_output_tokens=1.5,
695
+ ),
696
+ # Tier 5 rate limits.
697
+ rate_limits=lf.ModelInfo.RateLimits(
698
+ max_requests_per_minute=10_000,
699
+ max_tokens_per_minute=10_000_000,
700
+ ),
247
701
  ),
248
- 'gpt-3.5-turbo-0301': pg.Dict(
702
+ OpenAIModelInfo(
703
+ model_id='gpt-3.5-turbo-1106',
249
704
  in_service=True,
250
- rpm=10000,
251
- tpm=2000000,
252
- cost_per_1k_input_tokens=0.0015,
253
- cost_per_1k_output_tokens=0.002,
705
+ release_date=datetime.datetime(2023, 11, 6),
706
+ model_type='instruction-tuned',
707
+ description='GPT 3.5 Turbo model (11/06/2023).',
708
+ url='https://platform.openai.com/docs/models#gpt-3-5-turbo',
709
+ input_modalities=lf.ModelInfo.TEXT_INPUT_ONLY,
710
+ context_length=lf.ModelInfo.ContextLength(
711
+ max_input_tokens=16_384,
712
+ max_output_tokens=4_096,
713
+ ),
714
+ pricing=lf.ModelInfo.Pricing(
715
+ cost_per_1m_cached_input_tokens=None,
716
+ cost_per_1m_input_tokens=1.0,
717
+ cost_per_1m_output_tokens=2.0,
718
+ ),
719
+ # Tier 5 rate limits.
720
+ rate_limits=lf.ModelInfo.RateLimits(
721
+ max_requests_per_minute=10_000,
722
+ max_tokens_per_minute=10_000_000,
723
+ ),
254
724
  ),
255
- 'gpt-3.5-turbo-16k': pg.Dict(
256
- in_service=True,
257
- rpm=10000,
258
- tpm=2000000,
259
- cost_per_1k_input_tokens=0.003,
260
- cost_per_1k_output_tokens=0.004,
725
+ OpenAIModelInfo(
726
+ model_id='gpt-3.5-turbo-0613',
727
+ in_service=False,
728
+ release_date=datetime.datetime(2023, 6, 13),
729
+ model_type='instruction-tuned',
730
+ description='GPT 3.5 Turbo model (06/13/2023).',
731
+ url='https://platform.openai.com/docs/models#gpt-3-5-turbo',
732
+ input_modalities=lf.ModelInfo.TEXT_INPUT_ONLY,
733
+ context_length=lf.ModelInfo.ContextLength(
734
+ max_input_tokens=16_384,
735
+ max_output_tokens=4_096,
736
+ ),
737
+ pricing=lf.ModelInfo.Pricing(
738
+ cost_per_1m_cached_input_tokens=None,
739
+ cost_per_1m_input_tokens=1.5,
740
+ cost_per_1m_output_tokens=2.0,
741
+ ),
742
+ # Tier 5 rate limits.
743
+ rate_limits=lf.ModelInfo.RateLimits(
744
+ max_requests_per_minute=10_000,
745
+ max_tokens_per_minute=10_000_000,
746
+ ),
261
747
  ),
262
- 'gpt-3.5-turbo-16k-0613': pg.Dict(
748
+ # GPT 3.5 Turbo 16K models.
749
+ OpenAIModelInfo(
750
+ model_id='gpt-3.5-turbo-16k',
751
+ alias_for='gpt-3.5-turbo-16k-0613',
263
752
  in_service=True,
264
- rpm=10000,
265
- tpm=2000000,
266
- cost_per_1k_input_tokens=0.003,
267
- cost_per_1k_output_tokens=0.004,
753
+ model_type='instruction-tuned',
754
+ description='GPT 3.5 Turbo 16K model (latest).',
755
+ url='https://platform.openai.com/docs/models#gpt-3-5-turbo',
756
+ input_modalities=lf.ModelInfo.TEXT_INPUT_ONLY,
757
+ context_length=lf.ModelInfo.ContextLength(
758
+ max_input_tokens=16_385,
759
+ max_output_tokens=4_096,
760
+ ),
761
+ pricing=lf.ModelInfo.Pricing(
762
+ cost_per_1m_cached_input_tokens=None,
763
+ cost_per_1m_input_tokens=3.0,
764
+ cost_per_1m_output_tokens=4.0,
765
+ ),
766
+ # Tier 5 rate limits.
767
+ rate_limits=lf.ModelInfo.RateLimits(
768
+ max_requests_per_minute=10_000,
769
+ max_tokens_per_minute=10_000_000,
770
+ ),
268
771
  ),
269
- 'gpt-3.5-turbo-16k-0301': pg.Dict(
772
+ OpenAIModelInfo(
773
+ model_id='gpt-3.5-turbo-16k-0613',
270
774
  in_service=False,
271
- rpm=10000,
272
- tpm=2000000,
273
- cost_per_1k_input_tokens=0.003,
274
- cost_per_1k_output_tokens=0.004,
275
- ),
276
- # GPT-3.5 models
277
- 'text-davinci-003': pg.Dict(
278
- in_service=False, rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM
775
+ model_type='instruction-tuned',
776
+ description='GPT 3.5 Turbo 16K model (06/13/2023).',
777
+ url='https://platform.openai.com/docs/models#gpt-3-5-turbo',
778
+ release_date=datetime.datetime(2023, 6, 13),
779
+ input_modalities=lf.ModelInfo.TEXT_INPUT_ONLY,
780
+ context_length=lf.ModelInfo.ContextLength(
781
+ max_input_tokens=16_385,
782
+ max_output_tokens=4_096,
783
+ ),
784
+ pricing=lf.ModelInfo.Pricing(
785
+ cost_per_1m_cached_input_tokens=None,
786
+ cost_per_1m_input_tokens=3.0,
787
+ cost_per_1m_output_tokens=4.0,
788
+ ),
789
+ # Tier 5 rate limits.
790
+ rate_limits=lf.ModelInfo.RateLimits(
791
+ max_requests_per_minute=10_000,
792
+ max_tokens_per_minute=10_000_000,
793
+ ),
279
794
  ),
280
- 'text-davinci-002': pg.Dict(
281
- in_service=False, rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM
282
- ),
283
- 'code-davinci-002': pg.Dict(
284
- in_service=False, rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM
285
- ),
286
- # GPT-3 instruction-tuned models (Deprecated)
287
- 'text-curie-001': pg.Dict(
288
- in_service=False, rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM
289
- ),
290
- 'text-babbage-001': pg.Dict(
795
+ OpenAIModelInfo(
796
+ model_id='gpt-3.5-turbo-16k-0301',
291
797
  in_service=False,
292
- rpm=_DEFAULT_RPM,
293
- tpm=_DEFAULT_TPM,
798
+ model_type='instruction-tuned',
799
+ description='GPT 3.5 Turbo 16K model (03/01/2023).',
800
+ url='https://platform.openai.com/docs/models#gpt-3-5-turbo',
801
+ release_date=datetime.datetime(2023, 3, 1),
802
+ input_modalities=lf.ModelInfo.TEXT_INPUT_ONLY,
803
+ context_length=lf.ModelInfo.ContextLength(
804
+ max_input_tokens=16_385,
805
+ max_output_tokens=4_096,
806
+ ),
807
+ pricing=lf.ModelInfo.Pricing(
808
+ cost_per_1m_cached_input_tokens=None,
809
+ cost_per_1m_input_tokens=3.0,
810
+ cost_per_1m_output_tokens=4.0,
811
+ ),
812
+ # Tier 5 rate limits.
813
+ rate_limits=lf.ModelInfo.RateLimits(
814
+ max_requests_per_minute=10_000,
815
+ max_tokens_per_minute=10_000_000,
816
+ ),
294
817
  ),
295
- 'text-ada-001': pg.Dict(
818
+ # GPT 3.5 models.
819
+ OpenAIModelInfo(
820
+ model_id='text-davinci-003',
296
821
  in_service=False,
297
- rpm=_DEFAULT_RPM,
298
- tpm=_DEFAULT_TPM,
822
+ model_type='instruction-tuned',
823
+ description='ChatGPT 3.5 model.',
824
+ input_modalities=lf.ModelInfo.TEXT_INPUT_ONLY,
825
+ context_length=lf.ModelInfo.ContextLength(
826
+ max_input_tokens=16_384,
827
+ max_output_tokens=4_096,
828
+ ),
829
+ pricing=lf.ModelInfo.Pricing(
830
+ cost_per_1m_cached_input_tokens=None,
831
+ cost_per_1m_input_tokens=3.0,
832
+ cost_per_1m_output_tokens=3.0,
833
+ ),
834
+ # Tier 5 rate limits.
835
+ rate_limits=lf.ModelInfo.RateLimits(
836
+ max_requests_per_minute=None,
837
+ max_tokens_per_minute=None,
838
+ ),
299
839
  ),
300
- 'davinci': pg.Dict(
301
- in_service=False,
302
- rpm=_DEFAULT_RPM,
303
- tpm=_DEFAULT_TPM,
840
+ # GPT 3 models.
841
+ OpenAIModelInfo(
842
+ model_id='babbage-002',
843
+ in_service=True,
844
+ model_type='pretrained',
845
+ description='GPT3 base model babbage-002',
846
+ url='https://platform.openai.com/docs/models#gpt-base',
847
+ input_modalities=lf.ModelInfo.TEXT_INPUT_ONLY,
848
+ context_length=lf.ModelInfo.ContextLength(
849
+ max_input_tokens=16_384,
850
+ max_output_tokens=4_096,
851
+ ),
852
+ pricing=lf.ModelInfo.Pricing(
853
+ cost_per_1m_cached_input_tokens=None,
854
+ cost_per_1m_input_tokens=2.0,
855
+ cost_per_1m_output_tokens=2.0,
856
+ ),
857
+ # Tier 5 rate limits.
858
+ rate_limits=lf.ModelInfo.RateLimits(
859
+ max_requests_per_minute=3_000,
860
+ max_tokens_per_minute=250_000,
861
+ ),
862
+ ),
863
+ OpenAIModelInfo(
864
+ model_id='davinci-002',
865
+ in_service=True,
866
+ model_type='pretrained',
867
+ description='GPT3 base model davinci-002',
868
+ url='https://platform.openai.com/docs/models#gpt-base',
869
+ input_modalities=lf.ModelInfo.TEXT_INPUT_ONLY,
870
+ context_length=lf.ModelInfo.ContextLength(
871
+ max_input_tokens=16_384,
872
+ max_output_tokens=4_096,
873
+ ),
874
+ pricing=lf.ModelInfo.Pricing(
875
+ cost_per_1m_cached_input_tokens=None,
876
+ cost_per_1m_input_tokens=2.0,
877
+ cost_per_1m_output_tokens=2.0,
878
+ ),
879
+ # Tier 5 rate limits.
880
+ rate_limits=lf.ModelInfo.RateLimits(
881
+ max_requests_per_minute=3_000,
882
+ max_tokens_per_minute=250_000,
883
+ ),
304
884
  ),
305
- 'curie': pg.Dict(in_service=False, rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM),
306
- 'babbage': pg.Dict(in_service=False, rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM),
307
- 'ada': pg.Dict(in_service=False, rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM),
308
- # GPT-3 base models that are still in service.
309
- 'babbage-002': pg.Dict(in_service=True, rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM),
310
- 'davinci-002': pg.Dict(in_service=True, rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM),
311
- }
885
+ ]
886
+
887
+ _SUPPORTED_MODELS_BY_MODEL_ID = {m.model_id: m for m in SUPPORTED_MODELS}
312
888
 
313
889
 
314
890
  @lf.use_init_args(['model'])
@@ -317,7 +893,7 @@ class OpenAI(openai_compatible.OpenAICompatible):
317
893
 
318
894
  model: pg.typing.Annotated[
319
895
  pg.typing.Enum(
320
- pg.MISSING_VALUE, list(SUPPORTED_MODELS_AND_SETTINGS.keys())
896
+ pg.MISSING_VALUE, list(s.model_id for s in SUPPORTED_MODELS)
321
897
  ),
322
898
  'The name of the model to use.',
323
899
  ]
@@ -355,6 +931,7 @@ class OpenAI(openai_compatible.OpenAICompatible):
355
931
  self._api_key = None
356
932
  self._organization = None
357
933
  self._project = None
934
+ self.__dict__.pop('model_info', None)
358
935
 
359
936
  def _initialize(self):
360
937
  api_key = self.api_key or os.environ.get('OPENAI_API_KEY', None)
@@ -380,41 +957,13 @@ class OpenAI(openai_compatible.OpenAICompatible):
380
957
  headers['OpenAI-Project'] = self._project
381
958
  return headers
382
959
 
383
- @property
384
- def model_id(self) -> str:
385
- """Returns a string to identify the model."""
386
- return f'OpenAI({self.model})'
387
-
388
- @property
389
- def max_concurrency(self) -> int:
390
- rpm = SUPPORTED_MODELS_AND_SETTINGS[self.model].get('rpm', 0)
391
- tpm = SUPPORTED_MODELS_AND_SETTINGS[self.model].get('tpm', 0)
392
- return self.rate_to_max_concurrency(
393
- requests_per_min=rpm, tokens_per_min=tpm
394
- )
395
-
396
- def estimate_cost(
397
- self,
398
- num_input_tokens: int,
399
- num_output_tokens: int
400
- ) -> float | None:
401
- """Estimate the cost based on usage."""
402
- cost_per_1k_input_tokens = SUPPORTED_MODELS_AND_SETTINGS[self.model].get(
403
- 'cost_per_1k_input_tokens', None
404
- )
405
- cost_per_1k_output_tokens = SUPPORTED_MODELS_AND_SETTINGS[self.model].get(
406
- 'cost_per_1k_output_tokens', None
407
- )
408
- if cost_per_1k_output_tokens is None or cost_per_1k_input_tokens is None:
409
- return None
410
- return (
411
- cost_per_1k_input_tokens * num_input_tokens
412
- + cost_per_1k_output_tokens * num_output_tokens
413
- ) / 1000
960
+ @functools.cached_property
961
+ def model_info(self) -> OpenAIModelInfo:
962
+ return _SUPPORTED_MODELS_BY_MODEL_ID[self.model]
414
963
 
415
964
  @classmethod
416
965
  def dir(cls):
417
- return [k for k, v in SUPPORTED_MODELS_AND_SETTINGS.items() if v.in_service]
966
+ return [s.model_id for s in SUPPORTED_MODELS if s.in_service]
418
967
 
419
968
  def _request_args(
420
969
  self, options: lf.LMSamplingOptions) -> dict[str, Any]:
@@ -426,17 +975,13 @@ class OpenAI(openai_compatible.OpenAICompatible):
426
975
 
427
976
  class GptO3Mini(OpenAI):
428
977
  """GPT-O3-mini."""
429
-
430
978
  model = 'o3-mini'
431
- multimodal = True
432
979
  timeout = None
433
980
 
434
981
 
435
982
  class GptO1(OpenAI):
436
983
  """GPT-O1."""
437
-
438
984
  model = 'o1'
439
- multimodal = True
440
985
  timeout = None
441
986
 
442
987
 
@@ -464,6 +1009,36 @@ class GptO1Mini_20240912(OpenAI): # pylint: disable=invalid-name
464
1009
  timeout = None
465
1010
 
466
1011
 
1012
+ class Gpt4oMini(OpenAI):
1013
+ """GPT-4o Mini."""
1014
+ model = 'gpt-4o-mini'
1015
+
1016
+
1017
+ class Gpt4oMini_20240718(OpenAI): # pylint:disable=invalid-name
1018
+ """GPT-4o Mini."""
1019
+ model = 'gpt-4o-mini-2024-07-18'
1020
+
1021
+
1022
+ class Gpt4o(OpenAI):
1023
+ """GPT-4o."""
1024
+ model = 'gpt-4o'
1025
+
1026
+
1027
+ class Gpt4o_20241120(OpenAI): # pylint:disable=invalid-name
1028
+ """GPT-4o version 2024-11-20."""
1029
+ model = 'gpt-4o-2024-11-20'
1030
+
1031
+
1032
+ class Gpt4o_20240806(OpenAI): # pylint:disable=invalid-name
1033
+ """GPT-4o version 2024-08-06."""
1034
+ model = 'gpt-4o-2024-08-06'
1035
+
1036
+
1037
+ class Gpt4o_20240513(OpenAI): # pylint:disable=invalid-name
1038
+ """GPT-4o version 2024-05-13."""
1039
+ model = 'gpt-4o-2024-05-13'
1040
+
1041
+
467
1042
  class Gpt4(OpenAI):
468
1043
  """GPT-4."""
469
1044
  model = 'gpt-4'
@@ -472,13 +1047,11 @@ class Gpt4(OpenAI):
472
1047
  class Gpt4Turbo(Gpt4):
473
1048
  """GPT-4 Turbo with 128K context window. Knowledge up to Dec. 2023."""
474
1049
  model = 'gpt-4-turbo'
475
- multimodal = True
476
1050
 
477
1051
 
478
1052
  class Gpt4Turbo_20240409(Gpt4Turbo): # pylint:disable=invalid-name
479
1053
  """GPT-4 Turbo with 128K context window. Knowledge up to Dec. 2023."""
480
1054
  model = 'gpt-4-turbo-2024-04-09'
481
- multimodal = True
482
1055
 
483
1056
 
484
1057
  class Gpt4TurboPreview(Gpt4):
@@ -496,17 +1069,6 @@ class Gpt4TurboPreview_20231106(Gpt4TurboPreview): # pylint: disable=invalid-na
496
1069
  model = 'gpt-4-1106-preview'
497
1070
 
498
1071
 
499
- class Gpt4VisionPreview(Gpt4):
500
- """GPT-4 Turbo vision preview. 128k context window. Knowledge to Apr. 2023."""
501
- model = 'gpt-4-vision-preview'
502
- multimodal = True
503
-
504
-
505
- class Gpt4VisionPreview_20231106(Gpt4): # pylint: disable=invalid-name
506
- """GPT-4 Turbo vision preview. 128k context window. Knowledge to Apr. 2023."""
507
- model = 'gpt-4-1106-vision-preview'
508
-
509
-
510
1072
  class Gpt4_20230613(Gpt4): # pylint:disable=invalid-name
511
1073
  """GPT-4 @20230613. 8K context window. Knowledge up to 9-2021."""
512
1074
  model = 'gpt-4-0613'
@@ -522,42 +1084,6 @@ class Gpt4_32K_20230613(Gpt4_32K): # pylint:disable=invalid-name
522
1084
  model = 'gpt-4-32k-0613'
523
1085
 
524
1086
 
525
- class Gpt4oMini(OpenAI):
526
- """GPT-4o Mini."""
527
- model = 'gpt-4o-mini'
528
- multimodal = True
529
-
530
-
531
- class Gpt4oMini_20240718(OpenAI): # pylint:disable=invalid-name
532
- """GPT-4o Mini."""
533
- model = 'gpt-4o-mini-2024-07-18'
534
- multimodal = True
535
-
536
-
537
- class Gpt4o(OpenAI):
538
- """GPT-4o."""
539
- model = 'gpt-4o'
540
- multimodal = True
541
-
542
-
543
- class Gpt4o_20241120(OpenAI): # pylint:disable=invalid-name
544
- """GPT-4o version 2024-11-20."""
545
- model = 'gpt-4o-2024-11-20'
546
- multimodal = True
547
-
548
-
549
- class Gpt4o_20240806(OpenAI): # pylint:disable=invalid-name
550
- """GPT-4o version 2024-08-06."""
551
- model = 'gpt-4o-2024-08-06'
552
- multimodal = True
553
-
554
-
555
- class Gpt4o_20240513(OpenAI): # pylint:disable=invalid-name
556
- """GPT-4o version 2024-05-13."""
557
- model = 'gpt-4o-2024-05-13'
558
- multimodal = True
559
-
560
-
561
1087
  class Gpt35(OpenAI):
562
1088
  """GPT-3.5. 4K max tokens, trained up on data up to Sep, 2021."""
563
1089
  model = 'text-davinci-003'
@@ -593,24 +1119,9 @@ class Gpt35Turbo16K_20230613(Gpt35Turbo): # pylint:disable=invalid-name
593
1119
  model = 'gpt-3.5-turbo-16k-0613'
594
1120
 
595
1121
 
596
- class Gpt3(OpenAI):
597
- """Most capable GPT-3 model (Davinci) 2K context window size.
598
-
599
- All GPT3 models have 2K max tokens and trained on data up to Oct 2019.
600
- """
601
- model = 'davinci'
602
-
603
-
604
- class Gpt3Curie(Gpt3):
605
- """Very capable, but faster and lower cost than Davici."""
606
- model = 'curie'
607
-
608
-
609
- class Gpt3Babbage(Gpt3):
610
- """Capable of straightforward tasks, very fast and low cost."""
611
- model = 'babbage'
612
-
1122
+ def _register_openai_models():
1123
+ """Registers OpenAI models."""
1124
+ for m in SUPPORTED_MODELS:
1125
+ lf.LanguageModel.register(m.model_id, OpenAI)
613
1126
 
614
- class Gpt3Ada(Gpt3):
615
- """Capable of very simple tasks, the fastest/lowest cost among GPT3 models."""
616
- model = 'ada'
1127
+ _register_openai_models()