langfun 0.0.2.dev20240330__py3-none-any.whl → 0.0.2.dev20240429__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. langfun/__init__.py +2 -0
  2. langfun/core/__init__.py +1 -0
  3. langfun/core/coding/python/correction.py +0 -7
  4. langfun/core/component.py +6 -0
  5. langfun/core/component_test.py +1 -0
  6. langfun/core/eval/__init__.py +2 -0
  7. langfun/core/eval/base.py +202 -23
  8. langfun/core/eval/base_test.py +49 -10
  9. langfun/core/eval/matching.py +26 -9
  10. langfun/core/eval/matching_test.py +2 -1
  11. langfun/core/eval/scoring.py +15 -6
  12. langfun/core/eval/scoring_test.py +2 -1
  13. langfun/core/langfunc.py +0 -5
  14. langfun/core/langfunc_test.py +6 -4
  15. langfun/core/language_model.py +124 -24
  16. langfun/core/language_model_test.py +249 -26
  17. langfun/core/llms/__init__.py +19 -2
  18. langfun/core/llms/anthropic.py +263 -0
  19. langfun/core/llms/anthropic_test.py +167 -0
  20. langfun/core/llms/cache/in_memory_test.py +37 -28
  21. langfun/core/llms/fake.py +31 -22
  22. langfun/core/llms/fake_test.py +122 -11
  23. langfun/core/llms/google_genai_test.py +8 -3
  24. langfun/core/llms/groq.py +260 -0
  25. langfun/core/llms/groq_test.py +170 -0
  26. langfun/core/llms/llama_cpp.py +3 -1
  27. langfun/core/llms/openai.py +97 -79
  28. langfun/core/llms/openai_test.py +285 -59
  29. langfun/core/modalities/video.py +5 -2
  30. langfun/core/structured/__init__.py +3 -0
  31. langfun/core/structured/completion_test.py +2 -2
  32. langfun/core/structured/function_generation.py +245 -0
  33. langfun/core/structured/function_generation_test.py +329 -0
  34. langfun/core/structured/mapping.py +56 -2
  35. langfun/core/structured/mapping_test.py +17 -0
  36. langfun/core/structured/parsing_test.py +18 -13
  37. langfun/core/structured/prompting.py +27 -6
  38. langfun/core/structured/prompting_test.py +79 -12
  39. langfun/core/structured/schema.py +4 -2
  40. langfun/core/structured/schema_generation_test.py +2 -2
  41. langfun/core/structured/schema_test.py +4 -6
  42. langfun/core/template.py +125 -10
  43. langfun/core/template_test.py +75 -0
  44. langfun/core/templates/selfplay_test.py +6 -2
  45. {langfun-0.0.2.dev20240330.dist-info → langfun-0.0.2.dev20240429.dist-info}/METADATA +3 -2
  46. {langfun-0.0.2.dev20240330.dist-info → langfun-0.0.2.dev20240429.dist-info}/RECORD +49 -43
  47. {langfun-0.0.2.dev20240330.dist-info → langfun-0.0.2.dev20240429.dist-info}/LICENSE +0 -0
  48. {langfun-0.0.2.dev20240330.dist-info → langfun-0.0.2.dev20240429.dist-info}/WHEEL +0 -0
  49. {langfun-0.0.2.dev20240330.dist-info → langfun-0.0.2.dev20240429.dist-info}/top_level.txt +0 -0
@@ -26,65 +26,55 @@ from openai import openai_object
26
26
  import pyglove as pg
27
27
 
28
28
 
29
- class Usage(pg.Object):
30
- """Usage information per completion."""
31
-
32
- prompt_tokens: int
33
- completion_tokens: int
34
- total_tokens: int
35
-
36
-
37
- class LMSamplingResult(lf.LMSamplingResult):
38
- """LMSamplingResult with usage information."""
39
-
40
- usage: Usage | None = None
41
-
42
-
43
- SUPPORTED_MODELS_AND_SETTINGS = [
44
- # Model name, max concurrent requests.
45
- # The concurrent requests is estimated by TPM/RPM from
46
- # https://platform.openai.com/account/limits
47
- # GPT-4 Turbo models.
48
- ('gpt-4-turbo-preview', 1), # GPT-4 Turbo.
49
- ('gpt-4-0125-preview', 1), # GPT-4 Turbo
50
- ('gpt-4-1106-preview', 1), # GPT-4 Turbo
51
- ('gpt-4-vision-preview', 1), # GPT-4 Turbo with Vision.
52
- # GPT-4 models.
53
- ('gpt-4', 4),
54
- ('gpt-4-0613', 4),
55
- ('gpt-4-0314', 4),
56
- ('gpt-4-32k', 4),
57
- ('gpt-4-32k-0613', 4),
58
- ('gpt-4-32k-0314', 4),
59
- # GPT-3.5 Turbo models.
60
- ('gpt-3.5-turbo', 16),
61
- ('gpt-3.5-turbo-0125', 16),
62
- ('gpt-3.5-turbo-1106', 16),
63
- ('gpt-3.5-turbo-0613', 16),
64
- ('gpt-3.5-turbo-0301', 16),
65
- ('gpt-3.5-turbo-16k', 16),
66
- ('gpt-3.5-turbo-16k-0613', 16),
67
- ('gpt-3.5-turbo-16k-0301', 16),
68
- # GPT-3.5 models.
69
- ('text-davinci-003', 8), # GPT-3.5, trained with RHLF.
70
- ('text-davinci-002', 4), # Trained with SFT but no RHLF.
71
- ('code-davinci-002', 4),
72
- # GPT-3 instruction-tuned models.
73
- ('text-curie-001', 4),
74
- ('text-babbage-001', 4),
75
- ('text-ada-001', 4),
76
- ('davinci', 4),
77
- ('curie', 4),
78
- ('babbage', 4),
79
- ('ada', 4),
80
- # GPT-3 base models without instruction tuning.
81
- ('babbage-002', 4),
82
- ('davinci-002', 4),
83
- ]
84
-
85
-
86
- # Model concurreny setting.
87
- _MODEL_CONCURRENCY = {m[0]: m[1] for m in SUPPORTED_MODELS_AND_SETTINGS}
29
+ # From https://platform.openai.com/settings/organization/limits
30
+ _DEFAULT_TPM = 250000
31
+ _DEFAULT_RPM = 3000
32
+
33
+ SUPPORTED_MODELS_AND_SETTINGS = {
34
+ # Models from https://platform.openai.com/docs/models
35
+ # RPM is from https://platform.openai.com/docs/guides/rate-limits
36
+ # GPT-4-Turbo models
37
+ 'gpt-4-turbo': pg.Dict(rpm=10000, tpm=1500000),
38
+ 'gpt-4-turbo-2024-04-09': pg.Dict(rpm=10000, tpm=1500000),
39
+ 'gpt-4-turbo-preview': pg.Dict(rpm=10000, tpm=1500000),
40
+ 'gpt-4-0125-preview': pg.Dict(rpm=10000, tpm=1500000),
41
+ 'gpt-4-1106-preview': pg.Dict(rpm=10000, tpm=1500000),
42
+ 'gpt-4-vision-preview': pg.Dict(rpm=10000, tpm=1500000),
43
+ 'gpt-4-1106-vision-preview': pg.Dict(
44
+ rpm=10000, tpm=1500000
45
+ ),
46
+ # GPT-4 models
47
+ 'gpt-4': pg.Dict(rpm=10000, tpm=300000),
48
+ 'gpt-4-0613': pg.Dict(rpm=10000, tpm=300000),
49
+ 'gpt-4-0314': pg.Dict(rpm=10000, tpm=300000),
50
+ 'gpt-4-32k': pg.Dict(rpm=10000, tpm=300000),
51
+ 'gpt-4-32k-0613': pg.Dict(rpm=10000, tpm=300000),
52
+ 'gpt-4-32k-0314': pg.Dict(rpm=10000, tpm=300000),
53
+ # GPT-3.5-Turbo models
54
+ 'gpt-3.5-turbo': pg.Dict(rpm=10000, tpm=2000000),
55
+ 'gpt-3.5-turbo-0125': pg.Dict(rpm=10000, tpm=2000000),
56
+ 'gpt-3.5-turbo-1106': pg.Dict(rpm=10000, tpm=2000000),
57
+ 'gpt-3.5-turbo-0613': pg.Dict(rpm=10000, tpm=2000000),
58
+ 'gpt-3.5-turbo-0301': pg.Dict(rpm=10000, tpm=2000000),
59
+ 'gpt-3.5-turbo-16k': pg.Dict(rpm=10000, tpm=2000000),
60
+ 'gpt-3.5-turbo-16k-0613': pg.Dict(rpm=10000, tpm=2000000),
61
+ 'gpt-3.5-turbo-16k-0301': pg.Dict(rpm=10000, tpm=2000000),
62
+ # GPT-3.5 models
63
+ 'text-davinci-003': pg.Dict(rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM),
64
+ 'text-davinci-002': pg.Dict(rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM),
65
+ 'code-davinci-002': pg.Dict(rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM),
66
+ # GPT-3 instruction-tuned models
67
+ 'text-curie-001': pg.Dict(rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM),
68
+ 'text-babbage-001': pg.Dict(rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM),
69
+ 'text-ada-001': pg.Dict(rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM),
70
+ 'davinci': pg.Dict(rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM),
71
+ 'curie': pg.Dict(rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM),
72
+ 'babbage': pg.Dict(rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM),
73
+ 'ada': pg.Dict(rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM),
74
+ # GPT-3 base models
75
+ 'babbage-002': pg.Dict(rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM),
76
+ 'davinci-002': pg.Dict(rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM),
77
+ }
88
78
 
89
79
 
90
80
  @lf.use_init_args(['model'])
@@ -93,7 +83,7 @@ class OpenAI(lf.LanguageModel):
93
83
 
94
84
  model: pg.typing.Annotated[
95
85
  pg.typing.Enum(
96
- pg.MISSING_VALUE, [m[0] for m in SUPPORTED_MODELS_AND_SETTINGS]
86
+ pg.MISSING_VALUE, list(SUPPORTED_MODELS_AND_SETTINGS.keys())
97
87
  ),
98
88
  'The name of the model to use.',
99
89
  ] = 'gpt-3.5-turbo'
@@ -145,7 +135,11 @@ class OpenAI(lf.LanguageModel):
145
135
 
146
136
  @property
147
137
  def max_concurrency(self) -> int:
148
- return _MODEL_CONCURRENCY[self.model]
138
+ rpm = SUPPORTED_MODELS_AND_SETTINGS[self.model].get('rpm', 0)
139
+ tpm = SUPPORTED_MODELS_AND_SETTINGS[self.model].get('tpm', 0)
140
+ return self.rate_to_max_concurrency(
141
+ requests_per_min=rpm, tokens_per_min=tpm
142
+ )
149
143
 
150
144
  @classmethod
151
145
  def dir(cls):
@@ -163,8 +157,6 @@ class OpenAI(lf.LanguageModel):
163
157
  # NOTE(daiyip): options.top_k is not applicable.
164
158
  args = dict(
165
159
  n=options.n,
166
- temperature=options.temperature,
167
- max_tokens=options.max_tokens,
168
160
  stream=False,
169
161
  timeout=self.timeout,
170
162
  logprobs=options.logprobs,
@@ -173,13 +165,17 @@ class OpenAI(lf.LanguageModel):
173
165
  # Completion and ChatCompletion uses different parameter name for model.
174
166
  args['model' if self.is_chat_model else 'engine'] = self.model
175
167
 
168
+ if options.temperature is not None:
169
+ args['temperature'] = options.temperature
170
+ if options.max_tokens is not None:
171
+ args['max_tokens'] = options.max_tokens
176
172
  if options.top_p is not None:
177
173
  args['top_p'] = options.top_p
178
174
  if options.stop:
179
175
  args['stop'] = options.stop
180
176
  return args
181
177
 
182
- def _sample(self, prompts: list[lf.Message]) -> list[LMSamplingResult]:
178
+ def _sample(self, prompts: list[lf.Message]) -> list[lf.LMSamplingResult]:
183
179
  assert self._api_initialized
184
180
  if self.is_chat_model:
185
181
  return self._chat_complete_batch(prompts)
@@ -187,7 +183,8 @@ class OpenAI(lf.LanguageModel):
187
183
  return self._complete_batch(prompts)
188
184
 
189
185
  def _complete_batch(
190
- self, prompts: list[lf.Message]) -> list[LMSamplingResult]:
186
+ self, prompts: list[lf.Message]
187
+ ) -> list[lf.LMSamplingResult]:
191
188
 
192
189
  def _open_ai_completion(prompts):
193
190
  response = openai.Completion.create(
@@ -202,13 +199,13 @@ class OpenAI(lf.LanguageModel):
202
199
  lf.LMSample(choice.text.strip(), score=choice.logprobs or 0.0)
203
200
  )
204
201
 
205
- usage = Usage(
202
+ usage = lf.LMSamplingUsage(
206
203
  prompt_tokens=response.usage.prompt_tokens,
207
204
  completion_tokens=response.usage.completion_tokens,
208
205
  total_tokens=response.usage.total_tokens,
209
206
  )
210
207
  return [
211
- LMSamplingResult(
208
+ lf.LMSamplingResult(
212
209
  samples_by_index[index], usage=usage if index == 0 else None
213
210
  )
214
211
  for index in sorted(samples_by_index.keys())
@@ -220,12 +217,16 @@ class OpenAI(lf.LanguageModel):
220
217
  retry_on_errors=(
221
218
  openai_error.ServiceUnavailableError,
222
219
  openai_error.RateLimitError,
220
+ # Handling transient OpenAI server error (code 500). Check out
221
+ # https://platform.openai.com/docs/guides/error-codes/error-codes
222
+ (openai_error.APIError,
223
+ '.*The server had an error processing your request'),
223
224
  ),
224
225
  )[0]
225
226
 
226
227
  def _chat_complete_batch(
227
228
  self, prompts: list[lf.Message]
228
- ) -> list[LMSamplingResult]:
229
+ ) -> list[lf.LMSamplingResult]:
229
230
  def _open_ai_chat_completion(prompt: lf.Message):
230
231
  if self.multimodal:
231
232
  content = []
@@ -266,9 +267,9 @@ class OpenAI(lf.LanguageModel):
266
267
  )
267
268
  )
268
269
 
269
- return LMSamplingResult(
270
+ return lf.LMSamplingResult(
270
271
  samples=samples,
271
- usage=Usage(
272
+ usage=lf.LMSamplingUsage(
272
273
  prompt_tokens=response.usage.prompt_tokens,
273
274
  completion_tokens=response.usage.completion_tokens,
274
275
  total_tokens=response.usage.total_tokens,
@@ -291,26 +292,43 @@ class Gpt4(OpenAI):
291
292
 
292
293
 
293
294
  class Gpt4Turbo(Gpt4):
294
- """GPT-4 Turbo with 128K context window size. Knowledge up to 4-2023."""
295
- model = 'gpt-4-turbo-preview'
295
+ """GPT-4 Turbo with 128K context window. Knowledge up to Dec. 2023."""
296
+ model = 'gpt-4-turbo'
297
+ multimodal = True
296
298
 
297
299
 
298
- class Gpt4TurboVision(Gpt4Turbo):
299
- """GPT-4 Turbo with vision."""
300
- model = 'gpt-4-vision-preview'
300
+ class Gpt4Turbo_20240409(Gpt4Turbo): # pylint:disable=invalid-name
301
+ """GPT-4 Turbo with 128K context window. Knowledge up to Dec. 2023."""
302
+ model = 'gpt-4-turbo-2024-04-09'
301
303
  multimodal = True
302
304
 
303
305
 
304
- class Gpt4Turbo_0125(Gpt4Turbo): # pylint:disable=invalid-name
305
- """GPT-4 Turbo with 128K context window size. Knowledge up to 4-2023."""
306
+ class Gpt4TurboPreview(Gpt4):
307
+ """GPT-4 Turbo Preview with 128k context window. Knowledge up to Dec. 2023."""
308
+ model = 'gpt-4-turbo-preview'
309
+
310
+
311
+ class Gpt4TurboPreview_0125(Gpt4TurboPreview): # pylint: disable=invalid-name
312
+ """GPT-4 Turbo Preview with 128k context window. Knowledge up to Dec. 2023."""
306
313
  model = 'gpt-4-0125-preview'
307
314
 
308
315
 
309
- class Gpt4Turbo_1106(Gpt4Turbo): # pylint:disable=invalid-name
310
- """GPT-4 Turbo @20231106. 128K context window. Knowledge up to 4-2023."""
316
+ class Gpt4TurboPreview_1106(Gpt4TurboPreview): # pylint: disable=invalid-name
317
+ """GPT-4 Turbo Preview with 128k context window. Knowledge up to Apr. 2023."""
311
318
  model = 'gpt-4-1106-preview'
312
319
 
313
320
 
321
+ class Gpt4VisionPreview(Gpt4):
322
+ """GPT-4 Turbo vision preview. 128k context window. Knowledge to Apr. 2023."""
323
+ model = 'gpt-4-vision-preview'
324
+ multimodal = True
325
+
326
+
327
+ class Gpt4VisionPreview_1106(Gpt4): # pylint: disable=invalid-name
328
+ """GPT-4 Turbo vision preview. 128k context window. Knowledge to Apr. 2023."""
329
+ model = 'gpt-4-1106-vision-preview'
330
+
331
+
314
332
  class Gpt4_0613(Gpt4): # pylint:disable=invalid-name
315
333
  """GPT-4 @20230613. 8K context window. Knowledge up to 9-2021."""
316
334
  model = 'gpt-4-0613'