langfun 0.0.2.dev20240330__py3-none-any.whl → 0.0.2.dev20240511__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (59)
  1. langfun/__init__.py +7 -0
  2. langfun/core/__init__.py +1 -0
  3. langfun/core/coding/python/correction.py +0 -7
  4. langfun/core/component.py +6 -0
  5. langfun/core/component_test.py +1 -0
  6. langfun/core/eval/__init__.py +15 -0
  7. langfun/core/eval/base.py +665 -95
  8. langfun/core/eval/base_test.py +224 -53
  9. langfun/core/eval/matching.py +48 -30
  10. langfun/core/eval/matching_test.py +25 -3
  11. langfun/core/eval/patching.py +130 -0
  12. langfun/core/eval/patching_test.py +170 -0
  13. langfun/core/eval/scoring.py +19 -10
  14. langfun/core/eval/scoring_test.py +21 -3
  15. langfun/core/langfunc.py +1 -22
  16. langfun/core/langfunc_test.py +10 -4
  17. langfun/core/language_model.py +130 -24
  18. langfun/core/language_model_test.py +249 -26
  19. langfun/core/llms/__init__.py +27 -2
  20. langfun/core/llms/anthropic.py +263 -0
  21. langfun/core/llms/anthropic_test.py +167 -0
  22. langfun/core/llms/cache/in_memory_test.py +37 -28
  23. langfun/core/llms/fake.py +34 -25
  24. langfun/core/llms/fake_test.py +122 -11
  25. langfun/core/llms/google_genai.py +8 -0
  26. langfun/core/llms/google_genai_test.py +8 -3
  27. langfun/core/llms/groq.py +260 -0
  28. langfun/core/llms/groq_test.py +170 -0
  29. langfun/core/llms/llama_cpp.py +3 -1
  30. langfun/core/llms/openai.py +100 -81
  31. langfun/core/llms/openai_test.py +287 -60
  32. langfun/core/llms/vertexai.py +291 -0
  33. langfun/core/llms/vertexai_test.py +233 -0
  34. langfun/core/modalities/image.py +1 -3
  35. langfun/core/modalities/mime.py +6 -0
  36. langfun/core/modalities/video.py +6 -5
  37. langfun/core/structured/__init__.py +5 -0
  38. langfun/core/structured/completion_test.py +2 -2
  39. langfun/core/structured/function_generation.py +245 -0
  40. langfun/core/structured/function_generation_test.py +329 -0
  41. langfun/core/structured/mapping.py +61 -3
  42. langfun/core/structured/mapping_test.py +17 -0
  43. langfun/core/structured/parsing_test.py +18 -13
  44. langfun/core/structured/prompting.py +61 -12
  45. langfun/core/structured/prompting_test.py +122 -12
  46. langfun/core/structured/schema.py +38 -6
  47. langfun/core/structured/schema_generation_test.py +2 -2
  48. langfun/core/structured/schema_test.py +36 -7
  49. langfun/core/structured/scoring.py +4 -1
  50. langfun/core/structured/scoring_test.py +6 -0
  51. langfun/core/template.py +147 -11
  52. langfun/core/template_test.py +75 -0
  53. langfun/core/templates/selfplay_test.py +6 -2
  54. {langfun-0.0.2.dev20240330.dist-info → langfun-0.0.2.dev20240511.dist-info}/METADATA +3 -2
  55. langfun-0.0.2.dev20240511.dist-info/RECORD +112 -0
  56. langfun-0.0.2.dev20240330.dist-info/RECORD +0 -102
  57. {langfun-0.0.2.dev20240330.dist-info → langfun-0.0.2.dev20240511.dist-info}/LICENSE +0 -0
  58. {langfun-0.0.2.dev20240330.dist-info → langfun-0.0.2.dev20240511.dist-info}/WHEEL +0 -0
  59. {langfun-0.0.2.dev20240330.dist-info → langfun-0.0.2.dev20240511.dist-info}/top_level.txt +0 -0
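
The largest hunks below come from langfun/core/llms/openai_test.py, where the mocked OpenAI responses and the expected sampling results switch from the module-local openai.Usage to the shared lf.LMSamplingUsage type, and each expected sample now wraps a full lf.AIMessage carrying per-sample usage and a TAG_LM_RESPONSE tag. The following sketch is assembled only from the constructor calls visible in those hunks (it is not authoritative langfun documentation) and assumes langfun.core is imported as lf, as the test module does:

from langfun import core as lf

# Expected shape of one sampling result in the new tests: a batch-level
# usage total on the result, plus per-sample usage attached to each AIMessage.
result = lf.LMSamplingResult(
    [
        lf.LMSample(
            lf.AIMessage(
                'Sample 0 for prompt 0.',
                score=0.0,
                logprobs=None,
                usage=lf.LMSamplingUsage(
                    prompt_tokens=33, completion_tokens=33, total_tokens=66
                ),
                tags=[lf.Message.TAG_LM_RESPONSE],
            ),
            score=0.0,
            logprobs=None,
        ),
    ],
    usage=lf.LMSamplingUsage(
        prompt_tokens=100, completion_tokens=100, total_tokens=200
    ),
)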
langfun/core/llms/openai_test.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Tests for openai models."""
+"""Tests for OpenAI models."""
 
 import unittest
 from unittest import mock
@@ -32,11 +32,14 @@ def mock_completion_query(prompt, *, n=1, **kwargs):
           text=f'Sample {k} for prompt {i}.',
           logprobs=k / 10,
       ))
-  return pg.Dict(choices=choices, usage=openai.Usage(
-      prompt_tokens=100,
-      completion_tokens=100,
-      total_tokens=200,
-  ))
+  return pg.Dict(
+      choices=choices,
+      usage=lf.LMSamplingUsage(
+          prompt_tokens=100,
+          completion_tokens=100,
+          total_tokens=200,
+      ),
+  )
 
 
 def mock_chat_completion_query(messages, *, n=1, **kwargs):
@@ -49,18 +52,22 @@ def mock_chat_completion_query(messages, *, n=1, **kwargs):
         ),
         logprobs=None,
     ))
-  return pg.Dict(choices=choices, usage=openai.Usage(
-      prompt_tokens=100,
-      completion_tokens=100,
-      total_tokens=200,
-  ))
+  return pg.Dict(
+      choices=choices,
+      usage=lf.LMSamplingUsage(
+          prompt_tokens=100,
+          completion_tokens=100,
+          total_tokens=200,
+      ),
+  )
 
 
 def mock_chat_completion_query_vision(messages, *, n=1, **kwargs):
   del kwargs
   choices = []
   urls = [
-      c['image_url'] for c in messages[0]['content'] if c['type'] == 'image_url'
+      c['image_url']['url']
+      for c in messages[0]['content'] if c['type'] == 'image_url'
   ]
   for k in range(n):
     choices.append(pg.Dict(
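
The hunk above also changes mock_chat_completion_query_vision to read the image URL from a nested dict (c['image_url']['url'] instead of c['image_url']), i.e. the mock now expects chat content parts to carry the URL under an 'image_url' object. A small, self-contained illustration of the shape the updated mock parses; the payload values here are placeholders, not taken from the diff:

# Hypothetical chat message payload; only the nesting of 'image_url' matters.
messages = [{
    'role': 'user',
    'content': [
        {'type': 'text', 'text': 'hello'},
        {'type': 'image_url', 'image_url': {'url': 'https://fake/image'}},
    ],
}]
urls = [
    c['image_url']['url']
    for c in messages[0]['content'] if c['type'] == 'image_url'
]
assert urls == ['https://fake/image']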
@@ -69,14 +76,17 @@ def mock_chat_completion_query_vision(messages, *, n=1, **kwargs):
         ),
         logprobs=None,
     ))
-  return pg.Dict(choices=choices, usage=openai.Usage(
-      prompt_tokens=100,
-      completion_tokens=100,
-      total_tokens=200,
-  ))
+  return pg.Dict(
+      choices=choices,
+      usage=lf.LMSamplingUsage(
+          prompt_tokens=100,
+          completion_tokens=100,
+          total_tokens=200,
+      ),
+  )
 
 
-class OpenaiTest(unittest.TestCase):
+class OpenAITest(unittest.TestCase):
   """Tests for OpenAI language model."""
 
   def test_model_id(self):
@@ -89,7 +99,7 @@ class OpenaiTest(unittest.TestCase):
     )
 
   def test_max_concurrency(self):
-    self.assertEqual(openai.Gpt35(api_key='test_key').max_concurrency, 8)
+    self.assertGreater(openai.Gpt35(api_key='test_key').max_concurrency, 0)
 
   def test_get_request_args(self):
     self.assertEqual(
@@ -121,7 +131,6 @@ class OpenaiTest(unittest.TestCase):
             top_logprobs=None,
             n=1,
             temperature=1.0,
-            max_tokens=1024,
             stream=False,
             timeout=120.0,
             stop=['\n'],
@@ -149,17 +158,19 @@ class OpenaiTest(unittest.TestCase):
   def test_call_chat_completion_vision(self):
     with mock.patch('openai.ChatCompletion.create') as mock_chat_completion:
       mock_chat_completion.side_effect = mock_chat_completion_query_vision
-      lm = openai.Gpt4TurboVision(api_key='test_key')
-      self.assertEqual(
-          lm(
-              lf.UserMessage(
-                  'hello {{image}}',
-                  image=lf_modalities.Image.from_uri('https://fake/image')
-              ),
-              sampling_options=lf.LMSamplingOptions(n=2)
-          ),
-          'Sample 0 for message: https://fake/image',
-      )
+      lm_1 = openai.Gpt4Turbo(api_key='test_key')
+      lm_2 = openai.Gpt4VisionPreview(api_key='test_key')
+      for lm in (lm_1, lm_2):
+        self.assertEqual(
+            lm(
+                lf.UserMessage(
+                    'hello {{image}}',
+                    image=lf_modalities.Image.from_uri('https://fake/image')
+                ),
+                sampling_options=lf.LMSamplingOptions(n=2)
+            ),
+            'Sample 0 for message: https://fake/image',
+        )
 
   def test_sample_completion(self):
     with mock.patch('openai.Completion.create') as mock_completion:
@@ -170,18 +181,101 @@ class OpenaiTest(unittest.TestCase):
       )
 
       self.assertEqual(len(results), 2)
-      self.assertEqual(results[0], openai.LMSamplingResult([
-          lf.LMSample('Sample 0 for prompt 0.', score=0.0),
-          lf.LMSample('Sample 1 for prompt 0.', score=0.1),
-          lf.LMSample('Sample 2 for prompt 0.', score=0.2),
-      ], usage=openai.Usage(
-          prompt_tokens=100, completion_tokens=100, total_tokens=200)))
-
-      self.assertEqual(results[1], openai.LMSamplingResult([
-          lf.LMSample('Sample 0 for prompt 1.', score=0.0),
-          lf.LMSample('Sample 1 for prompt 1.', score=0.1),
-          lf.LMSample('Sample 2 for prompt 1.', score=0.2),
-      ]))
+      self.assertEqual(
+          results[0],
+          lf.LMSamplingResult(
+              [
+                  lf.LMSample(
+                      lf.AIMessage(
+                          'Sample 0 for prompt 0.',
+                          score=0.0,
+                          logprobs=None,
+                          usage=lf.LMSamplingUsage(
+                              prompt_tokens=33,
+                              completion_tokens=33,
+                              total_tokens=66
+                          ),
+                          tags=[lf.Message.TAG_LM_RESPONSE],
+                      ),
+                      score=0.0,
+                      logprobs=None,
+                  ),
+                  lf.LMSample(
+                      lf.AIMessage(
+                          'Sample 1 for prompt 0.',
+                          score=0.1,
+                          logprobs=None,
+                          usage=lf.LMSamplingUsage(
+                              prompt_tokens=33,
+                              completion_tokens=33,
+                              total_tokens=66
+                          ),
+                          tags=[lf.Message.TAG_LM_RESPONSE],
+                      ),
+                      score=0.1,
+                      logprobs=None,
+                  ),
+                  lf.LMSample(
+                      lf.AIMessage(
+                          'Sample 2 for prompt 0.',
+                          score=0.2,
+                          logprobs=None,
+                          usage=lf.LMSamplingUsage(
+                              prompt_tokens=33,
+                              completion_tokens=33,
+                              total_tokens=66
+                          ),
+                          tags=[lf.Message.TAG_LM_RESPONSE],
+                      ),
+                      score=0.2,
+                      logprobs=None,
+                  ),
+              ],
+              usage=lf.LMSamplingUsage(
+                  prompt_tokens=100, completion_tokens=100, total_tokens=200
+              ),
+          ),
+      )
+      self.assertEqual(
+          results[1],
+          lf.LMSamplingResult(
+              [
+                  lf.LMSample(
+                      lf.AIMessage(
+                          'Sample 0 for prompt 1.',
+                          score=0.0,
+                          logprobs=None,
+                          usage=None,
+                          tags=[lf.Message.TAG_LM_RESPONSE],
+                      ),
+                      score=0.0,
+                      logprobs=None,
+                  ),
+                  lf.LMSample(
+                      lf.AIMessage(
+                          'Sample 1 for prompt 1.',
+                          score=0.1,
+                          logprobs=None,
+                          usage=None,
+                          tags=[lf.Message.TAG_LM_RESPONSE],
+                      ),
+                      score=0.1,
+                      logprobs=None,
+                  ),
+                  lf.LMSample(
+                      lf.AIMessage(
+                          'Sample 2 for prompt 1.',
+                          score=0.2,
+                          logprobs=None,
+                          usage=None,
+                          tags=[lf.Message.TAG_LM_RESPONSE],
+                      ),
+                      score=0.2,
+                      logprobs=None,
+                  ),
+              ],
+          ),
+      )
 
   def test_sample_chat_completion(self):
     with mock.patch('openai.ChatCompletion.create') as mock_chat_completion:
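
One detail worth noting in the expected values above: the mocked responses report 100 prompt and 100 completion tokens per API call, and the new expectations attribute that total evenly across the n samples of each prompt (33 tokens per sample for n=3 here, 50 per sample in the n=2 case further below), while the batch-level LMSamplingResult keeps the unsplit totals. This is a reading of the expected numbers in the test, not a statement about langfun internals:

# Inferred from the expected values (an assumption, not documented behavior):
# per-sample token counts are the mocked totals divided evenly across samples.
total_prompt_tokens = 100
for n in (3, 2):
  per_sample = total_prompt_tokens // n
  print(n, per_sample)  # 3 -> 33, 2 -> 50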
@@ -192,18 +286,116 @@ class OpenaiTest(unittest.TestCase):
       )
 
       self.assertEqual(len(results), 2)
-      self.assertEqual(results[0], openai.LMSamplingResult([
-          lf.LMSample('Sample 0 for message.', score=0.0),
-          lf.LMSample('Sample 1 for message.', score=0.0),
-          lf.LMSample('Sample 2 for message.', score=0.0),
-      ], usage=openai.Usage(
-          prompt_tokens=100, completion_tokens=100, total_tokens=200)))
-      self.assertEqual(results[1], openai.LMSamplingResult([
-          lf.LMSample('Sample 0 for message.', score=0.0),
-          lf.LMSample('Sample 1 for message.', score=0.0),
-          lf.LMSample('Sample 2 for message.', score=0.0),
-      ], usage=openai.Usage(
-          prompt_tokens=100, completion_tokens=100, total_tokens=200)))
+      self.assertEqual(
+          results[0],
+          lf.LMSamplingResult(
+              [
+                  lf.LMSample(
+                      lf.AIMessage(
+                          'Sample 0 for message.',
+                          score=0.0,
+                          logprobs=None,
+                          usage=lf.LMSamplingUsage(
+                              prompt_tokens=33,
+                              completion_tokens=33,
+                              total_tokens=66
+                          ),
+                          tags=[lf.Message.TAG_LM_RESPONSE],
+                      ),
+                      score=0.0,
+                      logprobs=None,
+                  ),
+                  lf.LMSample(
+                      lf.AIMessage(
+                          'Sample 1 for message.',
+                          score=0.0,
+                          logprobs=None,
+                          usage=lf.LMSamplingUsage(
+                              prompt_tokens=33,
+                              completion_tokens=33,
+                              total_tokens=66
+                          ),
+                          tags=[lf.Message.TAG_LM_RESPONSE],
+                      ),
+                      score=0.0,
+                      logprobs=None,
+                  ),
+                  lf.LMSample(
+                      lf.AIMessage(
+                          'Sample 2 for message.',
+                          score=0.0,
+                          logprobs=None,
+                          usage=lf.LMSamplingUsage(
+                              prompt_tokens=33,
+                              completion_tokens=33,
+                              total_tokens=66
+                          ),
+                          tags=[lf.Message.TAG_LM_RESPONSE],
+                      ),
+                      score=0.0,
+                      logprobs=None,
+                  ),
+              ],
+              usage=lf.LMSamplingUsage(
+                  prompt_tokens=100, completion_tokens=100, total_tokens=200
+              ),
+          ),
+      )
+      self.assertEqual(
+          results[1],
+          lf.LMSamplingResult(
+              [
+                  lf.LMSample(
+                      lf.AIMessage(
+                          'Sample 0 for message.',
+                          score=0.0,
+                          logprobs=None,
+                          usage=lf.LMSamplingUsage(
+                              prompt_tokens=33,
+                              completion_tokens=33,
+                              total_tokens=66
+                          ),
+                          tags=[lf.Message.TAG_LM_RESPONSE],
+                      ),
+                      score=0.0,
+                      logprobs=None,
+                  ),
+                  lf.LMSample(
+                      lf.AIMessage(
+                          'Sample 1 for message.',
+                          score=0.0,
+                          logprobs=None,
+                          usage=lf.LMSamplingUsage(
+                              prompt_tokens=33,
+                              completion_tokens=33,
+                              total_tokens=66
+                          ),
+                          tags=[lf.Message.TAG_LM_RESPONSE],
+                      ),
+                      score=0.0,
+                      logprobs=None,
+                  ),
+                  lf.LMSample(
+                      lf.AIMessage(
+                          'Sample 2 for message.',
+                          score=0.0,
+                          logprobs=None,
+                          usage=lf.LMSamplingUsage(
+                              prompt_tokens=33,
+                              completion_tokens=33,
+                              total_tokens=66
+                          ),
+                          tags=[lf.Message.TAG_LM_RESPONSE],
+                      ),
+                      score=0.0,
+                      logprobs=None,
+                  ),
+              ],
+              usage=lf.LMSamplingUsage(
+                  prompt_tokens=100, completion_tokens=100, total_tokens=200
+              ),
+          ),
+      )
 
   def test_sample_with_contextual_options(self):
     with mock.patch('openai.Completion.create') as mock_completion:
@@ -213,11 +405,46 @@ class OpenaiTest(unittest.TestCase):
       results = lm.sample(['hello'])
 
       self.assertEqual(len(results), 1)
-      self.assertEqual(results[0], openai.LMSamplingResult([
-          lf.LMSample('Sample 0 for prompt 0.', score=0.0),
-          lf.LMSample('Sample 1 for prompt 0.', score=0.1),
-      ], usage=openai.Usage(
-          prompt_tokens=100, completion_tokens=100, total_tokens=200)))
+      self.assertEqual(
+          results[0],
+          lf.LMSamplingResult(
+              [
+                  lf.LMSample(
+                      lf.AIMessage(
+                          'Sample 0 for prompt 0.',
+                          score=0.0,
+                          logprobs=None,
+                          usage=lf.LMSamplingUsage(
+                              prompt_tokens=50,
+                              completion_tokens=50,
+                              total_tokens=100,
+                          ),
+                          tags=[lf.Message.TAG_LM_RESPONSE],
+                      ),
+                      score=0.0,
+                      logprobs=None,
+                  ),
+                  lf.LMSample(
+                      lf.AIMessage(
+                          'Sample 1 for prompt 0.',
+                          score=0.1,
+                          logprobs=None,
+                          usage=lf.LMSamplingUsage(
+                              prompt_tokens=50,
+                              completion_tokens=50,
+                              total_tokens=100,
+                          ),
+                          tags=[lf.Message.TAG_LM_RESPONSE],
+                      ),
+                      score=0.1,
+                      logprobs=None,
+                  ),
+              ],
+              usage=lf.LMSamplingUsage(
+                  prompt_tokens=100, completion_tokens=100, total_tokens=200
+              ),
+          ),
+      )
 
 
 if __name__ == '__main__':