langfun 0.0.2.dev20240330__py3-none-any.whl → 0.0.2.dev20240429__py3-none-any.whl

This diff compares two package versions that were publicly released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (49)
  1. langfun/__init__.py +2 -0
  2. langfun/core/__init__.py +1 -0
  3. langfun/core/coding/python/correction.py +0 -7
  4. langfun/core/component.py +6 -0
  5. langfun/core/component_test.py +1 -0
  6. langfun/core/eval/__init__.py +2 -0
  7. langfun/core/eval/base.py +202 -23
  8. langfun/core/eval/base_test.py +49 -10
  9. langfun/core/eval/matching.py +26 -9
  10. langfun/core/eval/matching_test.py +2 -1
  11. langfun/core/eval/scoring.py +15 -6
  12. langfun/core/eval/scoring_test.py +2 -1
  13. langfun/core/langfunc.py +0 -5
  14. langfun/core/langfunc_test.py +6 -4
  15. langfun/core/language_model.py +124 -24
  16. langfun/core/language_model_test.py +249 -26
  17. langfun/core/llms/__init__.py +19 -2
  18. langfun/core/llms/anthropic.py +263 -0
  19. langfun/core/llms/anthropic_test.py +167 -0
  20. langfun/core/llms/cache/in_memory_test.py +37 -28
  21. langfun/core/llms/fake.py +31 -22
  22. langfun/core/llms/fake_test.py +122 -11
  23. langfun/core/llms/google_genai_test.py +8 -3
  24. langfun/core/llms/groq.py +260 -0
  25. langfun/core/llms/groq_test.py +170 -0
  26. langfun/core/llms/llama_cpp.py +3 -1
  27. langfun/core/llms/openai.py +97 -79
  28. langfun/core/llms/openai_test.py +285 -59
  29. langfun/core/modalities/video.py +5 -2
  30. langfun/core/structured/__init__.py +3 -0
  31. langfun/core/structured/completion_test.py +2 -2
  32. langfun/core/structured/function_generation.py +245 -0
  33. langfun/core/structured/function_generation_test.py +329 -0
  34. langfun/core/structured/mapping.py +56 -2
  35. langfun/core/structured/mapping_test.py +17 -0
  36. langfun/core/structured/parsing_test.py +18 -13
  37. langfun/core/structured/prompting.py +27 -6
  38. langfun/core/structured/prompting_test.py +79 -12
  39. langfun/core/structured/schema.py +4 -2
  40. langfun/core/structured/schema_generation_test.py +2 -2
  41. langfun/core/structured/schema_test.py +4 -6
  42. langfun/core/template.py +125 -10
  43. langfun/core/template_test.py +75 -0
  44. langfun/core/templates/selfplay_test.py +6 -2
  45. {langfun-0.0.2.dev20240330.dist-info → langfun-0.0.2.dev20240429.dist-info}/METADATA +3 -2
  46. {langfun-0.0.2.dev20240330.dist-info → langfun-0.0.2.dev20240429.dist-info}/RECORD +49 -43
  47. {langfun-0.0.2.dev20240330.dist-info → langfun-0.0.2.dev20240429.dist-info}/LICENSE +0 -0
  48. {langfun-0.0.2.dev20240330.dist-info → langfun-0.0.2.dev20240429.dist-info}/WHEEL +0 -0
  49. {langfun-0.0.2.dev20240330.dist-info → langfun-0.0.2.dev20240429.dist-info}/top_level.txt +0 -0

langfun/core/llms/openai_test.py

@@ -11,7 +11,7 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
- """Tests for openai models."""
+ """Tests for OpenAI models."""

  import unittest
  from unittest import mock
@@ -32,11 +32,14 @@ def mock_completion_query(prompt, *, n=1, **kwargs):
  text=f'Sample {k} for prompt {i}.',
  logprobs=k / 10,
  ))
- return pg.Dict(choices=choices, usage=openai.Usage(
- prompt_tokens=100,
- completion_tokens=100,
- total_tokens=200,
- ))
+ return pg.Dict(
+ choices=choices,
+ usage=lf.LMSamplingUsage(
+ prompt_tokens=100,
+ completion_tokens=100,
+ total_tokens=200,
+ ),
+ )


  def mock_chat_completion_query(messages, *, n=1, **kwargs):
@@ -49,11 +52,14 @@ def mock_chat_completion_query(messages, *, n=1, **kwargs):
  ),
  logprobs=None,
  ))
- return pg.Dict(choices=choices, usage=openai.Usage(
- prompt_tokens=100,
- completion_tokens=100,
- total_tokens=200,
- ))
+ return pg.Dict(
+ choices=choices,
+ usage=lf.LMSamplingUsage(
+ prompt_tokens=100,
+ completion_tokens=100,
+ total_tokens=200,
+ ),
+ )


  def mock_chat_completion_query_vision(messages, *, n=1, **kwargs):
@@ -69,14 +75,17 @@ def mock_chat_completion_query_vision(messages, *, n=1, **kwargs):
  ),
  logprobs=None,
  ))
- return pg.Dict(choices=choices, usage=openai.Usage(
- prompt_tokens=100,
- completion_tokens=100,
- total_tokens=200,
- ))
+ return pg.Dict(
+ choices=choices,
+ usage=lf.LMSamplingUsage(
+ prompt_tokens=100,
+ completion_tokens=100,
+ total_tokens=200,
+ ),
+ )


- class OpenaiTest(unittest.TestCase):
+ class OpenAITest(unittest.TestCase):
  """Tests for OpenAI language model."""

  def test_model_id(self):
@@ -89,7 +98,7 @@ class OpenaiTest(unittest.TestCase):
  )

  def test_max_concurrency(self):
- self.assertEqual(openai.Gpt35(api_key='test_key').max_concurrency, 8)
+ self.assertGreater(openai.Gpt35(api_key='test_key').max_concurrency, 0)

  def test_get_request_args(self):
  self.assertEqual(
@@ -121,7 +130,6 @@ class OpenaiTest(unittest.TestCase):
  top_logprobs=None,
  n=1,
  temperature=1.0,
- max_tokens=1024,
  stream=False,
  timeout=120.0,
  stop=['\n'],
@@ -149,17 +157,19 @@ class OpenaiTest(unittest.TestCase):
  def test_call_chat_completion_vision(self):
  with mock.patch('openai.ChatCompletion.create') as mock_chat_completion:
  mock_chat_completion.side_effect = mock_chat_completion_query_vision
- lm = openai.Gpt4TurboVision(api_key='test_key')
- self.assertEqual(
- lm(
- lf.UserMessage(
- 'hello {{image}}',
- image=lf_modalities.Image.from_uri('https://fake/image')
- ),
- sampling_options=lf.LMSamplingOptions(n=2)
- ),
- 'Sample 0 for message: https://fake/image',
- )
+ lm_1 = openai.Gpt4Turbo(api_key='test_key')
+ lm_2 = openai.Gpt4VisionPreview(api_key='test_key')
+ for lm in (lm_1, lm_2):
+ self.assertEqual(
+ lm(
+ lf.UserMessage(
+ 'hello {{image}}',
+ image=lf_modalities.Image.from_uri('https://fake/image')
+ ),
+ sampling_options=lf.LMSamplingOptions(n=2)
+ ),
+ 'Sample 0 for message: https://fake/image',
+ )

  def test_sample_completion(self):
  with mock.patch('openai.Completion.create') as mock_completion:
@@ -170,18 +180,101 @@ class OpenaiTest(unittest.TestCase):
  )

  self.assertEqual(len(results), 2)
- self.assertEqual(results[0], openai.LMSamplingResult([
- lf.LMSample('Sample 0 for prompt 0.', score=0.0),
- lf.LMSample('Sample 1 for prompt 0.', score=0.1),
- lf.LMSample('Sample 2 for prompt 0.', score=0.2),
- ], usage=openai.Usage(
- prompt_tokens=100, completion_tokens=100, total_tokens=200)))
-
- self.assertEqual(results[1], openai.LMSamplingResult([
- lf.LMSample('Sample 0 for prompt 1.', score=0.0),
- lf.LMSample('Sample 1 for prompt 1.', score=0.1),
- lf.LMSample('Sample 2 for prompt 1.', score=0.2),
- ]))
+ self.assertEqual(
+ results[0],
+ lf.LMSamplingResult(
+ [
+ lf.LMSample(
+ lf.AIMessage(
+ 'Sample 0 for prompt 0.',
+ score=0.0,
+ logprobs=None,
+ usage=lf.LMSamplingUsage(
+ prompt_tokens=33,
+ completion_tokens=33,
+ total_tokens=66
+ ),
+ tags=[lf.Message.TAG_LM_RESPONSE],
+ ),
+ score=0.0,
+ logprobs=None,
+ ),
+ lf.LMSample(
+ lf.AIMessage(
+ 'Sample 1 for prompt 0.',
+ score=0.1,
+ logprobs=None,
+ usage=lf.LMSamplingUsage(
+ prompt_tokens=33,
+ completion_tokens=33,
+ total_tokens=66
+ ),
+ tags=[lf.Message.TAG_LM_RESPONSE],
+ ),
+ score=0.1,
+ logprobs=None,
+ ),
+ lf.LMSample(
+ lf.AIMessage(
+ 'Sample 2 for prompt 0.',
+ score=0.2,
+ logprobs=None,
+ usage=lf.LMSamplingUsage(
+ prompt_tokens=33,
+ completion_tokens=33,
+ total_tokens=66
+ ),
+ tags=[lf.Message.TAG_LM_RESPONSE],
+ ),
+ score=0.2,
+ logprobs=None,
+ ),
+ ],
+ usage=lf.LMSamplingUsage(
+ prompt_tokens=100, completion_tokens=100, total_tokens=200
+ ),
+ ),
+ )
+ self.assertEqual(
+ results[1],
+ lf.LMSamplingResult(
+ [
+ lf.LMSample(
+ lf.AIMessage(
+ 'Sample 0 for prompt 1.',
+ score=0.0,
+ logprobs=None,
+ usage=None,
+ tags=[lf.Message.TAG_LM_RESPONSE],
+ ),
+ score=0.0,
+ logprobs=None,
+ ),
+ lf.LMSample(
+ lf.AIMessage(
+ 'Sample 1 for prompt 1.',
+ score=0.1,
+ logprobs=None,
+ usage=None,
+ tags=[lf.Message.TAG_LM_RESPONSE],
+ ),
+ score=0.1,
+ logprobs=None,
+ ),
+ lf.LMSample(
+ lf.AIMessage(
+ 'Sample 2 for prompt 1.',
+ score=0.2,
+ logprobs=None,
+ usage=None,
+ tags=[lf.Message.TAG_LM_RESPONSE],
+ ),
+ score=0.2,
+ logprobs=None,
+ ),
+ ],
+ ),
+ )

  def test_sample_chat_completion(self):
  with mock.patch('openai.ChatCompletion.create') as mock_chat_completion:
@@ -192,18 +285,116 @@ class OpenaiTest(unittest.TestCase):
  )

  self.assertEqual(len(results), 2)
- self.assertEqual(results[0], openai.LMSamplingResult([
- lf.LMSample('Sample 0 for message.', score=0.0),
- lf.LMSample('Sample 1 for message.', score=0.0),
- lf.LMSample('Sample 2 for message.', score=0.0),
- ], usage=openai.Usage(
- prompt_tokens=100, completion_tokens=100, total_tokens=200)))
- self.assertEqual(results[1], openai.LMSamplingResult([
- lf.LMSample('Sample 0 for message.', score=0.0),
- lf.LMSample('Sample 1 for message.', score=0.0),
- lf.LMSample('Sample 2 for message.', score=0.0),
- ], usage=openai.Usage(
- prompt_tokens=100, completion_tokens=100, total_tokens=200)))
+ self.assertEqual(
+ results[0],
+ lf.LMSamplingResult(
+ [
+ lf.LMSample(
+ lf.AIMessage(
+ 'Sample 0 for message.',
+ score=0.0,
+ logprobs=None,
+ usage=lf.LMSamplingUsage(
+ prompt_tokens=33,
+ completion_tokens=33,
+ total_tokens=66
+ ),
+ tags=[lf.Message.TAG_LM_RESPONSE],
+ ),
+ score=0.0,
+ logprobs=None,
+ ),
+ lf.LMSample(
+ lf.AIMessage(
+ 'Sample 1 for message.',
+ score=0.0,
+ logprobs=None,
+ usage=lf.LMSamplingUsage(
+ prompt_tokens=33,
+ completion_tokens=33,
+ total_tokens=66
+ ),
+ tags=[lf.Message.TAG_LM_RESPONSE],
+ ),
+ score=0.0,
+ logprobs=None,
+ ),
+ lf.LMSample(
+ lf.AIMessage(
+ 'Sample 2 for message.',
+ score=0.0,
+ logprobs=None,
+ usage=lf.LMSamplingUsage(
+ prompt_tokens=33,
+ completion_tokens=33,
+ total_tokens=66
+ ),
+ tags=[lf.Message.TAG_LM_RESPONSE],
+ ),
+ score=0.0,
+ logprobs=None,
+ ),
+ ],
+ usage=lf.LMSamplingUsage(
+ prompt_tokens=100, completion_tokens=100, total_tokens=200
+ ),
+ ),
+ )
+ self.assertEqual(
+ results[1],
+ lf.LMSamplingResult(
+ [
+ lf.LMSample(
+ lf.AIMessage(
+ 'Sample 0 for message.',
+ score=0.0,
+ logprobs=None,
+ usage=lf.LMSamplingUsage(
+ prompt_tokens=33,
+ completion_tokens=33,
+ total_tokens=66
+ ),
+ tags=[lf.Message.TAG_LM_RESPONSE],
+ ),
+ score=0.0,
+ logprobs=None,
+ ),
+ lf.LMSample(
+ lf.AIMessage(
+ 'Sample 1 for message.',
+ score=0.0,
+ logprobs=None,
+ usage=lf.LMSamplingUsage(
+ prompt_tokens=33,
+ completion_tokens=33,
+ total_tokens=66
+ ),
+ tags=[lf.Message.TAG_LM_RESPONSE],
+ ),
+ score=0.0,
+ logprobs=None,
+ ),
+ lf.LMSample(
+ lf.AIMessage(
+ 'Sample 2 for message.',
+ score=0.0,
+ logprobs=None,
+ usage=lf.LMSamplingUsage(
+ prompt_tokens=33,
+ completion_tokens=33,
+ total_tokens=66
+ ),
+ tags=[lf.Message.TAG_LM_RESPONSE],
+ ),
+ score=0.0,
+ logprobs=None,
+ ),
+ ],
+ usage=lf.LMSamplingUsage(
+ prompt_tokens=100, completion_tokens=100, total_tokens=200
+ ),
+ ),
+ )

  def test_sample_with_contextual_options(self):
  with mock.patch('openai.Completion.create') as mock_completion:
@@ -213,11 +404,46 @@ class OpenaiTest(unittest.TestCase):
  results = lm.sample(['hello'])

  self.assertEqual(len(results), 1)
- self.assertEqual(results[0], openai.LMSamplingResult([
- lf.LMSample('Sample 0 for prompt 0.', score=0.0),
- lf.LMSample('Sample 1 for prompt 0.', score=0.1),
- ], usage=openai.Usage(
- prompt_tokens=100, completion_tokens=100, total_tokens=200)))
+ self.assertEqual(
+ results[0],
+ lf.LMSamplingResult(
+ [
+ lf.LMSample(
+ lf.AIMessage(
+ 'Sample 0 for prompt 0.',
+ score=0.0,
+ logprobs=None,
+ usage=lf.LMSamplingUsage(
+ prompt_tokens=50,
+ completion_tokens=50,
+ total_tokens=100,
+ ),
+ tags=[lf.Message.TAG_LM_RESPONSE],
+ ),
+ score=0.0,
+ logprobs=None,
+ ),
+ lf.LMSample(
+ lf.AIMessage(
+ 'Sample 1 for prompt 0.',
+ score=0.1,
+ logprobs=None,
+ usage=lf.LMSamplingUsage(
+ prompt_tokens=50,
+ completion_tokens=50,
+ total_tokens=100,
+ ),
+ tags=[lf.Message.TAG_LM_RESPONSE],
+ ),
+ score=0.1,
+ logprobs=None,
+ ),
+ ],
+ usage=lf.LMSamplingUsage(
+ prompt_tokens=100, completion_tokens=100, total_tokens=200
+ ),
+ ),
+ )


  if __name__ == '__main__':
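
The updated tests above reflect the new usage accounting in this release: the provider-specific openai.Usage class is replaced by the shared lf.LMSamplingUsage, and usage is now attached to each returned lf.AIMessage in addition to the aggregate usage on lf.LMSamplingResult. A minimal sketch of the new result shape (not part of the package diff; classes and fields are taken from the tests above, token counts are illustrative):

import langfun.core as lf

result = lf.LMSamplingResult(
    [
        lf.LMSample(
            lf.AIMessage(
                'Sample 0 for prompt 0.',
                score=0.0,
                logprobs=None,
                # Per-sample accounting now lives on the response message.
                usage=lf.LMSamplingUsage(
                    prompt_tokens=33, completion_tokens=33, total_tokens=66
                ),
                tags=[lf.Message.TAG_LM_RESPONSE],
            ),
            score=0.0,
            logprobs=None,
        ),
    ],
    # Aggregate accounting for the whole sampling call.
    usage=lf.LMSamplingUsage(
        prompt_tokens=100, completion_tokens=100, total_tokens=200
    ),
)
assert result.usage.total_tokens == 200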

langfun/core/modalities/video.py

@@ -15,9 +15,7 @@

  import base64
  from typing import cast
-
  from langfun.core.modalities import mime
- import magic


  class Video(mime.MimeType):
@@ -29,6 +27,11 @@ class Video(mime.MimeType):

  @property
  def mime_type(self) -> str:
+ # TODO(daiyip): after cl/619658455, LaunchPad binaries cannot import `magic`
+ # correctly. This is to mitigate the issue for major Langfun users who do
+ # not use Video. We shall move this import out once the issue is fixed.
+ import magic  # pylint: disable=g-import-not-at-top
+
  video_mime_type = magic.from_buffer(self.to_bytes(), mime=True)
  if 'video/' not in video_mime_type:
  raise ValueError(f'Not a video: {video_mime_type!r}.')
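
The hunk above defers the `magic` import into the mime_type property, so importing langfun.core.modalities.video no longer requires `magic` to be loadable. A minimal, self-contained sketch of the same deferred-import pattern (not part of the diff; the class below is hypothetical, and only the magic.from_buffer call shown in the diff is assumed):

class LazySniffer:
  """Hypothetical illustration of the deferred-import pattern above."""

  def __init__(self, data: bytes):
    self._data = data

  @property
  def mime_type(self) -> str:
    # Resolve the optional dependency only when the property is accessed, so
    # merely importing this module never triggers the `magic` import.
    import magic  # pylint: disable=g-import-not-at-top

    return magic.from_buffer(self._data, mime=True)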

langfun/core/structured/__init__.py

@@ -48,7 +48,10 @@ from langfun.core.structured.schema_generation import generate_class
  from langfun.core.structured.schema_generation import classgen_example
  from langfun.core.structured.schema_generation import default_classgen_examples

+ from langfun.core.structured.function_generation import function_gen
+
  from langfun.core.structured.mapping import Mapping
+ from langfun.core.structured.mapping import MappingError
  from langfun.core.structured.mapping import MappingExample

  from langfun.core.structured.parsing import ParseStructure

langfun/core/structured/completion_test.py

@@ -17,7 +17,6 @@ import inspect
  import unittest

  import langfun.core as lf
- from langfun.core import coding
  from langfun.core import modalities
  from langfun.core.llms import fake
  from langfun.core.structured import completion
@@ -583,6 +582,7 @@ class CompleteStructureTest(unittest.TestCase):
  result=Activity(description='foo'),
  score=1.0,
  logprobs=None,
+ usage=lf.LMSamplingUsage(553, 27, 580),
  tags=['lm-response', 'lm-output', 'transformed']
  )
  )
@@ -607,7 +607,7 @@ class CompleteStructureTest(unittest.TestCase):
  override_attrs=True,
  ):
  with self.assertRaisesRegex(
- coding.CodeError,
+ mapping.MappingError,
  'Expect .* but encountered .*',
  ):
  completion.complete(Activity.partial(), autofix=0)
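
Per the last hunk, failures while mapping an LM response into the requested structure now surface as MappingError (which the langfun/core/structured/__init__.py change above also exports) instead of coding.CodeError. A minimal sketch of catching it (not part of the diff; the call inside the try block is elided and illustrative):

from langfun.core.structured import mapping

try:
  ...  # e.g. a completion.complete(...) call whose output cannot be mapped
except mapping.MappingError as e:
  print(f'Mapping failed: {e}')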