langfun 0.0.2.dev20240330__py3-none-any.whl → 0.0.2.dev20240429__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langfun/__init__.py +2 -0
- langfun/core/__init__.py +1 -0
- langfun/core/coding/python/correction.py +0 -7
- langfun/core/component.py +6 -0
- langfun/core/component_test.py +1 -0
- langfun/core/eval/__init__.py +2 -0
- langfun/core/eval/base.py +202 -23
- langfun/core/eval/base_test.py +49 -10
- langfun/core/eval/matching.py +26 -9
- langfun/core/eval/matching_test.py +2 -1
- langfun/core/eval/scoring.py +15 -6
- langfun/core/eval/scoring_test.py +2 -1
- langfun/core/langfunc.py +0 -5
- langfun/core/langfunc_test.py +6 -4
- langfun/core/language_model.py +124 -24
- langfun/core/language_model_test.py +249 -26
- langfun/core/llms/__init__.py +19 -2
- langfun/core/llms/anthropic.py +263 -0
- langfun/core/llms/anthropic_test.py +167 -0
- langfun/core/llms/cache/in_memory_test.py +37 -28
- langfun/core/llms/fake.py +31 -22
- langfun/core/llms/fake_test.py +122 -11
- langfun/core/llms/google_genai_test.py +8 -3
- langfun/core/llms/groq.py +260 -0
- langfun/core/llms/groq_test.py +170 -0
- langfun/core/llms/llama_cpp.py +3 -1
- langfun/core/llms/openai.py +97 -79
- langfun/core/llms/openai_test.py +285 -59
- langfun/core/modalities/video.py +5 -2
- langfun/core/structured/__init__.py +3 -0
- langfun/core/structured/completion_test.py +2 -2
- langfun/core/structured/function_generation.py +245 -0
- langfun/core/structured/function_generation_test.py +329 -0
- langfun/core/structured/mapping.py +56 -2
- langfun/core/structured/mapping_test.py +17 -0
- langfun/core/structured/parsing_test.py +18 -13
- langfun/core/structured/prompting.py +27 -6
- langfun/core/structured/prompting_test.py +79 -12
- langfun/core/structured/schema.py +4 -2
- langfun/core/structured/schema_generation_test.py +2 -2
- langfun/core/structured/schema_test.py +4 -6
- langfun/core/template.py +125 -10
- langfun/core/template_test.py +75 -0
- langfun/core/templates/selfplay_test.py +6 -2
- {langfun-0.0.2.dev20240330.dist-info → langfun-0.0.2.dev20240429.dist-info}/METADATA +3 -2
- {langfun-0.0.2.dev20240330.dist-info → langfun-0.0.2.dev20240429.dist-info}/RECORD +49 -43
- {langfun-0.0.2.dev20240330.dist-info → langfun-0.0.2.dev20240429.dist-info}/LICENSE +0 -0
- {langfun-0.0.2.dev20240330.dist-info → langfun-0.0.2.dev20240429.dist-info}/WHEEL +0 -0
- {langfun-0.0.2.dev20240330.dist-info → langfun-0.0.2.dev20240429.dist-info}/top_level.txt +0 -0
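The most user-visible additions in this release are new LLM backends (anthropic.py, groq.py), LLM usage accounting, and structured function generation. A hypothetical guard (not part of the diff; the `packaging` dependency is assumed to be available) for code that wants to rely on those APIs:

import importlib.metadata

from packaging import version

installed = version.parse(importlib.metadata.version('langfun'))
if installed < version.parse('0.0.2.dev20240429'):
  raise RuntimeError(f'langfun {installed} predates the APIs added in this diff.')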
langfun/core/llms/openai_test.py
CHANGED
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Tests for
+"""Tests for OpenAI models."""

 import unittest
 from unittest import mock
@@ -32,11 +32,14 @@ def mock_completion_query(prompt, *, n=1, **kwargs):
           text=f'Sample {k} for prompt {i}.',
           logprobs=k / 10,
       ))
-  return pg.Dict(
+  return pg.Dict(
+      choices=choices,
+      usage=lf.LMSamplingUsage(
+          prompt_tokens=100,
+          completion_tokens=100,
+          total_tokens=200,
+      ),
+  )


 def mock_chat_completion_query(messages, *, n=1, **kwargs):
@@ -49,11 +52,14 @@ def mock_chat_completion_query(messages, *, n=1, **kwargs):
           ),
           logprobs=None,
       ))
-  return pg.Dict(
+  return pg.Dict(
+      choices=choices,
+      usage=lf.LMSamplingUsage(
+          prompt_tokens=100,
+          completion_tokens=100,
+          total_tokens=200,
+      ),
+  )


 def mock_chat_completion_query_vision(messages, *, n=1, **kwargs):
@@ -69,14 +75,17 @@ def mock_chat_completion_query_vision(messages, *, n=1, **kwargs):
           ),
           logprobs=None,
       ))
-  return pg.Dict(
+  return pg.Dict(
+      choices=choices,
+      usage=lf.LMSamplingUsage(
+          prompt_tokens=100,
+          completion_tokens=100,
+          total_tokens=200,
+      ),
+  )


-class OpenaiTest(unittest.TestCase):
+class OpenAITest(unittest.TestCase):
   """Tests for OpenAI language model."""

   def test_model_id(self):
@@ -89,7 +98,7 @@ class OpenaiTest(unittest.TestCase):
     )

   def test_max_concurrency(self):
-    self.
+    self.assertGreater(openai.Gpt35(api_key='test_key').max_concurrency, 0)

   def test_get_request_args(self):
     self.assertEqual(
@@ -121,7 +130,6 @@ class OpenaiTest(unittest.TestCase):
             top_logprobs=None,
             n=1,
             temperature=1.0,
-            max_tokens=1024,
             stream=False,
             timeout=120.0,
             stop=['\n'],
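The expected default request arguments above no longer include max_tokens=1024, which suggests the updated OpenAI wrapper omits the token cap unless one is requested. A hedged sketch of opting back in per call (test_key is a placeholder, as in the tests; the call mirrors the sampling_options= usage in the vision test below):

import langfun as lf
from langfun.core.llms import openai

lm = openai.Gpt35(api_key='test_key')
# Pass an explicit cap through the sampling options when one is still wanted.
response = lm('hello', sampling_options=lf.LMSamplingOptions(max_tokens=1024))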
@@ -149,17 +157,19 @@ class OpenaiTest(unittest.TestCase):
   def test_call_chat_completion_vision(self):
     with mock.patch('openai.ChatCompletion.create') as mock_chat_completion:
       mock_chat_completion.side_effect = mock_chat_completion_query_vision
+      lm_1 = openai.Gpt4Turbo(api_key='test_key')
+      lm_2 = openai.Gpt4VisionPreview(api_key='test_key')
+      for lm in (lm_1, lm_2):
+        self.assertEqual(
+            lm(
+                lf.UserMessage(
+                    'hello {{image}}',
+                    image=lf_modalities.Image.from_uri('https://fake/image')
+                ),
+                sampling_options=lf.LMSamplingOptions(n=2)
+            ),
+            'Sample 0 for message: https://fake/image',
+        )

   def test_sample_completion(self):
     with mock.patch('openai.Completion.create') as mock_completion:
@@ -170,18 +180,101 @@ class OpenaiTest(unittest.TestCase):
       )

     self.assertEqual(len(results), 2)
-    self.assertEqual(
+    self.assertEqual(
+        results[0],
+        lf.LMSamplingResult(
+            [
+                lf.LMSample(
+                    lf.AIMessage(
+                        'Sample 0 for prompt 0.',
+                        score=0.0,
+                        logprobs=None,
+                        usage=lf.LMSamplingUsage(
+                            prompt_tokens=33,
+                            completion_tokens=33,
+                            total_tokens=66
+                        ),
+                        tags=[lf.Message.TAG_LM_RESPONSE],
+                    ),
+                    score=0.0,
+                    logprobs=None,
+                ),
+                lf.LMSample(
+                    lf.AIMessage(
+                        'Sample 1 for prompt 0.',
+                        score=0.1,
+                        logprobs=None,
+                        usage=lf.LMSamplingUsage(
+                            prompt_tokens=33,
+                            completion_tokens=33,
+                            total_tokens=66
+                        ),
+                        tags=[lf.Message.TAG_LM_RESPONSE],
+                    ),
+                    score=0.1,
+                    logprobs=None,
+                ),
+                lf.LMSample(
+                    lf.AIMessage(
+                        'Sample 2 for prompt 0.',
+                        score=0.2,
+                        logprobs=None,
+                        usage=lf.LMSamplingUsage(
+                            prompt_tokens=33,
+                            completion_tokens=33,
+                            total_tokens=66
+                        ),
+                        tags=[lf.Message.TAG_LM_RESPONSE],
+                    ),
+                    score=0.2,
+                    logprobs=None,
+                ),
+            ],
+            usage=lf.LMSamplingUsage(
+                prompt_tokens=100, completion_tokens=100, total_tokens=200
+            ),
+        ),
+    )
+    self.assertEqual(
+        results[1],
+        lf.LMSamplingResult(
+            [
+                lf.LMSample(
+                    lf.AIMessage(
+                        'Sample 0 for prompt 1.',
+                        score=0.0,
+                        logprobs=None,
+                        usage=None,
+                        tags=[lf.Message.TAG_LM_RESPONSE],
+                    ),
+                    score=0.0,
+                    logprobs=None,
+                ),
+                lf.LMSample(
+                    lf.AIMessage(
+                        'Sample 1 for prompt 1.',
+                        score=0.1,
+                        logprobs=None,
+                        usage=None,
+                        tags=[lf.Message.TAG_LM_RESPONSE],
+                    ),
+                    score=0.1,
+                    logprobs=None,
+                ),
+                lf.LMSample(
+                    lf.AIMessage(
+                        'Sample 2 for prompt 1.',
+                        score=0.2,
+                        logprobs=None,
+                        usage=None,
+                        tags=[lf.Message.TAG_LM_RESPONSE],
+                    ),
+                    score=0.2,
+                    logprobs=None,
+                ),
+            ],
+        ),
+    )

   def test_sample_chat_completion(self):
     with mock.patch('openai.ChatCompletion.create') as mock_chat_completion:
@@ -192,18 +285,116 @@ class OpenaiTest(unittest.TestCase):
       )

     self.assertEqual(len(results), 2)
-    self.assertEqual(
+    self.assertEqual(
+        results[0],
+        lf.LMSamplingResult(
+            [
+                lf.LMSample(
+                    lf.AIMessage(
+                        'Sample 0 for message.',
+                        score=0.0,
+                        logprobs=None,
+                        usage=lf.LMSamplingUsage(
+                            prompt_tokens=33,
+                            completion_tokens=33,
+                            total_tokens=66
+                        ),
+                        tags=[lf.Message.TAG_LM_RESPONSE],
+                    ),
+                    score=0.0,
+                    logprobs=None,
+                ),
+                lf.LMSample(
+                    lf.AIMessage(
+                        'Sample 1 for message.',
+                        score=0.0,
+                        logprobs=None,
+                        usage=lf.LMSamplingUsage(
+                            prompt_tokens=33,
+                            completion_tokens=33,
+                            total_tokens=66
+                        ),
+                        tags=[lf.Message.TAG_LM_RESPONSE],
+                    ),
+                    score=0.0,
+                    logprobs=None,
+                ),
+                lf.LMSample(
+                    lf.AIMessage(
+                        'Sample 2 for message.',
+                        score=0.0,
+                        logprobs=None,
+                        usage=lf.LMSamplingUsage(
+                            prompt_tokens=33,
+                            completion_tokens=33,
+                            total_tokens=66
+                        ),
+                        tags=[lf.Message.TAG_LM_RESPONSE],
+                    ),
+                    score=0.0,
+                    logprobs=None,
+                ),
+            ],
+            usage=lf.LMSamplingUsage(
+                prompt_tokens=100, completion_tokens=100, total_tokens=200
+            ),
+        ),
+    )
+    self.assertEqual(
+        results[1],
+        lf.LMSamplingResult(
+            [
+                lf.LMSample(
+                    lf.AIMessage(
+                        'Sample 0 for message.',
+                        score=0.0,
+                        logprobs=None,
+                        usage=lf.LMSamplingUsage(
+                            prompt_tokens=33,
+                            completion_tokens=33,
+                            total_tokens=66
+                        ),
+                        tags=[lf.Message.TAG_LM_RESPONSE],
+                    ),
+                    score=0.0,
+                    logprobs=None,
+                ),
+                lf.LMSample(
+                    lf.AIMessage(
+                        'Sample 1 for message.',
+                        score=0.0,
+                        logprobs=None,
+                        usage=lf.LMSamplingUsage(
+                            prompt_tokens=33,
+                            completion_tokens=33,
+                            total_tokens=66
+                        ),
+                        tags=[lf.Message.TAG_LM_RESPONSE],
+                    ),
+                    score=0.0,
+                    logprobs=None,
+                ),
+                lf.LMSample(
+                    lf.AIMessage(
+                        'Sample 2 for message.',
+                        score=0.0,
+                        logprobs=None,
+                        usage=lf.LMSamplingUsage(
+                            prompt_tokens=33,
+                            completion_tokens=33,
+                            total_tokens=66
+                        ),
+                        tags=[lf.Message.TAG_LM_RESPONSE],
+                    ),
+                    score=0.0,
+                    logprobs=None,
+                ),
+            ],
+            usage=lf.LMSamplingUsage(
+                prompt_tokens=100, completion_tokens=100, total_tokens=200
+            ),
+        ),
+    )

   def test_sample_with_contextual_options(self):
     with mock.patch('openai.Completion.create') as mock_completion:
@@ -213,11 +404,46 @@ class OpenaiTest(unittest.TestCase):
         results = lm.sample(['hello'])

     self.assertEqual(len(results), 1)
-    self.assertEqual(
+    self.assertEqual(
+        results[0],
+        lf.LMSamplingResult(
+            [
+                lf.LMSample(
+                    lf.AIMessage(
+                        'Sample 0 for prompt 0.',
+                        score=0.0,
+                        logprobs=None,
+                        usage=lf.LMSamplingUsage(
+                            prompt_tokens=50,
+                            completion_tokens=50,
+                            total_tokens=100,
+                        ),
+                        tags=[lf.Message.TAG_LM_RESPONSE],
+                    ),
+                    score=0.0,
+                    logprobs=None,
+                ),
+                lf.LMSample(
+                    lf.AIMessage(
+                        'Sample 1 for prompt 0.',
+                        score=0.1,
+                        logprobs=None,
+                        usage=lf.LMSamplingUsage(
+                            prompt_tokens=50,
+                            completion_tokens=50,
+                            total_tokens=100,
+                        ),
+                        tags=[lf.Message.TAG_LM_RESPONSE],
+                    ),
+                    score=0.1,
+                    logprobs=None,
+                ),
+            ],
+            usage=lf.LMSamplingUsage(
+                prompt_tokens=100, completion_tokens=100, total_tokens=200
+            ),
+        ),
+    )


 if __name__ == '__main__':
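Taken together, these test changes show each lf.LMSamplingResult now carrying an aggregate lf.LMSamplingUsage, with per-response usage attached to the returned AIMessage where available. A small sketch of reading those fields, assuming the fake Echo model in this release populates usage the same way the mocked OpenAI responses above do:

import langfun as lf
from langfun.core.llms import fake

lm = fake.Echo()
[result] = lm.sample(['hello'])
print(result.usage)                      # aggregate lf.LMSamplingUsage, if reported
print(result.samples[0].response.usage)  # per-response usage on the AIMessage, may be None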
langfun/core/modalities/video.py
CHANGED
@@ -15,9 +15,7 @@

 import base64
 from typing import cast
-
 from langfun.core.modalities import mime
-import magic


 class Video(mime.MimeType):
@@ -29,6 +27,11 @@ class Video(mime.MimeType):

   @property
   def mime_type(self) -> str:
+    # TODO(daiyip): after cl/619658455, LaunchPad binaries cannot import `magic`
+    # correctly. This is to mitigate the issue for major Langfun users who do
+    # not use Video. We shall move this import out once the issue is fixed.
+    import magic  # pylint: disable=g-import-not-at-top
+
     video_mime_type = magic.from_buffer(self.to_bytes(), mime=True)
     if 'video/' not in video_mime_type:
       raise ValueError(f'Not a video: {video_mime_type!r}.')
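The TODO above moves `import magic` from module scope into the mime_type accessor so that binaries which never touch Video do not need libmagic at import time. A generic sketch of the same deferred-import pattern, using the standard-library mimetypes module as a stand-in for the optional dependency:

class Asset:
  """Illustrative only; defers its dependency until the property is read."""

  def __init__(self, filename: str):
    self._filename = filename

  @property
  def mime_type(self) -> str:
    # Deferring the import keeps module load independent of the dependency;
    # only code that reads `mime_type` triggers it.
    import mimetypes  # pylint: disable=g-import-not-at-top
    guessed, _ = mimetypes.guess_type(self._filename)
    return guessed or 'application/octet-stream'

print(Asset('clip.mp4').mime_type)  # 'video/mp4'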
langfun/core/structured/__init__.py
CHANGED
@@ -48,7 +48,10 @@ from langfun.core.structured.schema_generation import generate_class
 from langfun.core.structured.schema_generation import classgen_example
 from langfun.core.structured.schema_generation import default_classgen_examples

+from langfun.core.structured.function_generation import function_gen
+
 from langfun.core.structured.mapping import Mapping
+from langfun.core.structured.mapping import MappingError
 from langfun.core.structured.mapping import MappingExample

 from langfun.core.structured.parsing import ParseStructure
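This hunk exports two new names from langfun.core.structured: function_gen and MappingError. A hedged sketch of catching the newly exported error when an LM response cannot be mapped to the requested type (assuming lf.structured re-exports MappingError as shown, and using the fake StaticResponse model to force a bad output):

import langfun as lf
from langfun.core.llms import fake

try:
  lf.query('What is 1 + 1?', int, lm=fake.StaticResponse('not a number'))
except lf.structured.MappingError as e:
  print('Structured mapping failed:', e)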
langfun/core/structured/completion_test.py
CHANGED
@@ -17,7 +17,6 @@ import inspect
 import unittest

 import langfun.core as lf
-from langfun.core import coding
 from langfun.core import modalities
 from langfun.core.llms import fake
 from langfun.core.structured import completion
@@ -583,6 +582,7 @@ class CompleteStructureTest(unittest.TestCase):
             result=Activity(description='foo'),
             score=1.0,
             logprobs=None,
+            usage=lf.LMSamplingUsage(553, 27, 580),
             tags=['lm-response', 'lm-output', 'transformed']
         )
     )
@@ -607,7 +607,7 @@ class CompleteStructureTest(unittest.TestCase):
         override_attrs=True,
     ):
       with self.assertRaisesRegex(
-
+          mapping.MappingError,
           'Expect .* but encountered .*',
       ):
         completion.complete(Activity.partial(), autofix=0)
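The positional lf.LMSamplingUsage(553, 27, 580) above follows the field order used throughout this diff (prompt, completion, total tokens); the keyword form makes that explicit:

import langfun as lf

usage = lf.LMSamplingUsage(
    prompt_tokens=553, completion_tokens=27, total_tokens=580)
assert usage.total_tokens == usage.prompt_tokens + usage.completion_tokens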