langfun 0.0.2.dev20240330__py3-none-any.whl → 0.0.2.dev20240511__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of langfun might be problematic.
- langfun/__init__.py +7 -0
- langfun/core/__init__.py +1 -0
- langfun/core/coding/python/correction.py +0 -7
- langfun/core/component.py +6 -0
- langfun/core/component_test.py +1 -0
- langfun/core/eval/__init__.py +15 -0
- langfun/core/eval/base.py +665 -95
- langfun/core/eval/base_test.py +224 -53
- langfun/core/eval/matching.py +48 -30
- langfun/core/eval/matching_test.py +25 -3
- langfun/core/eval/patching.py +130 -0
- langfun/core/eval/patching_test.py +170 -0
- langfun/core/eval/scoring.py +19 -10
- langfun/core/eval/scoring_test.py +21 -3
- langfun/core/langfunc.py +1 -22
- langfun/core/langfunc_test.py +10 -4
- langfun/core/language_model.py +130 -24
- langfun/core/language_model_test.py +249 -26
- langfun/core/llms/__init__.py +27 -2
- langfun/core/llms/anthropic.py +263 -0
- langfun/core/llms/anthropic_test.py +167 -0
- langfun/core/llms/cache/in_memory_test.py +37 -28
- langfun/core/llms/fake.py +34 -25
- langfun/core/llms/fake_test.py +122 -11
- langfun/core/llms/google_genai.py +8 -0
- langfun/core/llms/google_genai_test.py +8 -3
- langfun/core/llms/groq.py +260 -0
- langfun/core/llms/groq_test.py +170 -0
- langfun/core/llms/llama_cpp.py +3 -1
- langfun/core/llms/openai.py +100 -81
- langfun/core/llms/openai_test.py +287 -60
- langfun/core/llms/vertexai.py +291 -0
- langfun/core/llms/vertexai_test.py +233 -0
- langfun/core/modalities/image.py +1 -3
- langfun/core/modalities/mime.py +6 -0
- langfun/core/modalities/video.py +6 -5
- langfun/core/structured/__init__.py +5 -0
- langfun/core/structured/completion_test.py +2 -2
- langfun/core/structured/function_generation.py +245 -0
- langfun/core/structured/function_generation_test.py +329 -0
- langfun/core/structured/mapping.py +61 -3
- langfun/core/structured/mapping_test.py +17 -0
- langfun/core/structured/parsing_test.py +18 -13
- langfun/core/structured/prompting.py +61 -12
- langfun/core/structured/prompting_test.py +122 -12
- langfun/core/structured/schema.py +38 -6
- langfun/core/structured/schema_generation_test.py +2 -2
- langfun/core/structured/schema_test.py +36 -7
- langfun/core/structured/scoring.py +4 -1
- langfun/core/structured/scoring_test.py +6 -0
- langfun/core/template.py +147 -11
- langfun/core/template_test.py +75 -0
- langfun/core/templates/selfplay_test.py +6 -2
- {langfun-0.0.2.dev20240330.dist-info → langfun-0.0.2.dev20240511.dist-info}/METADATA +3 -2
- langfun-0.0.2.dev20240511.dist-info/RECORD +112 -0
- langfun-0.0.2.dev20240330.dist-info/RECORD +0 -102
- {langfun-0.0.2.dev20240330.dist-info → langfun-0.0.2.dev20240511.dist-info}/LICENSE +0 -0
- {langfun-0.0.2.dev20240330.dist-info → langfun-0.0.2.dev20240511.dist-info}/WHEEL +0 -0
- {langfun-0.0.2.dev20240330.dist-info → langfun-0.0.2.dev20240511.dist-info}/top_level.txt +0 -0
langfun/core/llms/openai_test.py
CHANGED
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Tests for
+"""Tests for OpenAI models."""
 
 import unittest
 from unittest import mock
@@ -32,11 +32,14 @@ def mock_completion_query(prompt, *, n=1, **kwargs):
           text=f'Sample {k} for prompt {i}.',
           logprobs=k / 10,
       ))
-  return pg.Dict(
-
-
-
-
+  return pg.Dict(
+      choices=choices,
+      usage=lf.LMSamplingUsage(
+          prompt_tokens=100,
+          completion_tokens=100,
+          total_tokens=200,
+      ),
+  )
 
 
 def mock_chat_completion_query(messages, *, n=1, **kwargs):
@@ -49,18 +52,22 @@ def mock_chat_completion_query(messages, *, n=1, **kwargs):
         ),
         logprobs=None,
     ))
-  return pg.Dict(
-
-
-
-
+  return pg.Dict(
+      choices=choices,
+      usage=lf.LMSamplingUsage(
+          prompt_tokens=100,
+          completion_tokens=100,
+          total_tokens=200,
+      ),
+  )
 
 
 def mock_chat_completion_query_vision(messages, *, n=1, **kwargs):
   del kwargs
   choices = []
   urls = [
-      c['image_url']
+      c['image_url']['url']
+      for c in messages[0]['content'] if c['type'] == 'image_url'
   ]
   for k in range(n):
     choices.append(pg.Dict(
@@ -69,14 +76,17 @@ def mock_chat_completion_query_vision(messages, *, n=1, **kwargs):
         ),
         logprobs=None,
     ))
-  return pg.Dict(
-
-
-
-
+  return pg.Dict(
+      choices=choices,
+      usage=lf.LMSamplingUsage(
+          prompt_tokens=100,
+          completion_tokens=100,
+          total_tokens=200,
+      ),
+  )
 
 
-class OpenaiTest(unittest.TestCase):
+class OpenAITest(unittest.TestCase):
   """Tests for OpenAI language model."""
 
   def test_model_id(self):
@@ -89,7 +99,7 @@ class OpenaiTest(unittest.TestCase):
     )
 
   def test_max_concurrency(self):
-    self.
+    self.assertGreater(openai.Gpt35(api_key='test_key').max_concurrency, 0)
 
   def test_get_request_args(self):
     self.assertEqual(
@@ -121,7 +131,6 @@ class OpenaiTest(unittest.TestCase):
             top_logprobs=None,
            n=1,
            temperature=1.0,
-            max_tokens=1024,
            stream=False,
            timeout=120.0,
            stop=['\n'],
@@ -149,17 +158,19 @@ class OpenaiTest(unittest.TestCase):
   def test_call_chat_completion_vision(self):
     with mock.patch('openai.ChatCompletion.create') as mock_chat_completion:
       mock_chat_completion.side_effect = mock_chat_completion_query_vision
-
-
-
-
-
-
-
-
-
-
-
+      lm_1 = openai.Gpt4Turbo(api_key='test_key')
+      lm_2 = openai.Gpt4VisionPreview(api_key='test_key')
+      for lm in (lm_1, lm_2):
+        self.assertEqual(
+            lm(
+                lf.UserMessage(
+                    'hello {{image}}',
+                    image=lf_modalities.Image.from_uri('https://fake/image')
+                ),
+                sampling_options=lf.LMSamplingOptions(n=2)
+            ),
+            'Sample 0 for message: https://fake/image',
+        )
 
   def test_sample_completion(self):
     with mock.patch('openai.Completion.create') as mock_completion:
@@ -170,18 +181,101 @@ class OpenaiTest(unittest.TestCase):
       )
 
       self.assertEqual(len(results), 2)
-      self.assertEqual(
-
-          lf.
-
-
-
-
-
-
-
-
-
+      self.assertEqual(
+          results[0],
+          lf.LMSamplingResult(
+              [
+                  lf.LMSample(
+                      lf.AIMessage(
+                          'Sample 0 for prompt 0.',
+                          score=0.0,
+                          logprobs=None,
+                          usage=lf.LMSamplingUsage(
+                              prompt_tokens=33,
+                              completion_tokens=33,
+                              total_tokens=66
+                          ),
+                          tags=[lf.Message.TAG_LM_RESPONSE],
+                      ),
+                      score=0.0,
+                      logprobs=None,
+                  ),
+                  lf.LMSample(
+                      lf.AIMessage(
+                          'Sample 1 for prompt 0.',
+                          score=0.1,
+                          logprobs=None,
+                          usage=lf.LMSamplingUsage(
+                              prompt_tokens=33,
+                              completion_tokens=33,
+                              total_tokens=66
+                          ),
+                          tags=[lf.Message.TAG_LM_RESPONSE],
+                      ),
+                      score=0.1,
+                      logprobs=None,
+                  ),
+                  lf.LMSample(
+                      lf.AIMessage(
+                          'Sample 2 for prompt 0.',
+                          score=0.2,
+                          logprobs=None,
+                          usage=lf.LMSamplingUsage(
+                              prompt_tokens=33,
+                              completion_tokens=33,
+                              total_tokens=66
+                          ),
+                          tags=[lf.Message.TAG_LM_RESPONSE],
+                      ),
+                      score=0.2,
+                      logprobs=None,
+                  ),
+              ],
+              usage=lf.LMSamplingUsage(
+                  prompt_tokens=100, completion_tokens=100, total_tokens=200
+              ),
+          ),
+      )
+      self.assertEqual(
+          results[1],
+          lf.LMSamplingResult(
+              [
+                  lf.LMSample(
+                      lf.AIMessage(
+                          'Sample 0 for prompt 1.',
+                          score=0.0,
+                          logprobs=None,
+                          usage=None,
+                          tags=[lf.Message.TAG_LM_RESPONSE],
+                      ),
+                      score=0.0,
+                      logprobs=None,
+                  ),
+                  lf.LMSample(
+                      lf.AIMessage(
+                          'Sample 1 for prompt 1.',
+                          score=0.1,
+                          logprobs=None,
+                          usage=None,
+                          tags=[lf.Message.TAG_LM_RESPONSE],
+                      ),
+                      score=0.1,
+                      logprobs=None,
+                  ),
+                  lf.LMSample(
+                      lf.AIMessage(
+                          'Sample 2 for prompt 1.',
+                          score=0.2,
+                          logprobs=None,
+                          usage=None,
+                          tags=[lf.Message.TAG_LM_RESPONSE],
+                      ),
+                      score=0.2,
+                      logprobs=None,
+                  ),
+              ],
+          ),
+      )
 
   def test_sample_chat_completion(self):
     with mock.patch('openai.ChatCompletion.create') as mock_chat_completion:
@@ -192,18 +286,116 @@ class OpenaiTest(unittest.TestCase):
       )
 
      self.assertEqual(len(results), 2)
-      self.assertEqual(
-
-          lf.
-
-
-
-
-
-
-
-
-
+      self.assertEqual(
+          results[0],
+          lf.LMSamplingResult(
+              [
+                  lf.LMSample(
+                      lf.AIMessage(
+                          'Sample 0 for message.',
+                          score=0.0,
+                          logprobs=None,
+                          usage=lf.LMSamplingUsage(
+                              prompt_tokens=33,
+                              completion_tokens=33,
+                              total_tokens=66
+                          ),
+                          tags=[lf.Message.TAG_LM_RESPONSE],
+                      ),
+                      score=0.0,
+                      logprobs=None,
+                  ),
+                  lf.LMSample(
+                      lf.AIMessage(
+                          'Sample 1 for message.',
+                          score=0.0,
+                          logprobs=None,
+                          usage=lf.LMSamplingUsage(
+                              prompt_tokens=33,
+                              completion_tokens=33,
+                              total_tokens=66
+                          ),
+                          tags=[lf.Message.TAG_LM_RESPONSE],
+                      ),
+                      score=0.0,
+                      logprobs=None,
+                  ),
+                  lf.LMSample(
+                      lf.AIMessage(
+                          'Sample 2 for message.',
+                          score=0.0,
+                          logprobs=None,
+                          usage=lf.LMSamplingUsage(
+                              prompt_tokens=33,
+                              completion_tokens=33,
+                              total_tokens=66
+                          ),
+                          tags=[lf.Message.TAG_LM_RESPONSE],
+                      ),
+                      score=0.0,
+                      logprobs=None,
+                  ),
+              ],
+              usage=lf.LMSamplingUsage(
+                  prompt_tokens=100, completion_tokens=100, total_tokens=200
+              ),
+          ),
+      )
+      self.assertEqual(
+          results[1],
+          lf.LMSamplingResult(
+              [
+                  lf.LMSample(
+                      lf.AIMessage(
+                          'Sample 0 for message.',
+                          score=0.0,
+                          logprobs=None,
+                          usage=lf.LMSamplingUsage(
+                              prompt_tokens=33,
+                              completion_tokens=33,
+                              total_tokens=66
+                          ),
+                          tags=[lf.Message.TAG_LM_RESPONSE],
+                      ),
+                      score=0.0,
+                      logprobs=None,
+                  ),
+                  lf.LMSample(
+                      lf.AIMessage(
+                          'Sample 1 for message.',
+                          score=0.0,
+                          logprobs=None,
+                          usage=lf.LMSamplingUsage(
+                              prompt_tokens=33,
+                              completion_tokens=33,
+                              total_tokens=66
+                          ),
+                          tags=[lf.Message.TAG_LM_RESPONSE],
+                      ),
+                      score=0.0,
+                      logprobs=None,
+                  ),
+                  lf.LMSample(
+                      lf.AIMessage(
+                          'Sample 2 for message.',
+                          score=0.0,
+                          logprobs=None,
+                          usage=lf.LMSamplingUsage(
+                              prompt_tokens=33,
+                              completion_tokens=33,
+                              total_tokens=66
+                          ),
+                          tags=[lf.Message.TAG_LM_RESPONSE],
+                      ),
+                      score=0.0,
+                      logprobs=None,
+                  ),
+              ],
+              usage=lf.LMSamplingUsage(
+                  prompt_tokens=100, completion_tokens=100, total_tokens=200
+              ),
+          ),
+      )
 
   def test_sample_with_contextual_options(self):
     with mock.patch('openai.Completion.create') as mock_completion:
@@ -213,11 +405,46 @@ class OpenaiTest(unittest.TestCase):
         results = lm.sample(['hello'])
 
       self.assertEqual(len(results), 1)
-      self.assertEqual(
-
-          lf.
-
-
+      self.assertEqual(
+          results[0],
+          lf.LMSamplingResult(
+              [
+                  lf.LMSample(
+                      lf.AIMessage(
+                          'Sample 0 for prompt 0.',
+                          score=0.0,
+                          logprobs=None,
+                          usage=lf.LMSamplingUsage(
+                              prompt_tokens=50,
+                              completion_tokens=50,
+                              total_tokens=100,
+                          ),
+                          tags=[lf.Message.TAG_LM_RESPONSE],
+                      ),
+                      score=0.0,
+                      logprobs=None,
+                  ),
+                  lf.LMSample(
+                      lf.AIMessage(
+                          'Sample 1 for prompt 0.',
+                          score=0.1,
+                          logprobs=None,
+                          usage=lf.LMSamplingUsage(
+                              prompt_tokens=50,
+                              completion_tokens=50,
+                              total_tokens=100,
+                          ),
+                          tags=[lf.Message.TAG_LM_RESPONSE],
+                      ),
+                      score=0.1,
+                      logprobs=None,
+                  ),
+              ],
+              usage=lf.LMSamplingUsage(
+                  prompt_tokens=100, completion_tokens=100, total_tokens=200
+              ),
+          ),
+      )
 
 
 if __name__ == '__main__':
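Most of the new assertions above exercise the token-usage accounting introduced in this release: the mocked OpenAI responses now include a usage field, and each lf.LMSamplingResult carries an lf.LMSamplingUsage for the request alongside per-sample usage on the returned messages. Below is a minimal sketch of that result shape, assuming `from langfun import core as lf` as in the test module; the token counts are illustrative, not taken from the diff.

# Sketch only: constructs the result shape asserted by the updated openai_test.py.
# Assumes `from langfun import core as lf`; values are illustrative.
from langfun import core as lf

per_sample_usage = lf.LMSamplingUsage(
    prompt_tokens=33, completion_tokens=33, total_tokens=66
)
result = lf.LMSamplingResult(
    [
        lf.LMSample(
            lf.AIMessage('Sample 0 for prompt 0.', usage=per_sample_usage),
            score=0.0,
            logprobs=None,
        ),
    ],
    # Request-level usage, aggregated across samples.
    usage=lf.LMSamplingUsage(
        prompt_tokens=100, completion_tokens=100, total_tokens=200
    ),
)
print(result.usage.total_tokens)  # request total; per-sample usage lives on each message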