langfun 0.1.2.dev202501080804__py3-none-any.whl → 0.1.2.dev202501240804__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. langfun/core/__init__.py +1 -6
  2. langfun/core/coding/python/__init__.py +5 -11
  3. langfun/core/coding/python/correction.py +4 -7
  4. langfun/core/coding/python/correction_test.py +2 -3
  5. langfun/core/coding/python/execution.py +22 -211
  6. langfun/core/coding/python/execution_test.py +11 -90
  7. langfun/core/coding/python/generation.py +3 -2
  8. langfun/core/coding/python/generation_test.py +2 -2
  9. langfun/core/coding/python/parsing.py +108 -194
  10. langfun/core/coding/python/parsing_test.py +2 -105
  11. langfun/core/component.py +11 -273
  12. langfun/core/component_test.py +2 -29
  13. langfun/core/concurrent.py +187 -82
  14. langfun/core/concurrent_test.py +28 -19
  15. langfun/core/console.py +7 -3
  16. langfun/core/eval/base.py +2 -3
  17. langfun/core/eval/v2/evaluation.py +3 -1
  18. langfun/core/eval/v2/reporting.py +8 -4
  19. langfun/core/language_model.py +84 -8
  20. langfun/core/language_model_test.py +84 -29
  21. langfun/core/llms/__init__.py +46 -11
  22. langfun/core/llms/anthropic.py +1 -123
  23. langfun/core/llms/anthropic_test.py +0 -48
  24. langfun/core/llms/deepseek.py +117 -0
  25. langfun/core/llms/deepseek_test.py +61 -0
  26. langfun/core/llms/gemini.py +1 -1
  27. langfun/core/llms/groq.py +12 -99
  28. langfun/core/llms/groq_test.py +31 -137
  29. langfun/core/llms/llama_cpp.py +17 -54
  30. langfun/core/llms/llama_cpp_test.py +2 -34
  31. langfun/core/llms/openai.py +9 -147
  32. langfun/core/llms/openai_compatible.py +179 -0
  33. langfun/core/llms/openai_compatible_test.py +495 -0
  34. langfun/core/llms/openai_test.py +13 -423
  35. langfun/core/llms/rest_test.py +1 -1
  36. langfun/core/llms/vertexai.py +387 -18
  37. langfun/core/llms/vertexai_test.py +52 -0
  38. langfun/core/message_test.py +3 -3
  39. langfun/core/modalities/mime.py +8 -0
  40. langfun/core/modalities/mime_test.py +19 -4
  41. langfun/core/modality_test.py +0 -1
  42. langfun/core/structured/mapping.py +13 -13
  43. langfun/core/structured/mapping_test.py +2 -2
  44. langfun/core/structured/schema.py +16 -8
  45. langfun/core/structured/schema_generation.py +1 -1
  46. {langfun-0.1.2.dev202501080804.dist-info → langfun-0.1.2.dev202501240804.dist-info}/METADATA +13 -2
  47. {langfun-0.1.2.dev202501080804.dist-info → langfun-0.1.2.dev202501240804.dist-info}/RECORD +50 -52
  48. {langfun-0.1.2.dev202501080804.dist-info → langfun-0.1.2.dev202501240804.dist-info}/WHEEL +1 -1
  49. langfun/core/coding/python/errors.py +0 -108
  50. langfun/core/coding/python/errors_test.py +0 -99
  51. langfun/core/coding/python/permissions.py +0 -90
  52. langfun/core/coding/python/permissions_test.py +0 -86
  53. langfun/core/text_formatting.py +0 -168
  54. langfun/core/text_formatting_test.py +0 -65
  55. {langfun-0.1.2.dev202501080804.dist-info → langfun-0.1.2.dev202501240804.dist-info}/LICENSE +0 -0
  56. {langfun-0.1.2.dev202501080804.dist-info → langfun-0.1.2.dev202501240804.dist-info}/top_level.txt +0 -0
langfun/core/llms/openai_test.py
@@ -13,102 +13,9 @@
 # limitations under the License.
 """Tests for OpenAI models."""
 
-from typing import Any
 import unittest
-from unittest import mock
-
 import langfun.core as lf
-from langfun.core import modalities as lf_modalities
 from langfun.core.llms import openai
-import pyglove as pg
-import requests
-
-
-def mock_chat_completion_request(url: str, json: dict[str, Any], **kwargs):
-  del url, kwargs
-  messages = json['messages']
-  if len(messages) > 1:
-    system_message = f' system={messages[0]["content"]}'
-  else:
-    system_message = ''
-
-  if 'response_format' in json:
-    response_format = f' format={json["response_format"]["type"]}'
-  else:
-    response_format = ''
-
-  choices = []
-  for k in range(json['n']):
-    if json.get('logprobs'):
-      logprobs = dict(
-          content=[
-              dict(
-                  token='chosen_token',
-                  logprob=0.5,
-                  top_logprobs=[
-                      dict(
-                          token=f'alternative_token_{i + 1}',
-                          logprob=0.1
-                      ) for i in range(3)
-                  ]
-              )
-          ]
-      )
-    else:
-      logprobs = None
-
-    choices.append(dict(
-        message=dict(
-            content=(
-                f'Sample {k} for message.{system_message}{response_format}'
-            )
-        ),
-        logprobs=logprobs,
-    ))
-  response = requests.Response()
-  response.status_code = 200
-  response._content = pg.to_json_str(
-      dict(
-          choices=choices,
-          usage=lf.LMSamplingUsage(
-              prompt_tokens=100,
-              completion_tokens=100,
-              total_tokens=200,
-          ),
-      )
-  ).encode()
-  return response
-
-
-def mock_chat_completion_request_vision(
-    url: str, json: dict[str, Any], **kwargs
-):
-  del url, kwargs
-  choices = []
-  urls = [
-      c['image_url']['url']
-      for c in json['messages'][0]['content'] if c['type'] == 'image_url'
-  ]
-  for k in range(json['n']):
-    choices.append(pg.Dict(
-        message=pg.Dict(
-            content=f'Sample {k} for message: {"".join(urls)}'
-        ),
-        logprobs=None,
-    ))
-  response = requests.Response()
-  response.status_code = 200
-  response._content = pg.to_json_str(
-      dict(
-          choices=choices,
-          usage=lf.LMSamplingUsage(
-              prompt_tokens=100,
-              completion_tokens=100,
-              total_tokens=200,
-          ),
-      )
-  ).encode()
-  return response
 
 
 class OpenAITest(unittest.TestCase):
@@ -130,6 +37,15 @@ class OpenAITest(unittest.TestCase):
         openai.Gpt35(api_key='test_key').resource_id, 'OpenAI(text-davinci-003)'
     )
 
+  def test_headers(self):
+    self.assertEqual(
+        openai.Gpt35(api_key='test_key').headers,
+        {
+            'Content-Type': 'application/json',
+            'Authorization': 'Bearer test_key',
+        },
+    )
+
   def test_max_concurrency(self):
     self.assertGreater(openai.Gpt35(api_key='test_key').max_concurrency, 0)
 
@@ -156,340 +72,14 @@ class OpenAITest(unittest.TestCase):
         )
     )
 
-  def test_call_chat_completion(self):
-    with mock.patch('requests.Session.post') as mock_request:
-      mock_request.side_effect = mock_chat_completion_request
-      lm = openai.OpenAI(
-          model='gpt-4',
-          api_key='test_key',
-          organization='my_org',
-          project='my_project'
-      )
-      self.assertEqual(
-          lm('hello', sampling_options=lf.LMSamplingOptions(n=2)),
-          'Sample 0 for message.',
-      )
-
-  def test_call_chat_completion_with_logprobs(self):
-    with mock.patch('requests.Session.post') as mock_request:
-      mock_request.side_effect = mock_chat_completion_request
-      lm = openai.OpenAI(
-          model='gpt-4',
-          api_key='test_key',
-          organization='my_org',
-          project='my_project'
-      )
-      results = lm.sample(['hello'], logprobs=True)
-      self.assertEqual(len(results), 1)
-      self.assertEqual(
-          results[0],
-          lf.LMSamplingResult(
-              [
-                  lf.LMSample(
-                      response=lf.AIMessage(
-                          text='Sample 0 for message.',
-                          metadata={
-                              'score': 0.0,
-                              'logprobs': [(
-                                  'chosen_token',
-                                  0.5,
-                                  [
-                                      ('alternative_token_1', 0.1),
-                                      ('alternative_token_2', 0.1),
-                                      ('alternative_token_3', 0.1),
-                                  ],
-                              )],
-                              'is_cached': False,
-                              'usage': lf.LMSamplingUsage(
-                                  prompt_tokens=100,
-                                  completion_tokens=100,
-                                  total_tokens=200,
-                                  estimated_cost=0.009,
-                              ),
-                          },
-                          tags=['lm-response'],
-                      ),
-                      logprobs=[(
-                          'chosen_token',
-                          0.5,
-                          [
-                              ('alternative_token_1', 0.1),
-                              ('alternative_token_2', 0.1),
-                              ('alternative_token_3', 0.1),
-                          ],
-                      )],
-                  )
-              ],
-              usage=lf.LMSamplingUsage(
-                  prompt_tokens=100,
-                  completion_tokens=100,
-                  total_tokens=200,
-                  estimated_cost=0.009,
-              ),
-          ),
-      )
-
-  def test_call_chat_completion_vision(self):
-    with mock.patch('requests.Session.post') as mock_request:
-      mock_request.side_effect = mock_chat_completion_request_vision
-      lm_1 = openai.Gpt4Turbo(api_key='test_key')
-      lm_2 = openai.Gpt4VisionPreview(api_key='test_key')
-      for lm in (lm_1, lm_2):
-        self.assertEqual(
-            lm(
-                lf.UserMessage(
-                    'hello <<[[image]]>>',
-                    image=lf_modalities.Image.from_uri('https://fake/image')
-                ),
-                sampling_options=lf.LMSamplingOptions(n=2)
-            ),
-            'Sample 0 for message: https://fake/image',
-        )
-      lm_3 = openai.Gpt35Turbo(api_key='test_key')
-      with self.assertRaisesRegex(ValueError, 'Unsupported modality'):
-        lm_3(
-            lf.UserMessage(
-                'hello <<[[image]]>>',
-                image=lf_modalities.Image.from_uri('https://fake/image')
-            ),
-        )
-
-  def test_sample_chat_completion(self):
-    with mock.patch('requests.Session.post') as mock_request:
-      mock_request.side_effect = mock_chat_completion_request
-      openai.SUPPORTED_MODELS_AND_SETTINGS['gpt-4'].update({
-          'cost_per_1k_input_tokens': 1.0,
-          'cost_per_1k_output_tokens': 1.0,
-      })
-      lm = openai.OpenAI(api_key='test_key', model='gpt-4')
-      results = lm.sample(
-          ['hello', 'bye'], sampling_options=lf.LMSamplingOptions(n=3)
-      )
-
-      self.assertEqual(len(results), 2)
-      print(results[0])
-      self.assertEqual(
-          results[0],
-          lf.LMSamplingResult(
-              [
-                  lf.LMSample(
-                      lf.AIMessage(
-                          'Sample 0 for message.',
-                          score=0.0,
-                          logprobs=None,
-                          is_cached=False,
-                          usage=lf.LMSamplingUsage(
-                              prompt_tokens=33,
-                              completion_tokens=33,
-                              total_tokens=66,
-                              estimated_cost=0.2 / 3,
-                          ),
-                          tags=[lf.Message.TAG_LM_RESPONSE],
-                      ),
-                      score=0.0,
-                      logprobs=None,
-                  ),
-                  lf.LMSample(
-                      lf.AIMessage(
-                          'Sample 1 for message.',
-                          score=0.0,
-                          logprobs=None,
-                          is_cached=False,
-                          usage=lf.LMSamplingUsage(
-                              prompt_tokens=33,
-                              completion_tokens=33,
-                              total_tokens=66,
-                              estimated_cost=0.2 / 3,
-                          ),
-                          tags=[lf.Message.TAG_LM_RESPONSE],
-                      ),
-                      score=0.0,
-                      logprobs=None,
-                  ),
-                  lf.LMSample(
-                      lf.AIMessage(
-                          'Sample 2 for message.',
-                          score=0.0,
-                          logprobs=None,
-                          is_cached=False,
-                          usage=lf.LMSamplingUsage(
-                              prompt_tokens=33,
-                              completion_tokens=33,
-                              total_tokens=66,
-                              estimated_cost=0.2 / 3,
-                          ),
-                          tags=[lf.Message.TAG_LM_RESPONSE],
-                      ),
-                      score=0.0,
-                      logprobs=None,
-                  ),
-              ],
-              usage=lf.LMSamplingUsage(
-                  prompt_tokens=100, completion_tokens=100, total_tokens=200,
-                  estimated_cost=0.2,
-              ),
-          ),
-      )
+  def test_estimate_cost(self):
     self.assertEqual(
-          results[1],
-          lf.LMSamplingResult(
-              [
-                  lf.LMSample(
-                      lf.AIMessage(
-                          'Sample 0 for message.',
-                          score=0.0,
-                          logprobs=None,
-                          is_cached=False,
-                          usage=lf.LMSamplingUsage(
-                              prompt_tokens=33,
-                              completion_tokens=33,
-                              total_tokens=66,
-                              estimated_cost=0.2 / 3,
-                          ),
-                          tags=[lf.Message.TAG_LM_RESPONSE],
-                      ),
-                      score=0.0,
-                      logprobs=None,
-                  ),
-                  lf.LMSample(
-                      lf.AIMessage(
-                          'Sample 1 for message.',
-                          score=0.0,
-                          logprobs=None,
-                          is_cached=False,
-                          usage=lf.LMSamplingUsage(
-                              prompt_tokens=33,
-                              completion_tokens=33,
-                              total_tokens=66,
-                              estimated_cost=0.2 / 3,
-                          ),
-                          tags=[lf.Message.TAG_LM_RESPONSE],
-                      ),
-                      score=0.0,
-                      logprobs=None,
-                  ),
-                  lf.LMSample(
-                      lf.AIMessage(
-                          'Sample 2 for message.',
-                          score=0.0,
-                          logprobs=None,
-                          is_cached=False,
-                          usage=lf.LMSamplingUsage(
-                              prompt_tokens=33,
-                              completion_tokens=33,
-                              total_tokens=66,
-                              estimated_cost=0.2 / 3,
-                          ),
-                          tags=[lf.Message.TAG_LM_RESPONSE],
-                      ),
-                      score=0.0,
-                      logprobs=None,
-                  ),
-              ],
-              usage=lf.LMSamplingUsage(
-                  prompt_tokens=100, completion_tokens=100, total_tokens=200,
-                  estimated_cost=0.2,
-              ),
+        openai.Gpt4(api_key='test_key').estimate_cost(
+            num_input_tokens=100, num_output_tokens=100
         ),
+        0.009
     )
 
-  def test_sample_with_contextual_options(self):
-    with mock.patch('requests.Session.post') as mock_request:
-      mock_request.side_effect = mock_chat_completion_request
-      lm = openai.OpenAI(api_key='test_key', model='text-davinci-003')
-      with lf.use_settings(sampling_options=lf.LMSamplingOptions(n=2)):
-        results = lm.sample(['hello'])
-
-      self.assertEqual(len(results), 1)
-      self.assertEqual(
-          results[0],
-          lf.LMSamplingResult(
-              [
-                  lf.LMSample(
-                      lf.AIMessage(
-                          'Sample 0 for message.',
-                          score=0.0,
-                          logprobs=None,
-                          is_cached=False,
-                          usage=lf.LMSamplingUsage(
-                              prompt_tokens=50,
-                              completion_tokens=50,
-                              total_tokens=100,
-                          ),
-                          tags=[lf.Message.TAG_LM_RESPONSE],
-                      ),
-                      score=0.0,
-                      logprobs=None,
-                  ),
-                  lf.LMSample(
-                      lf.AIMessage(
-                          'Sample 1 for message.',
-                          score=0.0,
-                          logprobs=None,
-                          is_cached=False,
-                          usage=lf.LMSamplingUsage(
-                              prompt_tokens=50,
-                              completion_tokens=50,
-                              total_tokens=100,
-                          ),
-                          tags=[lf.Message.TAG_LM_RESPONSE],
-                      ),
-                      score=0.0,
-                      logprobs=None,
-                  ),
-              ],
-              usage=lf.LMSamplingUsage(
-                  prompt_tokens=100, completion_tokens=100, total_tokens=200
-              ),
-          )
-      )
-
-  def test_call_with_system_message(self):
-    with mock.patch('requests.Session.post') as mock_request:
-      mock_request.side_effect = mock_chat_completion_request
-      lm = openai.OpenAI(api_key='test_key', model='gpt-4')
-      self.assertEqual(
-          lm(
-              lf.UserMessage(
-                  'hello',
-                  system_message='hi',
-              ),
-              sampling_options=lf.LMSamplingOptions(n=2)
-          ),
-          '''Sample 0 for message. system=[{'type': 'text', 'text': 'hi'}]''',
-      )
-
-  def test_call_with_json_schema(self):
-    with mock.patch('requests.Session.post') as mock_request:
-      mock_request.side_effect = mock_chat_completion_request
-      lm = openai.OpenAI(api_key='test_key', model='gpt-4')
-      self.assertEqual(
-          lm(
-              lf.UserMessage(
-                  'hello',
-                  json_schema={
-                      'type': 'object',
-                      'properties': {
-                          'name': {'type': 'string'},
-                      },
-                      'required': ['name'],
-                      'title': 'Person',
-                  }
-              ),
-              sampling_options=lf.LMSamplingOptions(n=2)
-          ),
-          'Sample 0 for message. format=json_schema',
-      )
-
-      # Test bad json schema.
-      with self.assertRaisesRegex(ValueError, '`json_schema` must be a dict'):
-        lm(lf.UserMessage('hello', json_schema='foo'))
-
-      with self.assertRaisesRegex(
-          ValueError, 'The root of `json_schema` must have a `title` field'
-      ):
-        lm(lf.UserMessage('hello', json_schema={}))
-
 
 if __name__ == '__main__':
   unittest.main()
langfun/core/llms/rest_test.py
@@ -89,7 +89,7 @@ class RestTest(unittest.TestCase):
              "max_tokens=4096, stop=['\\n']."
          ),
      )
-      self.assertEqual(response.usage, lf.UsageNotAvailable())
+      self.assertIsInstance(response.usage, lf.UsageNotAvailable)
 
   def test_call_errors(self):
     for status_code, error_type, error_message in [