langfun 0.1.2.dev202501080804__py3-none-any.whl → 0.1.2.dev202501240804__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. langfun/core/__init__.py +1 -6
  2. langfun/core/coding/python/__init__.py +5 -11
  3. langfun/core/coding/python/correction.py +4 -7
  4. langfun/core/coding/python/correction_test.py +2 -3
  5. langfun/core/coding/python/execution.py +22 -211
  6. langfun/core/coding/python/execution_test.py +11 -90
  7. langfun/core/coding/python/generation.py +3 -2
  8. langfun/core/coding/python/generation_test.py +2 -2
  9. langfun/core/coding/python/parsing.py +108 -194
  10. langfun/core/coding/python/parsing_test.py +2 -105
  11. langfun/core/component.py +11 -273
  12. langfun/core/component_test.py +2 -29
  13. langfun/core/concurrent.py +187 -82
  14. langfun/core/concurrent_test.py +28 -19
  15. langfun/core/console.py +7 -3
  16. langfun/core/eval/base.py +2 -3
  17. langfun/core/eval/v2/evaluation.py +3 -1
  18. langfun/core/eval/v2/reporting.py +8 -4
  19. langfun/core/language_model.py +84 -8
  20. langfun/core/language_model_test.py +84 -29
  21. langfun/core/llms/__init__.py +46 -11
  22. langfun/core/llms/anthropic.py +1 -123
  23. langfun/core/llms/anthropic_test.py +0 -48
  24. langfun/core/llms/deepseek.py +117 -0
  25. langfun/core/llms/deepseek_test.py +61 -0
  26. langfun/core/llms/gemini.py +1 -1
  27. langfun/core/llms/groq.py +12 -99
  28. langfun/core/llms/groq_test.py +31 -137
  29. langfun/core/llms/llama_cpp.py +17 -54
  30. langfun/core/llms/llama_cpp_test.py +2 -34
  31. langfun/core/llms/openai.py +9 -147
  32. langfun/core/llms/openai_compatible.py +179 -0
  33. langfun/core/llms/openai_compatible_test.py +495 -0
  34. langfun/core/llms/openai_test.py +13 -423
  35. langfun/core/llms/rest_test.py +1 -1
  36. langfun/core/llms/vertexai.py +387 -18
  37. langfun/core/llms/vertexai_test.py +52 -0
  38. langfun/core/message_test.py +3 -3
  39. langfun/core/modalities/mime.py +8 -0
  40. langfun/core/modalities/mime_test.py +19 -4
  41. langfun/core/modality_test.py +0 -1
  42. langfun/core/structured/mapping.py +13 -13
  43. langfun/core/structured/mapping_test.py +2 -2
  44. langfun/core/structured/schema.py +16 -8
  45. langfun/core/structured/schema_generation.py +1 -1
  46. {langfun-0.1.2.dev202501080804.dist-info → langfun-0.1.2.dev202501240804.dist-info}/METADATA +13 -2
  47. {langfun-0.1.2.dev202501080804.dist-info → langfun-0.1.2.dev202501240804.dist-info}/RECORD +50 -52
  48. {langfun-0.1.2.dev202501080804.dist-info → langfun-0.1.2.dev202501240804.dist-info}/WHEEL +1 -1
  49. langfun/core/coding/python/errors.py +0 -108
  50. langfun/core/coding/python/errors_test.py +0 -99
  51. langfun/core/coding/python/permissions.py +0 -90
  52. langfun/core/coding/python/permissions_test.py +0 -86
  53. langfun/core/text_formatting.py +0 -168
  54. langfun/core/text_formatting_test.py +0 -65
  55. {langfun-0.1.2.dev202501080804.dist-info → langfun-0.1.2.dev202501240804.dist-info}/LICENSE +0 -0
  56. {langfun-0.1.2.dev202501080804.dist-info → langfun-0.1.2.dev202501240804.dist-info}/top_level.txt +0 -0
@@ -17,8 +17,7 @@ import os
17
17
  from typing import Annotated, Any
18
18
 
19
19
  import langfun.core as lf
20
- from langfun.core import modalities as lf_modalities
21
- from langfun.core.llms import rest
20
+ from langfun.core.llms import openai_compatible
22
21
  import pyglove as pg
23
22
 
24
23
 
@@ -299,7 +298,7 @@ SUPPORTED_MODELS_AND_SETTINGS = {
299
298
 
300
299
 
301
300
  @lf.use_init_args(['model'])
302
- class OpenAI(rest.REST):
301
+ class OpenAI(openai_compatible.OpenAICompatible):
303
302
  """OpenAI model."""
304
303
 
305
304
  model: pg.typing.Annotated[
@@ -311,11 +310,6 @@ class OpenAI(rest.REST):
311
310
 
312
311
  api_endpoint: str = 'https://api.openai.com/v1/chat/completions'
313
312
 
314
- multimodal: Annotated[
315
- bool,
316
- 'Whether this model has multimodal support.'
317
- ] = False
318
-
319
313
  api_key: Annotated[
320
314
  str | None,
321
315
  (
@@ -363,10 +357,9 @@ class OpenAI(rest.REST):
363
357
 
364
358
  @property
365
359
  def headers(self) -> dict[str, Any]:
366
- headers = {
367
- 'Content-Type': 'application/json',
368
- 'Authorization': f'Bearer {self._api_key}',
369
- }
360
+ assert self._api_initialized
361
+ headers = super().headers
362
+ headers['Authorization'] = f'Bearer {self._api_key}'
370
363
  if self._organization:
371
364
  headers['OpenAI-Organization'] = self._organization
372
365
  if self._project:
@@ -411,141 +404,10 @@ class OpenAI(rest.REST):
411
404
 
412
405
  def _request_args(
413
406
  self, options: lf.LMSamplingOptions) -> dict[str, Any]:
414
- # Reference:
415
- # https://platform.openai.com/docs/api-reference/completions/create
416
- # NOTE(daiyip): options.top_k is not applicable.
417
- args = dict(
418
- model=self.model,
419
- n=options.n,
420
- top_logprobs=options.top_logprobs,
421
- )
422
- if options.logprobs:
423
- # Reasoning models (o1 series) does not support `logprobs` by 2024/09/12.
424
- if self.model.startswith('o1-'):
425
- raise RuntimeError('`logprobs` is not supported on {self.model!r}.')
426
- args['logprobs'] = options.logprobs
427
-
428
- if options.temperature is not None:
429
- args['temperature'] = options.temperature
430
- if options.max_tokens is not None:
431
- args['max_completion_tokens'] = options.max_tokens
432
- if options.top_p is not None:
433
- args['top_p'] = options.top_p
434
- if options.stop:
435
- args['stop'] = options.stop
436
- if options.random_seed is not None:
437
- args['seed'] = options.random_seed
438
- return args
439
-
440
- def _content_from_message(self, message: lf.Message):
441
- """Returns a OpenAI content object from a Langfun message."""
442
- def _uri_from(chunk: lf.Modality) -> str:
443
- if chunk.uri and chunk.uri.lower().startswith(
444
- ('http:', 'https:', 'ftp:')
445
- ):
446
- return chunk.uri
447
- return chunk.content_uri
448
-
449
- content = []
450
- for chunk in message.chunk():
451
- if isinstance(chunk, str):
452
- item = dict(type='text', text=chunk)
453
- elif isinstance(chunk, lf_modalities.Image) and self.multimodal:
454
- item = dict(type='image_url', image_url=dict(url=_uri_from(chunk)))
455
- else:
456
- raise ValueError(f'Unsupported modality: {chunk!r}.')
457
- content.append(item)
458
- return content
459
-
460
- def request(
461
- self,
462
- prompt: lf.Message,
463
- sampling_options: lf.LMSamplingOptions
464
- ) -> dict[str, Any]:
465
- """Returns the JSON input for a message."""
466
- request_args = self._request_args(sampling_options)
467
-
468
- # Users could use `metadata_json_schema` to pass additional
469
- # request arguments.
470
- json_schema = prompt.metadata.get('json_schema')
471
- if json_schema is not None:
472
- if not isinstance(json_schema, dict):
473
- raise ValueError(
474
- f'`json_schema` must be a dict, got {json_schema!r}.'
475
- )
476
- if 'title' not in json_schema:
477
- raise ValueError(
478
- f'The root of `json_schema` must have a `title` field, '
479
- f'got {json_schema!r}.'
480
- )
481
- request_args.update(
482
- response_format=dict(
483
- type='json_schema',
484
- json_schema=dict(
485
- schema=json_schema,
486
- name=json_schema['title'],
487
- strict=True,
488
- )
489
- )
490
- )
491
- prompt.metadata.formatted_text = (
492
- prompt.text
493
- + '\n\n [RESPONSE FORMAT (not part of prompt)]\n'
494
- + pg.to_json_str(request_args['response_format'], json_indent=2)
495
- )
496
-
497
- # Prepare messages.
498
- messages = []
499
- # Users could use `metadata_system_message` to pass system message.
500
- system_message = prompt.metadata.get('system_message')
501
- if system_message:
502
- system_message = lf.SystemMessage.from_value(system_message)
503
- messages.append(
504
- dict(role='system',
505
- content=self._content_from_message(system_message))
506
- )
507
- messages.append(
508
- dict(role='user', content=self._content_from_message(prompt))
509
- )
510
- request = dict()
511
- request.update(request_args)
512
- request['messages'] = messages
513
- return request
514
-
515
- def _parse_choice(self, choice: dict[str, Any]) -> lf.LMSample:
516
- # Reference:
517
- # https://platform.openai.com/docs/api-reference/chat/object
518
- logprobs = None
519
- choice_logprobs = choice.get('logprobs')
520
- if choice_logprobs:
521
- logprobs = [
522
- (
523
- t['token'],
524
- t['logprob'],
525
- [(tt['token'], tt['logprob']) for tt in t['top_logprobs']],
526
- )
527
- for t in choice_logprobs['content']
528
- ]
529
- return lf.LMSample(
530
- choice['message']['content'],
531
- score=0.0,
532
- logprobs=logprobs,
533
- )
534
-
535
- def result(self, json: dict[str, Any]) -> lf.LMSamplingResult:
536
- usage = json['usage']
537
- return lf.LMSamplingResult(
538
- samples=[self._parse_choice(choice) for choice in json['choices']],
539
- usage=lf.LMSamplingUsage(
540
- prompt_tokens=usage['prompt_tokens'],
541
- completion_tokens=usage['completion_tokens'],
542
- total_tokens=usage['total_tokens'],
543
- estimated_cost=self.estimate_cost(
544
- num_input_tokens=usage['prompt_tokens'],
545
- num_output_tokens=usage['completion_tokens'],
546
- )
547
- ),
548
- )
407
+ # Reasoning models (o1 and o3 series) do not support `logprobs` (as of 2024/09/12 for o1).
408
+ if options.logprobs and self.model.startswith(('o1-', 'o3-')):
409
+ raise RuntimeError('`logprobs` is not supported on {self.model!r}.')
410
+ return super()._request_args(options)
549
411
 
550
412
 
551
413
  class GptO1(OpenAI):
@@ -0,0 +1,179 @@
1
+ # Copyright 2025 The Langfun Authors
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Base for OpenAI compatible models (including OpenAI)."""
15
+
16
+ from typing import Annotated, Any
17
+
18
+ import langfun.core as lf
19
+ from langfun.core import modalities as lf_modalities
20
+ from langfun.core.llms import rest
21
+ import pyglove as pg
22
+
23
+
24
+ @lf.use_init_args(['api_endpoint', 'model'])
25
+ class OpenAICompatible(rest.REST):
26
+ """Base for OpenAI compatible models."""
27
+
28
+ model: Annotated[
29
+ str, 'The name of the model to use.',
30
+ ] = ''
31
+
32
+ multimodal: Annotated[
33
+ bool, 'Whether this model has multimodal support.'
34
+ ] = False
35
+
36
+ @property
37
+ def headers(self) -> dict[str, Any]:
38
+ return {
39
+ 'Content-Type': 'application/json'
40
+ }
41
+
42
+ def _request_args(
43
+ self, options: lf.LMSamplingOptions) -> dict[str, Any]:
44
+ """Returns a dict as request arguments."""
45
+ # Reference:
46
+ # https://platform.openai.com/docs/api-reference/completions/create
47
+ # NOTE(daiyip): options.top_k is not applicable.
48
+ args = dict(
49
+ n=options.n,
50
+ top_logprobs=options.top_logprobs,
51
+ )
52
+ if self.model:
53
+ args['model'] = self.model
54
+ if options.logprobs:
55
+ args['logprobs'] = options.logprobs
56
+ if options.temperature is not None:
57
+ args['temperature'] = options.temperature
58
+ if options.max_tokens is not None:
59
+ args['max_completion_tokens'] = options.max_tokens
60
+ if options.top_p is not None:
61
+ args['top_p'] = options.top_p
62
+ if options.stop:
63
+ args['stop'] = options.stop
64
+ if options.random_seed is not None:
65
+ args['seed'] = options.random_seed
66
+ return args
67
+
68
+ def _content_from_message(self, message: lf.Message) -> list[dict[str, Any]]:
69
+ """Returns an OpenAI content object from a Langfun message."""
70
+ content = []
71
+ for chunk in message.chunk():
72
+ if isinstance(chunk, str):
73
+ item = dict(type='text', text=chunk)
74
+ elif isinstance(chunk, lf_modalities.Image) and self.multimodal:
75
+ item = dict(type='image_url', image_url=dict(url=chunk.embeddable_uri))
76
+ else:
77
+ raise ValueError(f'Unsupported modality: {chunk!r}.')
78
+ content.append(item)
79
+ return content
80
+
81
+ def request(
82
+ self,
83
+ prompt: lf.Message,
84
+ sampling_options: lf.LMSamplingOptions
85
+ ) -> dict[str, Any]:
86
+ """Returns the JSON input for a message."""
87
+ request_args = self._request_args(sampling_options)
88
+
89
+ # Users could use `metadata_json_schema` to pass additional
90
+ # request arguments.
91
+ json_schema = prompt.metadata.get('json_schema')
92
+ if json_schema is not None:
93
+ if not isinstance(json_schema, dict):
94
+ raise ValueError(
95
+ f'`json_schema` must be a dict, got {json_schema!r}.'
96
+ )
97
+ if 'title' not in json_schema:
98
+ raise ValueError(
99
+ f'The root of `json_schema` must have a `title` field, '
100
+ f'got {json_schema!r}.'
101
+ )
102
+ request_args.update(
103
+ response_format=dict(
104
+ type='json_schema',
105
+ json_schema=dict(
106
+ schema=json_schema,
107
+ name=json_schema['title'],
108
+ strict=True,
109
+ )
110
+ )
111
+ )
112
+ prompt.metadata.formatted_text = (
113
+ prompt.text
114
+ + '\n\n [RESPONSE FORMAT (not part of prompt)]\n'
115
+ + pg.to_json_str(request_args['response_format'], json_indent=2)
116
+ )
117
+
118
+ # Prepare messages.
119
+ messages = []
120
+ # Users could use `metadata_system_message` to pass system message.
121
+ system_message = prompt.metadata.get('system_message')
122
+ if system_message:
123
+ system_message = lf.SystemMessage.from_value(system_message)
124
+ messages.append(
125
+ dict(role='system',
126
+ content=self._content_from_message(system_message))
127
+ )
128
+ messages.append(
129
+ dict(role='user', content=self._content_from_message(prompt))
130
+ )
131
+ request = dict()
132
+ request.update(request_args)
133
+ request['messages'] = messages
134
+ return request
135
+
136
+ def _parse_choice(self, choice: dict[str, Any]) -> lf.LMSample:
137
+ # Reference:
138
+ # https://platform.openai.com/docs/api-reference/chat/object
139
+ logprobs = None
140
+ choice_logprobs = choice.get('logprobs')
141
+ if choice_logprobs:
142
+ logprobs = [
143
+ (
144
+ t['token'],
145
+ t['logprob'],
146
+ [(tt['token'], tt['logprob']) for tt in t['top_logprobs']],
147
+ )
148
+ for t in choice_logprobs['content']
149
+ ]
150
+ return lf.LMSample(
151
+ choice['message']['content'],
152
+ score=0.0,
153
+ logprobs=logprobs,
154
+ )
155
+
156
+ def result(self, json: dict[str, Any]) -> lf.LMSamplingResult:
157
+ """Returns a LMSamplingResult from a JSON response."""
158
+ usage = json['usage']
159
+ return lf.LMSamplingResult(
160
+ samples=[self._parse_choice(choice) for choice in json['choices']],
161
+ usage=lf.LMSamplingUsage(
162
+ prompt_tokens=usage['prompt_tokens'],
163
+ completion_tokens=usage['completion_tokens'],
164
+ total_tokens=usage['total_tokens'],
165
+ estimated_cost=self.estimate_cost(
166
+ num_input_tokens=usage['prompt_tokens'],
167
+ num_output_tokens=usage['completion_tokens'],
168
+ )
169
+ ),
170
+ )
171
+
172
+ def estimate_cost(
173
+ self,
174
+ num_input_tokens: int,
175
+ num_output_tokens: int
176
+ ) -> float | None:
177
+ """Estimate the cost based on usage."""
178
+ del num_input_tokens, num_output_tokens
179
+ return None