langfun 0.1.2.dev202509120804__py3-none-any.whl → 0.1.2.dev202512150805__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (162)
  1. langfun/__init__.py +1 -1
  2. langfun/core/__init__.py +7 -1
  3. langfun/core/agentic/__init__.py +8 -1
  4. langfun/core/agentic/action.py +740 -112
  5. langfun/core/agentic/action_eval.py +9 -2
  6. langfun/core/agentic/action_test.py +189 -24
  7. langfun/core/async_support.py +104 -5
  8. langfun/core/async_support_test.py +23 -0
  9. langfun/core/coding/python/correction.py +19 -9
  10. langfun/core/coding/python/execution.py +14 -12
  11. langfun/core/coding/python/generation.py +21 -16
  12. langfun/core/coding/python/sandboxing.py +23 -3
  13. langfun/core/component.py +42 -3
  14. langfun/core/concurrent.py +70 -6
  15. langfun/core/concurrent_test.py +9 -2
  16. langfun/core/console.py +1 -1
  17. langfun/core/data/conversion/anthropic.py +12 -3
  18. langfun/core/data/conversion/anthropic_test.py +8 -6
  19. langfun/core/data/conversion/gemini.py +11 -2
  20. langfun/core/data/conversion/gemini_test.py +48 -9
  21. langfun/core/data/conversion/openai.py +145 -31
  22. langfun/core/data/conversion/openai_test.py +161 -17
  23. langfun/core/eval/base.py +48 -44
  24. langfun/core/eval/base_test.py +5 -5
  25. langfun/core/eval/matching.py +5 -2
  26. langfun/core/eval/patching.py +3 -3
  27. langfun/core/eval/scoring.py +4 -3
  28. langfun/core/eval/v2/__init__.py +3 -0
  29. langfun/core/eval/v2/checkpointing.py +148 -46
  30. langfun/core/eval/v2/checkpointing_test.py +9 -2
  31. langfun/core/eval/v2/config_saver.py +37 -0
  32. langfun/core/eval/v2/config_saver_test.py +36 -0
  33. langfun/core/eval/v2/eval_test_helper.py +104 -3
  34. langfun/core/eval/v2/evaluation.py +102 -19
  35. langfun/core/eval/v2/evaluation_test.py +9 -3
  36. langfun/core/eval/v2/example.py +50 -40
  37. langfun/core/eval/v2/example_test.py +16 -8
  38. langfun/core/eval/v2/experiment.py +95 -20
  39. langfun/core/eval/v2/experiment_test.py +19 -0
  40. langfun/core/eval/v2/metric_values.py +31 -3
  41. langfun/core/eval/v2/metric_values_test.py +32 -0
  42. langfun/core/eval/v2/metrics.py +157 -44
  43. langfun/core/eval/v2/metrics_test.py +39 -18
  44. langfun/core/eval/v2/progress.py +31 -1
  45. langfun/core/eval/v2/progress_test.py +27 -0
  46. langfun/core/eval/v2/progress_tracking.py +13 -5
  47. langfun/core/eval/v2/progress_tracking_test.py +9 -1
  48. langfun/core/eval/v2/reporting.py +88 -71
  49. langfun/core/eval/v2/reporting_test.py +24 -6
  50. langfun/core/eval/v2/runners/__init__.py +30 -0
  51. langfun/core/eval/v2/{runners.py → runners/base.py} +73 -180
  52. langfun/core/eval/v2/runners/beam.py +354 -0
  53. langfun/core/eval/v2/runners/beam_test.py +153 -0
  54. langfun/core/eval/v2/runners/ckpt_monitor.py +350 -0
  55. langfun/core/eval/v2/runners/ckpt_monitor_test.py +213 -0
  56. langfun/core/eval/v2/runners/debug.py +40 -0
  57. langfun/core/eval/v2/runners/debug_test.py +76 -0
  58. langfun/core/eval/v2/runners/parallel.py +243 -0
  59. langfun/core/eval/v2/runners/parallel_test.py +182 -0
  60. langfun/core/eval/v2/runners/sequential.py +47 -0
  61. langfun/core/eval/v2/runners/sequential_test.py +169 -0
  62. langfun/core/langfunc.py +45 -130
  63. langfun/core/langfunc_test.py +7 -5
  64. langfun/core/language_model.py +189 -36
  65. langfun/core/language_model_test.py +54 -3
  66. langfun/core/llms/__init__.py +14 -1
  67. langfun/core/llms/anthropic.py +157 -2
  68. langfun/core/llms/azure_openai.py +29 -17
  69. langfun/core/llms/cache/base.py +25 -3
  70. langfun/core/llms/cache/in_memory.py +48 -7
  71. langfun/core/llms/cache/in_memory_test.py +14 -4
  72. langfun/core/llms/compositional.py +25 -1
  73. langfun/core/llms/deepseek.py +30 -2
  74. langfun/core/llms/fake.py +32 -1
  75. langfun/core/llms/gemini.py +90 -12
  76. langfun/core/llms/gemini_test.py +110 -0
  77. langfun/core/llms/google_genai.py +52 -1
  78. langfun/core/llms/groq.py +28 -3
  79. langfun/core/llms/llama_cpp.py +23 -4
  80. langfun/core/llms/openai.py +120 -3
  81. langfun/core/llms/openai_compatible.py +148 -27
  82. langfun/core/llms/openai_compatible_test.py +207 -20
  83. langfun/core/llms/openai_test.py +0 -2
  84. langfun/core/llms/rest.py +16 -1
  85. langfun/core/llms/vertexai.py +78 -8
  86. langfun/core/logging.py +1 -1
  87. langfun/core/mcp/__init__.py +10 -0
  88. langfun/core/mcp/client.py +177 -0
  89. langfun/core/mcp/client_test.py +71 -0
  90. langfun/core/mcp/session.py +241 -0
  91. langfun/core/mcp/session_test.py +54 -0
  92. langfun/core/mcp/testing/simple_mcp_client.py +33 -0
  93. langfun/core/mcp/testing/simple_mcp_server.py +33 -0
  94. langfun/core/mcp/tool.py +254 -0
  95. langfun/core/mcp/tool_test.py +197 -0
  96. langfun/core/memory.py +1 -0
  97. langfun/core/message.py +160 -55
  98. langfun/core/message_test.py +65 -81
  99. langfun/core/modalities/__init__.py +8 -0
  100. langfun/core/modalities/audio.py +21 -1
  101. langfun/core/modalities/image.py +73 -3
  102. langfun/core/modalities/image_test.py +116 -0
  103. langfun/core/modalities/mime.py +78 -4
  104. langfun/core/modalities/mime_test.py +59 -0
  105. langfun/core/modalities/pdf.py +19 -1
  106. langfun/core/modalities/video.py +21 -1
  107. langfun/core/modality.py +167 -29
  108. langfun/core/modality_test.py +42 -12
  109. langfun/core/natural_language.py +1 -1
  110. langfun/core/sampling.py +4 -4
  111. langfun/core/sampling_test.py +20 -4
  112. langfun/core/structured/__init__.py +2 -24
  113. langfun/core/structured/completion.py +34 -44
  114. langfun/core/structured/completion_test.py +23 -43
  115. langfun/core/structured/description.py +54 -50
  116. langfun/core/structured/function_generation.py +29 -12
  117. langfun/core/structured/mapping.py +81 -37
  118. langfun/core/structured/parsing.py +95 -79
  119. langfun/core/structured/parsing_test.py +0 -3
  120. langfun/core/structured/querying.py +230 -154
  121. langfun/core/structured/querying_test.py +69 -33
  122. langfun/core/structured/schema/__init__.py +49 -0
  123. langfun/core/structured/schema/base.py +664 -0
  124. langfun/core/structured/schema/base_test.py +531 -0
  125. langfun/core/structured/schema/json.py +174 -0
  126. langfun/core/structured/schema/json_test.py +121 -0
  127. langfun/core/structured/schema/python.py +316 -0
  128. langfun/core/structured/schema/python_test.py +410 -0
  129. langfun/core/structured/schema_generation.py +33 -14
  130. langfun/core/structured/scoring.py +47 -36
  131. langfun/core/structured/tokenization.py +26 -11
  132. langfun/core/subscription.py +2 -2
  133. langfun/core/template.py +175 -50
  134. langfun/core/template_test.py +123 -17
  135. langfun/env/__init__.py +43 -0
  136. langfun/env/base_environment.py +827 -0
  137. langfun/env/base_environment_test.py +473 -0
  138. langfun/env/base_feature.py +304 -0
  139. langfun/env/base_feature_test.py +228 -0
  140. langfun/env/base_sandbox.py +842 -0
  141. langfun/env/base_sandbox_test.py +1235 -0
  142. langfun/env/event_handlers/__init__.py +14 -0
  143. langfun/env/event_handlers/chain.py +233 -0
  144. langfun/env/event_handlers/chain_test.py +253 -0
  145. langfun/env/event_handlers/event_logger.py +472 -0
  146. langfun/env/event_handlers/event_logger_test.py +304 -0
  147. langfun/env/event_handlers/metric_writer.py +726 -0
  148. langfun/env/event_handlers/metric_writer_test.py +214 -0
  149. langfun/env/interface.py +1640 -0
  150. langfun/env/interface_test.py +153 -0
  151. langfun/env/load_balancers.py +59 -0
  152. langfun/env/load_balancers_test.py +141 -0
  153. langfun/env/test_utils.py +507 -0
  154. {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/METADATA +7 -3
  155. langfun-0.1.2.dev202512150805.dist-info/RECORD +217 -0
  156. langfun/core/eval/v2/runners_test.py +0 -343
  157. langfun/core/structured/schema.py +0 -987
  158. langfun/core/structured/schema_test.py +0 -982
  159. langfun-0.1.2.dev202509120804.dist-info/RECORD +0 -172
  160. {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/WHEEL +0 -0
  161. {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/licenses/LICENSE +0 -0
  162. {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/top_level.txt +0 -0
langfun/core/llms/gemini.py CHANGED
@@ -151,6 +151,55 @@ SUPPORTED_MODELS = [
     #
     # Production models.
     #
+    # Gemini 3 Pro Preview
+    GeminiModelInfo(
+        model_id='gemini-3-pro-preview',
+        in_service=True,
+        provider=pg.oneof(['Google GenAI', 'VertexAI']),
+        model_type='instruction-tuned',
+        description='Gemini 3 Pro Preview.',
+        release_date=datetime.datetime(2025, 11, 18),
+        input_modalities=GeminiModelInfo.ALL_SUPPORTED_INPUT_TYPES,
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=1_048_576,
+            max_output_tokens=65_536,
+        ),
+        pricing=GeminiModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.2,
+            cost_per_1m_input_tokens=2.0,
+            cost_per_1m_output_tokens=12.0,
+            cost_per_1m_cached_input_tokens_with_prompt_longer_than_128k=0.4,
+            cost_per_1m_input_tokens_with_prompt_longer_than_128k=4.0,
+            cost_per_1m_output_tokens_with_prompt_longer_than_128k=18.0,
+        ),
+        rate_limits=lf.ModelInfo.RateLimits(
+            max_requests_per_minute=2000,
+            max_tokens_per_minute=4_000_000,
+        ),
+    ),
+    # Gemini 3 Pro Image Preview
+    GeminiModelInfo(
+        model_id='gemini-3-pro-image-preview',
+        in_service=True,
+        experimental=True,
+        provider=pg.oneof(['Google GenAI', 'VertexAI']),
+        model_type='instruction-tuned',
+        description=(
+            'Gemini 3 Pro Image Preview for high-fidelity image generation,'
+            ' editing, and visual reasoning.'
+        ),
+        release_date=datetime.datetime(2025, 12, 9),
+        input_modalities=GeminiModelInfo.INPUT_IMAGE_TYPES
+        + GeminiModelInfo.INPUT_DOC_TYPES,
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=65_536,
+            max_output_tokens=32_768,
+        ),
+        rate_limits=lf.ModelInfo.RateLimits(
+            max_requests_per_minute=200,
+            max_tokens_per_minute=1_000_000,
+        ),
+    ),
     # Gemini 2.5 Flash
     GeminiModelInfo(
         model_id='gemini-2.5-flash',
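The two new entries register context-window, pricing, and rate-limit metadata for the preview models. A minimal sketch of reading that metadata back, assuming a `model_info` property is exposed on model instances (the API key is a placeholder; expected values come from the entries above):

```python
import langfun as lf

# Inspect the registered metadata for the new model.
lm = lf.llms.GoogleGenAI('gemini-3-pro-preview', api_key='MY_API_KEY')
print(lm.model_info.context_length.max_input_tokens)    # 1048576
print(lm.model_info.pricing.cost_per_1m_output_tokens)  # 12.0
```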
@@ -696,7 +745,15 @@ _SUPPORTED_MODELS_BY_ID = {m.model_id: m for m in SUPPORTED_MODELS}

 @pg.use_init_args(['model'])
 class Gemini(rest.REST):
-  """Language models provided by Google GenAI."""
+  """Base class for Gemini models served on Google GenAI and Vertex AI.
+
+  This class implements the Gemini API protocol, shared by
+  `lf.llms.GoogleGenAI` and `lf.llms.VertexAI`, providing common request
+  formatting and response parsing for Gemini models.
+
+  It is not intended to be used directly. Please use `lf.llms.GoogleGenAI` or
+  `lf.llms.VertexAI` instead.
+  """

   model: pg.typing.Annotated[
       pg.typing.Enum(
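Per the reworked docstring, `Gemini` now holds only the shared protocol, and the two subclasses are the supported entry points. A minimal sketch of both (credentials are placeholders; the `project`/`location` parameters follow the usual Vertex AI setup and are assumptions here):

```python
import langfun as lf

# Gemini API protocol served via Google GenAI (API-key based).
lm = lf.llms.GoogleGenAI('gemini-3-pro-preview', api_key='MY_API_KEY')

# The same protocol served via Vertex AI (project/location based).
lm = lf.llms.VertexAI(
    'gemini-3-pro-preview', project='my-project', location='us-central1'
)
```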
@@ -752,6 +809,13 @@ class Gemini(rest.REST):
         prompt.as_format('gemini', chunk_preprocessor=modality_conversion)
     )
     request['contents'] = contents
+    request['toolConfig'] = {
+        'functionCallingConfig': {
+            'mode': 'NONE',
+        }
+    }
+    if sampling_options.extras:
+      request.update(sampling_options.extras)
     return request

   def _generation_config(
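Two behavior changes land in `request()`: function calling is now pinned to `NONE` by default, and `sampling_options.extras` is merged into the request afterwards, so extras can override any top-level request field. A minimal sketch of a hypothetical override (`extras` keys are raw Gemini API request fields, passed through verbatim):

```python
import langfun as lf

# Hypothetical: turn automatic function calling back on by overriding the
# 'toolConfig' default; extras are applied after it is set (see hunk above).
lm = lf.llms.GoogleGenAI(
    'gemini-3-pro-preview',
    api_key='MY_API_KEY',
    sampling_options=lf.LMSamplingOptions(
        extras={'toolConfig': {'functionCallingConfig': {'mode': 'AUTO'}}},
    ),
)
```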
@@ -783,11 +847,21 @@ class Gemini(rest.REST):
           + '\n\n [RESPONSE FORMAT (not part of prompt)]\n'
           + pg.to_json_str(json_schema, json_indent=2)
       )
+    thinking_config_data = {}
     if options.max_thinking_tokens is not None:
-      config['thinkingConfig'] = {
-          'includeThoughts': options.max_thinking_tokens > 0,
-          'thinkingBudget': options.max_thinking_tokens,
-      }
+      thinking_config_data['includeThoughts'] = options.max_thinking_tokens > 0
+      thinking_config_data['thinkingBudget'] = options.max_thinking_tokens
+    if options.thinking_level is not None:
+      thinking_config_data['thinkingLevel'] = options.thinking_level
+    if thinking_config_data:
+      config['thinkingConfig'] = thinking_config_data
+
+    # This is a new feature since Gemini 3.
+    # Skip for image generation models as they don't support mediaResolution.
+    if self.model_id.startswith('gemini-3') and not (
+        self.response_modalities and 'IMAGE' in self.response_modalities
+    ):
+      config['mediaResolution'] = 'MEDIA_RESOLUTION_HIGH'

     if self.response_modalities:
       config['responseModalities'] = self.response_modalities
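`thinkingConfig` is now assembled incrementally, so the new `thinking_level` option can be set with or without `max_thinking_tokens`, and Gemini 3 text models are pinned to `MEDIA_RESOLUTION_HIGH`. A minimal sketch of the new knobs (option names are confirmed by the tests further down; the API key is a placeholder):

```python
import langfun as lf

lm = lf.llms.GoogleGenAI(
    'gemini-3-pro-preview',
    api_key='MY_API_KEY',
    sampling_options=lf.LMSamplingOptions(
        max_thinking_tokens=100,  # -> includeThoughts=True, thinkingBudget=100
        thinking_level='low',     # -> thinkingLevel='low' (new in this release)
    ),
)
```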
@@ -803,10 +877,14 @@ class Gemini(rest.REST):
           'No candidates found in response. This is a Gemini API issue that '
           'happens occasionally, and retrying should fix it. '
       )
-    messages = [
-        lf.Message.from_value(candidate['content'], format='gemini')
-        for candidate in candidates
-    ]
+
+    messages = []
+    for candidate in candidates:
+      message = lf.Message.from_value(candidate['content'], format='gemini')
+      if finish_reason := candidate.get('finishReason'):
+        message.metadata['finish_reason'] = finish_reason
+      messages.append(message)
+
     usage = json['usageMetadata']
     input_tokens = usage['promptTokenCount']
     # NOTE(daiyip): We saw cases that `candidatesTokenCount` is not present.
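Each candidate's `finishReason` now surfaces as `finish_reason` on the message metadata, so truncation is detectable without inspecting the raw response. A minimal sketch (the 'MAX_TOKENS' value is exercised by the new test below; the API key is a placeholder):

```python
import langfun as lf

lm = lf.llms.GoogleGenAI('gemini-3-pro-preview', api_key='MY_API_KEY')
r = lm('Who are you?')
if r.metadata.get('finish_reason') == 'MAX_TOKENS':
  print('Response was truncated; consider raising max_tokens.')
```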
@@ -828,9 +906,9 @@ class Gemini(rest.REST):
     )

   def _error(self, status_code: int, content: str) -> lf.LMError:
-    if (
-        status_code == 400
-        and b'exceeds the maximum number of tokens' in content
+    if status_code == 400 and (
+        b'exceeds the maximum number of tokens' in content
+        or b'Reduce the input token count and try again.' in content
     ):
       return lf.ContextLimitError(f'{status_code}: {content}')
     return super()._error(status_code, content)
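The 400 handler now also recognizes the 'Reduce the input token count' wording, so both phrasings of a context overflow map to `lf.ContextLimitError`. A minimal sketch of handling it (the oversized prompt is contrived, and the top-level re-export of `ContextLimitError` is assumed from its `lf.` usage in the hunk above):

```python
import langfun as lf

lm = lf.llms.GoogleGenAI('gemini-3-pro-preview', api_key='MY_API_KEY')
try:
  lm('word ' * 2_000_000)  # Deliberately exceed the context window.
except lf.ContextLimitError as e:
  print('Context limit exceeded:', e)
```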
langfun/core/llms/gemini_test.py CHANGED
@@ -177,6 +177,58 @@ class GeminiTest(unittest.TestCase):
         ),
     )

+    # Add test for thinkingConfig with thinking_level.
+    actual = model._generation_config(
+        lf.UserMessage('hi'),
+        lf.LMSamplingOptions(
+            thinking_level='high',
+        ),
+    )
+    self.assertEqual(
+        actual,
+        dict(
+            candidateCount=1,
+            temperature=None,
+            topP=None,
+            topK=40,
+            maxOutputTokens=None,
+            stopSequences=None,
+            responseLogprobs=False,
+            logprobs=None,
+            seed=None,
+            thinkingConfig={'thinkingLevel': 'high'},
+        ),
+    )
+
+    # Add test for thinkingConfig with both max_thinking_tokens and
+    # thinking_level.
+    actual = model._generation_config(
+        lf.UserMessage('hi'),
+        lf.LMSamplingOptions(
+            max_thinking_tokens=100,
+            thinking_level='low',
+        ),
+    )
+    self.assertEqual(
+        actual,
+        dict(
+            candidateCount=1,
+            temperature=None,
+            topP=None,
+            topK=40,
+            maxOutputTokens=None,
+            stopSequences=None,
+            responseLogprobs=False,
+            logprobs=None,
+            seed=None,
+            thinkingConfig={
+                'includeThoughts': True,
+                'thinkingBudget': 100,
+                'thinkingLevel': 'low',
+            },
+        ),
+    )
+
     with self.assertRaisesRegex(
         ValueError, '`json_schema` must be a dict, got'
     ):
@@ -185,6 +237,32 @@ class GeminiTest(unittest.TestCase):
         lf.LMSamplingOptions(),
     )

+  def test_media_resolution_for_gemini3(self):
+    model = gemini.Gemini('gemini-3-pro-preview', api_endpoint='')
+    config = model._generation_config(
+        lf.UserMessage('hi'),
+        lf.LMSamplingOptions(),
+    )
+    self.assertEqual(config.get('mediaResolution'), 'MEDIA_RESOLUTION_HIGH')
+
+    model = gemini.Gemini('gemini-1.5-pro', api_endpoint='')
+    config = model._generation_config(
+        lf.UserMessage('hi'),
+        lf.LMSamplingOptions(),
+    )
+    self.assertIsNone(config.get('mediaResolution'))
+
+  def test_request_tool_config(self):
+    model = gemini.Gemini('gemini-1.5-pro', api_endpoint='')
+    request = model.request(
+        lf.UserMessage('hi'),
+        lf.LMSamplingOptions(),
+    )
+    self.assertEqual(
+        request.get('toolConfig'),
+        {'functionCallingConfig': {'mode': 'NONE'}},
+    )
+
   def test_call_model(self):
     with mock.patch('requests.Session.post') as mock_generate:
       mock_generate.side_effect = mock_requests_post
@@ -225,6 +303,38 @@ class GeminiTest(unittest.TestCase):
     ):
       lm('hello')

+  def test_call_model_with_max_tokens_error(self):
+    def mock_requests_post_error(*args, **kwargs):
+      del args, kwargs
+      response = requests.Response()
+      response.status_code = 200
+      response._content = pg.to_json_str({
+          'candidates': [
+              {
+                  'finishReason': 'MAX_TOKENS',
+                  'content': {
+                      'parts': [
+                          {'text': 'This is'}
+                      ]
+                  }
+              },
+          ],
+          'usageMetadata': {
+              'promptTokenCount': 3,
+              'candidatesTokenCount': 4,
+          }
+      }).encode()
+      return response
+
+    with mock.patch('requests.Session.post') as mock_generate:
+      mock_generate.side_effect = mock_requests_post_error
+      lm = gemini.Gemini('gemini-1.5-pro', api_endpoint='')
+      m = lm('hello')
+      self.assertEqual(m.metadata.finish_reason, 'MAX_TOKENS')
+      self.assertEqual(m.text, 'This is')
+
   def test_call_model_with_system_message(self):
     with mock.patch('requests.Session.post') as mock_generate:
       mock_generate.side_effect = mock_requests_post
langfun/core/llms/google_genai.py CHANGED
@@ -25,7 +25,35 @@ import pyglove as pg
 @lf.use_init_args(['model'])
 @pg.members([('api_endpoint', pg.typing.Str().freeze(''))])
 class GenAI(gemini.Gemini):
-  """Language models provided by Google GenAI."""
+  """Google GenAI models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call Gemini 1.5 Flash using API key from environment variable
+  # 'GOOGLE_API_KEY'.
+  lm = lf.llms.Gemini15Flash()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The Google API key can be specified in the following ways:
+
+  1. At model instantiation:
+
+     ```python
+     lm = lf.llms.Gemini15Flash(api_key='MY_API_KEY')
+     ```
+  2. Via environment variable `GOOGLE_API_KEY`.
+
+  **References:**
+
+  * https://ai.google.dev/docs
+  """

   model: pg.typing.Annotated[
       pg.typing.Enum(
@@ -87,9 +115,32 @@ class GenAI(gemini.Gemini):

 # pylint: disable=invalid-name

+
 #
 # Experimental models.
 #
+class Gemini3ProPreview(GenAI):
+  """Gemini 3 Pro Preview model."""
+
+  model = 'gemini-3-pro-preview'
+
+
+class Gemini3ProImagePreview(GenAI):
+  """Gemini 3 Pro Image Preview model for high-fidelity image generation.
+
+  This model supports:
+  - Text-to-image generation
+  - Image editing (multimodal input)
+  - Visual reasoning
+
+  Key Requirements:
+  - responseModalities must include 'IMAGE'
+  - Supported aspect ratios: 1:1, 16:9, 9:16, 4:3, 3:4
+  - Image sizes: 1K (default), 2K, 4K
+  """
+
+  model = 'gemini-3-pro-image-preview'
+  response_modalities = ['TEXT', 'IMAGE']


 class Gemini25FlashImagePreview(GenAI):
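A minimal sketch of the new image preview class, referenced by its module path; whether it is also re-exported under `lf.llms` depends on the `__init__.py` changes not shown in this excerpt, and `Message.chunk()` is assumed as the way to enumerate text and image parts of the response:

```python
from langfun.core.llms import google_genai

# response_modalities already defaults to ['TEXT', 'IMAGE'] on this class.
lm = google_genai.Gemini3ProImagePreview(api_key='MY_API_KEY')
r = lm('A watercolor fox in a snowy forest, 16:9.')
# Generated images arrive as modality chunks alongside any text.
for chunk in r.chunk():
  print(type(chunk))
```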
langfun/core/llms/groq.py CHANGED
@@ -259,10 +259,35 @@ _SUPPORTED_MODELS_BY_ID = {m.model_id: m for m in SUPPORTED_MODELS}


 @lf.use_init_args(['model'])
-class Groq(openai_compatible.OpenAICompatible):
-  """Groq LLMs through REST APIs (OpenAI compatible).
+class Groq(openai_compatible.OpenAIChatCompletionAPI):
+  """Groq models.

-  See https://platform.openai.com/docs/api-reference/chat
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call Llama 3.3 70B on Groq using API key from environment variable
+  # 'GROQ_API_KEY'.
+  lm = lf.llms.GroqLlama33_70B_Versatile()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The Groq API key can be specified in the following ways:
+
+  1. At model instantiation:
+
+     ```python
+     lm = lf.llms.GroqLlama33_70B_Versatile(api_key='MY_API_KEY')
+     ```
+  2. Via environment variable `GROQ_API_KEY`.
+
+  **References:**
+
+  * https://console.groq.com/docs
   """

   model: pg.typing.Annotated[
langfun/core/llms/llama_cpp.py CHANGED
@@ -20,11 +20,30 @@ import pyglove as pg

 @pg.use_init_args(['url', 'model'])
 @pg.members([('api_endpoint', pg.typing.Str().freeze(''))])
-class LlamaCppRemote(openai_compatible.OpenAICompatible):
-  """The remote LLaMA C++ model.
+class LlamaCppRemote(openai_compatible.OpenAIChatCompletionAPI):
+  """LLaMA C++ models served via a remote server.

-  The Remote LLaMA C++ models can be launched via
-  https://github.com/ggerganov/llama.cpp/tree/master/examples/server
+  This class provides an interface to interact with language models
+  hosted on a LLaMA C++ server, which is compatible with the OpenAI
+  Chat Completions API format.
+
+  **Quick Start:**
+
+  Assuming a LLaMA C++ server is running at `http://localhost:8080`,
+  you can interact with it as follows:
+
+  ```python
+  import langfun as lf
+
+  # If the model name is not specified, the server's default is used.
+  lm = lf.llms.LlamaCppRemote(url='http://localhost:8080')
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **References:**
+
+  * https://github.com/ggerganov/llama.cpp/tree/master/examples/server
   """
   url: Annotated[
       str,
langfun/core/llms/openai.py CHANGED
@@ -49,6 +49,75 @@ class OpenAIModelInfo(lf.ModelInfo):
 #

 SUPPORTED_MODELS = [
+    # GPT-5 models
+    OpenAIModelInfo(
+        model_id='gpt-5.1',
+        in_service=True,
+        model_type='instruction-tuned',
+        description='GPT 5.1 model (latest stable).',
+        url='https://platform.openai.com/docs/models/gpt-5.1',
+        input_modalities=OpenAIModelInfo.INPUT_IMAGE_TYPES,
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=400_000,
+            max_output_tokens=128_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.13,
+            cost_per_1m_input_tokens=1.25,
+            cost_per_1m_output_tokens=10.0,
+        ),
+        # Tier 5 rate limits.
+        rate_limits=lf.ModelInfo.RateLimits(
+            max_requests_per_minute=15_000,
+            max_tokens_per_minute=40_000_000,
+        ),
+    ),
+    OpenAIModelInfo(
+        model_id='gpt-5',
+        alias_for='gpt-5-2025-08-07',
+        in_service=True,
+        model_type='instruction-tuned',
+        description='GPT 5 model (latest stable).',
+        url='https://platform.openai.com/docs/models/gpt-5',
+        input_modalities=OpenAIModelInfo.INPUT_IMAGE_TYPES,
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=400_000,
+            max_output_tokens=128_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.125,
+            cost_per_1m_input_tokens=1.25,
+            cost_per_1m_output_tokens=10.0,
+        ),
+        # Tier 5 rate limits.
+        rate_limits=lf.ModelInfo.RateLimits(
+            max_requests_per_minute=15_000,
+            max_tokens_per_minute=40_000_000,
+        ),
+    ),
+    OpenAIModelInfo(
+        model_id='gpt-5-mini',
+        alias_for='gpt-5-mini-2025-08-07',
+        in_service=True,
+        model_type='instruction-tuned',
+        description='GPT 5 mini model (latest stable).',
+        url='https://platform.openai.com/docs/models/gpt-5-mini',
+        input_modalities=OpenAIModelInfo.INPUT_IMAGE_TYPES,
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=400_000,
+            max_output_tokens=128_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.025,
+            cost_per_1m_input_tokens=0.25,
+            cost_per_1m_output_tokens=2.0,
+        ),
+        # Tier 5 rate limits.
+        rate_limits=lf.ModelInfo.RateLimits(
+            max_requests_per_minute=180_000_000,
+            max_tokens_per_minute=30_000_000,
+        ),
+    ),
     # GPT-4.1 models
     OpenAIModelInfo(
         model_id='gpt-4.1',
@@ -984,8 +1053,36 @@ _SUPPORTED_MODELS_BY_MODEL_ID = {m.model_id: m for m in SUPPORTED_MODELS}


 @lf.use_init_args(['model'])
-class OpenAI(openai_compatible.OpenAICompatible):
-  """OpenAI model."""
+class OpenAI(openai_compatible.OpenAIResponsesAPI):
+  """OpenAI models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call GPT-4o using API key from environment variable 'OPENAI_API_KEY'.
+  lm = lf.llms.Gpt4o()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The OpenAI API key can be specified in the following ways:
+
+  1. At model instantiation:
+
+     ```python
+     lm = lf.llms.Gpt4o(api_key='MY_API_KEY')
+     ```
+  2. Via environment variable `OPENAI_API_KEY`.
+
+  **References:**
+
+  * https://platform.openai.com/docs/models
+  * https://platform.openai.com/docs/api-reference
+  """

   model: pg.typing.Annotated[
       pg.typing.Enum(
@@ -994,7 +1091,12 @@ class OpenAI(openai_compatible.OpenAICompatible):
       'The name of the model to use.',
   ]

-  api_endpoint: str = 'https://api.openai.com/v1/chat/completions'
+  # Disable message storage by default.
+  sampling_options = lf.LMSamplingOptions(
+      extras={'store': False}
+  )
+
+  api_endpoint: str = 'https://api.openai.com/v1/responses'

   api_key: Annotated[
       str | None,
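With the move to the Responses API endpoint, `store=False` is now sent on every request through the `sampling_options.extras` default above. A minimal sketch of opting back into storage for a single instance (a hypothetical override of the class-level default):

```python
import langfun as lf

# Hypothetical: re-enable OpenAI response storage for one model instance.
lm = lf.llms.OpenAI(
    'gpt-5.1',
    sampling_options=lf.LMSamplingOptions(extras={'store': True}),
)
```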
@@ -1069,6 +1171,21 @@ class OpenAI(openai_compatible.OpenAICompatible):
     return super()._request_args(options)


+class Gpt51(OpenAI):
+  """GPT-5.1."""
+  model = 'gpt-5.1'
+
+
+class Gpt5(OpenAI):
+  """GPT-5."""
+  model = 'gpt-5'
+
+
+class Gpt5Mini(OpenAI):
+  """GPT-5 mini."""
+  model = 'gpt-5-mini'
+
+
 class Gpt41(OpenAI):
   """GPT-4.1."""
   model = 'gpt-4.1'
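The new aliases follow the existing convention (`Gpt41` just below them), so usage mirrors the quick-start docstring above. A minimal sketch, assuming the classes are re-exported under `lf.llms` like their siblings:

```python
import langfun as lf

# Reads OPENAI_API_KEY from the environment, like the other OpenAI classes.
lm = lf.llms.Gpt5()
r = lm('Who are you?')
print(r)
```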