langfun 0.1.2.dev202509120804__py3-none-any.whl → 0.1.2.dev202512150805__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langfun/__init__.py +1 -1
- langfun/core/__init__.py +7 -1
- langfun/core/agentic/__init__.py +8 -1
- langfun/core/agentic/action.py +740 -112
- langfun/core/agentic/action_eval.py +9 -2
- langfun/core/agentic/action_test.py +189 -24
- langfun/core/async_support.py +104 -5
- langfun/core/async_support_test.py +23 -0
- langfun/core/coding/python/correction.py +19 -9
- langfun/core/coding/python/execution.py +14 -12
- langfun/core/coding/python/generation.py +21 -16
- langfun/core/coding/python/sandboxing.py +23 -3
- langfun/core/component.py +42 -3
- langfun/core/concurrent.py +70 -6
- langfun/core/concurrent_test.py +9 -2
- langfun/core/console.py +1 -1
- langfun/core/data/conversion/anthropic.py +12 -3
- langfun/core/data/conversion/anthropic_test.py +8 -6
- langfun/core/data/conversion/gemini.py +11 -2
- langfun/core/data/conversion/gemini_test.py +48 -9
- langfun/core/data/conversion/openai.py +145 -31
- langfun/core/data/conversion/openai_test.py +161 -17
- langfun/core/eval/base.py +48 -44
- langfun/core/eval/base_test.py +5 -5
- langfun/core/eval/matching.py +5 -2
- langfun/core/eval/patching.py +3 -3
- langfun/core/eval/scoring.py +4 -3
- langfun/core/eval/v2/__init__.py +3 -0
- langfun/core/eval/v2/checkpointing.py +148 -46
- langfun/core/eval/v2/checkpointing_test.py +9 -2
- langfun/core/eval/v2/config_saver.py +37 -0
- langfun/core/eval/v2/config_saver_test.py +36 -0
- langfun/core/eval/v2/eval_test_helper.py +104 -3
- langfun/core/eval/v2/evaluation.py +102 -19
- langfun/core/eval/v2/evaluation_test.py +9 -3
- langfun/core/eval/v2/example.py +50 -40
- langfun/core/eval/v2/example_test.py +16 -8
- langfun/core/eval/v2/experiment.py +95 -20
- langfun/core/eval/v2/experiment_test.py +19 -0
- langfun/core/eval/v2/metric_values.py +31 -3
- langfun/core/eval/v2/metric_values_test.py +32 -0
- langfun/core/eval/v2/metrics.py +157 -44
- langfun/core/eval/v2/metrics_test.py +39 -18
- langfun/core/eval/v2/progress.py +31 -1
- langfun/core/eval/v2/progress_test.py +27 -0
- langfun/core/eval/v2/progress_tracking.py +13 -5
- langfun/core/eval/v2/progress_tracking_test.py +9 -1
- langfun/core/eval/v2/reporting.py +88 -71
- langfun/core/eval/v2/reporting_test.py +24 -6
- langfun/core/eval/v2/runners/__init__.py +30 -0
- langfun/core/eval/v2/{runners.py → runners/base.py} +73 -180
- langfun/core/eval/v2/runners/beam.py +354 -0
- langfun/core/eval/v2/runners/beam_test.py +153 -0
- langfun/core/eval/v2/runners/ckpt_monitor.py +350 -0
- langfun/core/eval/v2/runners/ckpt_monitor_test.py +213 -0
- langfun/core/eval/v2/runners/debug.py +40 -0
- langfun/core/eval/v2/runners/debug_test.py +76 -0
- langfun/core/eval/v2/runners/parallel.py +243 -0
- langfun/core/eval/v2/runners/parallel_test.py +182 -0
- langfun/core/eval/v2/runners/sequential.py +47 -0
- langfun/core/eval/v2/runners/sequential_test.py +169 -0
- langfun/core/langfunc.py +45 -130
- langfun/core/langfunc_test.py +7 -5
- langfun/core/language_model.py +189 -36
- langfun/core/language_model_test.py +54 -3
- langfun/core/llms/__init__.py +14 -1
- langfun/core/llms/anthropic.py +157 -2
- langfun/core/llms/azure_openai.py +29 -17
- langfun/core/llms/cache/base.py +25 -3
- langfun/core/llms/cache/in_memory.py +48 -7
- langfun/core/llms/cache/in_memory_test.py +14 -4
- langfun/core/llms/compositional.py +25 -1
- langfun/core/llms/deepseek.py +30 -2
- langfun/core/llms/fake.py +32 -1
- langfun/core/llms/gemini.py +90 -12
- langfun/core/llms/gemini_test.py +110 -0
- langfun/core/llms/google_genai.py +52 -1
- langfun/core/llms/groq.py +28 -3
- langfun/core/llms/llama_cpp.py +23 -4
- langfun/core/llms/openai.py +120 -3
- langfun/core/llms/openai_compatible.py +148 -27
- langfun/core/llms/openai_compatible_test.py +207 -20
- langfun/core/llms/openai_test.py +0 -2
- langfun/core/llms/rest.py +16 -1
- langfun/core/llms/vertexai.py +78 -8
- langfun/core/logging.py +1 -1
- langfun/core/mcp/__init__.py +10 -0
- langfun/core/mcp/client.py +177 -0
- langfun/core/mcp/client_test.py +71 -0
- langfun/core/mcp/session.py +241 -0
- langfun/core/mcp/session_test.py +54 -0
- langfun/core/mcp/testing/simple_mcp_client.py +33 -0
- langfun/core/mcp/testing/simple_mcp_server.py +33 -0
- langfun/core/mcp/tool.py +254 -0
- langfun/core/mcp/tool_test.py +197 -0
- langfun/core/memory.py +1 -0
- langfun/core/message.py +160 -55
- langfun/core/message_test.py +65 -81
- langfun/core/modalities/__init__.py +8 -0
- langfun/core/modalities/audio.py +21 -1
- langfun/core/modalities/image.py +73 -3
- langfun/core/modalities/image_test.py +116 -0
- langfun/core/modalities/mime.py +78 -4
- langfun/core/modalities/mime_test.py +59 -0
- langfun/core/modalities/pdf.py +19 -1
- langfun/core/modalities/video.py +21 -1
- langfun/core/modality.py +167 -29
- langfun/core/modality_test.py +42 -12
- langfun/core/natural_language.py +1 -1
- langfun/core/sampling.py +4 -4
- langfun/core/sampling_test.py +20 -4
- langfun/core/structured/__init__.py +2 -24
- langfun/core/structured/completion.py +34 -44
- langfun/core/structured/completion_test.py +23 -43
- langfun/core/structured/description.py +54 -50
- langfun/core/structured/function_generation.py +29 -12
- langfun/core/structured/mapping.py +81 -37
- langfun/core/structured/parsing.py +95 -79
- langfun/core/structured/parsing_test.py +0 -3
- langfun/core/structured/querying.py +230 -154
- langfun/core/structured/querying_test.py +69 -33
- langfun/core/structured/schema/__init__.py +49 -0
- langfun/core/structured/schema/base.py +664 -0
- langfun/core/structured/schema/base_test.py +531 -0
- langfun/core/structured/schema/json.py +174 -0
- langfun/core/structured/schema/json_test.py +121 -0
- langfun/core/structured/schema/python.py +316 -0
- langfun/core/structured/schema/python_test.py +410 -0
- langfun/core/structured/schema_generation.py +33 -14
- langfun/core/structured/scoring.py +47 -36
- langfun/core/structured/tokenization.py +26 -11
- langfun/core/subscription.py +2 -2
- langfun/core/template.py +175 -50
- langfun/core/template_test.py +123 -17
- langfun/env/__init__.py +43 -0
- langfun/env/base_environment.py +827 -0
- langfun/env/base_environment_test.py +473 -0
- langfun/env/base_feature.py +304 -0
- langfun/env/base_feature_test.py +228 -0
- langfun/env/base_sandbox.py +842 -0
- langfun/env/base_sandbox_test.py +1235 -0
- langfun/env/event_handlers/__init__.py +14 -0
- langfun/env/event_handlers/chain.py +233 -0
- langfun/env/event_handlers/chain_test.py +253 -0
- langfun/env/event_handlers/event_logger.py +472 -0
- langfun/env/event_handlers/event_logger_test.py +304 -0
- langfun/env/event_handlers/metric_writer.py +726 -0
- langfun/env/event_handlers/metric_writer_test.py +214 -0
- langfun/env/interface.py +1640 -0
- langfun/env/interface_test.py +153 -0
- langfun/env/load_balancers.py +59 -0
- langfun/env/load_balancers_test.py +141 -0
- langfun/env/test_utils.py +507 -0
- {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/METADATA +7 -3
- langfun-0.1.2.dev202512150805.dist-info/RECORD +217 -0
- langfun/core/eval/v2/runners_test.py +0 -343
- langfun/core/structured/schema.py +0 -987
- langfun/core/structured/schema_test.py +0 -982
- langfun-0.1.2.dev202509120804.dist-info/RECORD +0 -172
- {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/WHEEL +0 -0
- {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/licenses/LICENSE +0 -0
- {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/top_level.txt +0 -0
langfun/core/llms/gemini.py
CHANGED
@@ -151,6 +151,55 @@ SUPPORTED_MODELS = [
     #
     # Production models.
     #
+    # Gemini 3 Pro Preview
+    GeminiModelInfo(
+        model_id='gemini-3-pro-preview',
+        in_service=True,
+        provider=pg.oneof(['Google GenAI', 'VertexAI']),
+        model_type='instruction-tuned',
+        description='Gemini 3 Pro Preview.',
+        release_date=datetime.datetime(2025, 11, 18),
+        input_modalities=GeminiModelInfo.ALL_SUPPORTED_INPUT_TYPES,
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=1_048_576,
+            max_output_tokens=65_536,
+        ),
+        pricing=GeminiModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.2,
+            cost_per_1m_input_tokens=2.0,
+            cost_per_1m_output_tokens=12.0,
+            cost_per_1m_cached_input_tokens_with_prompt_longer_than_128k=0.4,
+            cost_per_1m_input_tokens_with_prompt_longer_than_128k=4.0,
+            cost_per_1m_output_tokens_with_prompt_longer_than_128k=18.0,
+        ),
+        rate_limits=lf.ModelInfo.RateLimits(
+            max_requests_per_minute=2000,
+            max_tokens_per_minute=4_000_000,
+        ),
+    ),
+    # Gemini 3 Pro Image Preview
+    GeminiModelInfo(
+        model_id='gemini-3-pro-image-preview',
+        in_service=True,
+        experimental=True,
+        provider=pg.oneof(['Google GenAI', 'VertexAI']),
+        model_type='instruction-tuned',
+        description=(
+            'Gemini 3 Pro Image Preview for high-fidelity image generation,'
+            ' editing, and visual reasoning.'
+        ),
+        release_date=datetime.datetime(2025, 12, 9),
+        input_modalities=GeminiModelInfo.INPUT_IMAGE_TYPES
+        + GeminiModelInfo.INPUT_DOC_TYPES,
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=65_536,
+            max_output_tokens=32_768,
+        ),
+        rate_limits=lf.ModelInfo.RateLimits(
+            max_requests_per_minute=200,
+            max_tokens_per_minute=1_000_000,
+        ),
+    ),
     # Gemini 2.5 Flash
     GeminiModelInfo(
         model_id='gemini-2.5-flash',
@@ -696,7 +745,15 @@ _SUPPORTED_MODELS_BY_ID = {m.model_id: m for m in SUPPORTED_MODELS}

 @pg.use_init_args(['model'])
 class Gemini(rest.REST):
-  """
+  """Base class for Gemini models served on Google GenAI and Vertex AI.
+
+  This class implements the Gemini API protocol, shared by
+  `lf.llms.GoogleGenAI` and `lf.llms.VertexAI`, providing common request
+  formatting and response parsing for Gemini models.
+
+  It is not intended to be used directly. Please use `lf.llms.GoogleGenAI` or
+  `lf.llms.VertexAI` instead.
+  """

   model: pg.typing.Annotated[
       pg.typing.Enum(
@@ -752,6 +809,13 @@ class Gemini(rest.REST):
         prompt.as_format('gemini', chunk_preprocessor=modality_conversion)
     )
     request['contents'] = contents
+    request['toolConfig'] = {
+        'functionCallingConfig': {
+            'mode': 'NONE',
+        }
+    }
+    if sampling_options.extras:
+      request.update(sampling_options.extras)
     return request

   def _generation_config(
@@ -783,11 +847,21 @@ class Gemini(rest.REST):
           + '\n\n [RESPONSE FORMAT (not part of prompt)]\n'
           + pg.to_json_str(json_schema, json_indent=2)
       )
+    thinking_config_data = {}
     if options.max_thinking_tokens is not None:
-
-
-
-
+      thinking_config_data['includeThoughts'] = options.max_thinking_tokens > 0
+      thinking_config_data['thinkingBudget'] = options.max_thinking_tokens
+    if options.thinking_level is not None:
+      thinking_config_data['thinkingLevel'] = options.thinking_level
+    if thinking_config_data:
+      config['thinkingConfig'] = thinking_config_data
+
+    # This is the new feature since Gemini 3.
+    # Skip for image generation models as they don't support mediaResolution.
+    if self.model_id.startswith('gemini-3') and not (
+        self.response_modalities and 'IMAGE' in self.response_modalities
+    ):
+      config['mediaResolution'] = 'MEDIA_RESOLUTION_HIGH'

     if self.response_modalities:
       config['responseModalities'] = self.response_modalities
@@ -803,10 +877,14 @@ class Gemini(rest.REST):
           'No candidates found in response. This is a Gemini API issue that '
           'happens occasionally, and retrying should fix it. '
       )
-
-
-
-
+
+    messages = []
+    for candidate in candidates:
+      message = lf.Message.from_value(candidate['content'], format='gemini')
+      if finish_reason := candidate.get('finishReason'):
+        message.metadata['finish_reason'] = finish_reason
+      messages.append(message)
+
     usage = json['usageMetadata']
     input_tokens = usage['promptTokenCount']
     # NOTE(daiyip): We saw cases that `candidatesTokenCount` is not present.
@@ -828,9 +906,9 @@ class Gemini(rest.REST):
     )

   def _error(self, status_code: int, content: str) -> lf.LMError:
-    if (
-
-
+    if status_code == 400 and (
+        b'exceeds the maximum number of tokens' in content
+        or b'Reduce the input token count and try again.' in content
     ):
       return lf.ContextLimitError(f'{status_code}: {content}')
     return super()._error(status_code, content)
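The changes above wire two sampling options into the Gemini request: `thinking_level` and `max_thinking_tokens` now populate `generationConfig.thinkingConfig`, every request carries a `toolConfig` with function calling disabled, and Gemini 3 text models get `mediaResolution` set to `MEDIA_RESOLUTION_HIGH`. A minimal usage sketch, assuming `lf.llms.Gemini3ProPreview` (added later in this diff) and a `GOOGLE_API_KEY` in the environment; the option names follow the `lf.LMSamplingOptions` fields exercised by the tests below:

```python
import langfun as lf

# Sketch only: configure the new thinking options surfaced by this diff.
# `max_thinking_tokens` maps to thinkingConfig.thinkingBudget and
# `thinking_level` maps to thinkingConfig.thinkingLevel in the request.
lm = lf.llms.Gemini3ProPreview(
    sampling_options=lf.LMSamplingOptions(
        max_thinking_tokens=8192,
        thinking_level='high',
    ),
)
print(lm('Explain the tradeoffs of speculative decoding.'))
```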
langfun/core/llms/gemini_test.py
CHANGED
@@ -177,6 +177,58 @@ class GeminiTest(unittest.TestCase):
         ),
     )

+    # Add test for thinkingConfig with thinking_level.
+    actual = model._generation_config(
+        lf.UserMessage('hi'),
+        lf.LMSamplingOptions(
+            thinking_level='high',
+        ),
+    )
+    self.assertEqual(
+        actual,
+        dict(
+            candidateCount=1,
+            temperature=None,
+            topP=None,
+            topK=40,
+            maxOutputTokens=None,
+            stopSequences=None,
+            responseLogprobs=False,
+            logprobs=None,
+            seed=None,
+            thinkingConfig={'thinkingLevel': 'high'},
+        ),
+    )
+
+    # Add test for thinkingConfig with both max_thinking_tokens and
+    # thinking_level.
+    actual = model._generation_config(
+        lf.UserMessage('hi'),
+        lf.LMSamplingOptions(
+            max_thinking_tokens=100,
+            thinking_level='low',
+        ),
+    )
+    self.assertEqual(
+        actual,
+        dict(
+            candidateCount=1,
+            temperature=None,
+            topP=None,
+            topK=40,
+            maxOutputTokens=None,
+            stopSequences=None,
+            responseLogprobs=False,
+            logprobs=None,
+            seed=None,
+            thinkingConfig={
+                'includeThoughts': True,
+                'thinkingBudget': 100,
+                'thinkingLevel': 'low',
+            },
+        ),
+    )
+
     with self.assertRaisesRegex(
         ValueError, '`json_schema` must be a dict, got'
     ):
@@ -185,6 +237,32 @@ class GeminiTest(unittest.TestCase):
         lf.LMSamplingOptions(),
     )

+  def test_media_resolution_for_gemini3(self):
+    model = gemini.Gemini('gemini-3-pro-preview', api_endpoint='')
+    config = model._generation_config(
+        lf.UserMessage('hi'),
+        lf.LMSamplingOptions(),
+    )
+    self.assertEqual(config.get('mediaResolution'), 'MEDIA_RESOLUTION_HIGH')
+
+    model = gemini.Gemini('gemini-1.5-pro', api_endpoint='')
+    config = model._generation_config(
+        lf.UserMessage('hi'),
+        lf.LMSamplingOptions(),
+    )
+    self.assertIsNone(config.get('mediaResolution'))
+
+  def test_request_tool_config(self):
+    model = gemini.Gemini('gemini-1.5-pro', api_endpoint='')
+    request = model.request(
+        lf.UserMessage('hi'),
+        lf.LMSamplingOptions(),
+    )
+    self.assertEqual(
+        request.get('toolConfig'),
+        {'functionCallingConfig': {'mode': 'NONE'}},
+    )
+
   def test_call_model(self):
     with mock.patch('requests.Session.post') as mock_generate:
       mock_generate.side_effect = mock_requests_post
@@ -225,6 +303,38 @@ class GeminiTest(unittest.TestCase):
     ):
       lm('hello')

+  def test_call_model_with_max_tokens_error(self):
+    def mock_requests_post_error(*args, **kwargs):
+      del args, kwargs
+      response = requests.Response()
+      response.status_code = 200
+      response._content = pg.to_json_str({
+          'candidates': [
+              {
+                  'finishReason': 'MAX_TOKENS',
+                  'content': {
+                      'parts': [
+                          {
+                              'text': 'This is'
+                          }
+                      ]
+                  }
+              },
+          ],
+          'usageMetadata': {
+              'promptTokenCount': 3,
+              'candidatesTokenCount': 4,
+          }
+      }).encode()
+      return response
+
+    with mock.patch('requests.Session.post') as mock_generate:
+      mock_generate.side_effect = mock_requests_post_error
+      lm = gemini.Gemini('gemini-1.5-pro', api_endpoint='')
+      m = lm('hello')
+      self.assertEqual(m.metadata.finish_reason, 'MAX_TOKENS')
+      self.assertEqual(m.text, 'This is')
+
   def test_call_model_with_system_message(self):
     with mock.patch('requests.Session.post') as mock_generate:
       mock_generate.side_effect = mock_requests_post
langfun/core/llms/google_genai.py
CHANGED
@@ -25,7 +25,35 @@ import pyglove as pg
 @lf.use_init_args(['model'])
 @pg.members([('api_endpoint', pg.typing.Str().freeze(''))])
 class GenAI(gemini.Gemini):
-  """
+  """Google GenAI models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call Gemini 1.5 Flash using API key from environment variable
+  # 'GOOGLE_API_KEY'.
+  lm = lf.llms.Gemini15Flash()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The Google API key can be specified in following ways:
+
+  1. At model instantiation:
+
+  ```python
+  lm = lf.llms.Gemini15Flash(api_key='MY_API_KEY')
+  ```
+  2. via environment variable `GOOGLE_API_KEY`.
+
+  **References:**
+
+  * https://ai.google.dev/docs
+  """

   model: pg.typing.Annotated[
       pg.typing.Enum(
@@ -87,9 +115,32 @@ class GenAI(gemini.Gemini):

 # pylint: disable=invalid-name

+
 #
 # Experimental models.
 #
+class Gemini3ProPreview(GenAI):
+  """Gemini 3 Pro Preview model."""
+
+  model = 'gemini-3-pro-preview'
+
+
+class Gemini3ProImagePreview(GenAI):
+  """Gemini 3 Pro Image Preview model for high-fidelity image generation.
+
+  This model supports:
+  - Text-to-image generation
+  - Image editing (multimodal input)
+  - Visual reasoning
+
+  Key Requirements:
+  - responseModalities must include 'IMAGE'
+  - Supported aspect ratios: 1:1, 16:9, 9:16, 4:3, 3:4
+  - Image sizes: 1K (default), 2K, 4K
+  """
+
+  model = 'gemini-3-pro-image-preview'
+  response_modalities = ['TEXT', 'IMAGE']


 class Gemini25FlashImagePreview(GenAI):
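With the `GenAI` subclasses above in place, the image-preview model can be instantiated like any other langfun LLM. A minimal sketch, assuming `GOOGLE_API_KEY` is set; because the class pins `response_modalities` to `['TEXT', 'IMAGE']`, responses may carry image modalities alongside text:

```python
import langfun as lf

# Sketch only: call the new image-preview model class defined above.
lm = lf.llms.Gemini3ProImagePreview()
r = lm('Generate a watercolor illustration of a lighthouse at dusk.')
print(r.text)  # Generated images arrive as modality objects on the message.
```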
langfun/core/llms/groq.py
CHANGED
@@ -259,10 +259,35 @@ _SUPPORTED_MODELS_BY_ID = {m.model_id: m for m in SUPPORTED_MODELS}


 @lf.use_init_args(['model'])
-class Groq(openai_compatible.
-  """Groq
+class Groq(openai_compatible.OpenAIChatCompletionAPI):
+  """Groq models.

-
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call Llama 3.3 70B on Groq using API key from environment variable
+  # 'GROQ_API_KEY'.
+  lm = lf.llms.GroqLlama33_70B_Versatile()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The Groq API key can be specified in following ways:
+
+  1. At model instantiation:
+
+  ```python
+  lm = lf.llms.GroqLlama33_70B_Versatile(api_key='MY_API_KEY')
+  ```
+  2. via environment variable `GROQ_API_KEY`.
+
+  **References:**
+
+  * https://console.groq.com/docs
   """

   model: pg.typing.Annotated[
langfun/core/llms/llama_cpp.py
CHANGED
@@ -20,11 +20,30 @@ import pyglove as pg

 @pg.use_init_args(['url', 'model'])
 @pg.members([('api_endpoint', pg.typing.Str().freeze(''))])
-class LlamaCppRemote(openai_compatible.
-  """
+class LlamaCppRemote(openai_compatible.OpenAIChatCompletionAPI):
+  """LLaMA C++ models served via a remote server.

-
-
+  This class provides an interface to interact with language models
+  hosted on a LLaMA C++ server, which is compatible with the OpenAI
+  Chat Completions API format.
+
+  **Quick Start:**
+
+  Assuming a LLaMA C++ server is running at `http://localhost:8080`,
+  you can interact with it as follows:
+
+  ```python
+  import langfun as lf
+
+  # If model name is not specified, it will use server's default.
+  lm = lf.llms.LlamaCppRemote(url='http://localhost:8080')
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **References:**
+
+  * https://github.com/ggerganov/llama.cpp/tree/master/examples/server
   """
   url: Annotated[
       str,
langfun/core/llms/openai.py
CHANGED
@@ -49,6 +49,75 @@ class OpenAIModelInfo(lf.ModelInfo):
 #

 SUPPORTED_MODELS = [
+    # GPT-5 models
+    OpenAIModelInfo(
+        model_id='gpt-5.1',
+        in_service=True,
+        model_type='instruction-tuned',
+        description='GPT 5.1 model (latest stable).',
+        url='https://platform.openai.com/docs/models/gpt-5.1',
+        input_modalities=OpenAIModelInfo.INPUT_IMAGE_TYPES,
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=400_000,
+            max_output_tokens=128_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.13,
+            cost_per_1m_input_tokens=1.25,
+            cost_per_1m_output_tokens=10.0,
+        ),
+        # Tier 5 rate limits.
+        rate_limits=lf.ModelInfo.RateLimits(
+            max_requests_per_minute=15_000,
+            max_tokens_per_minute=40_000_000,
+        ),
+    ),
+    OpenAIModelInfo(
+        model_id='gpt-5',
+        alias_for='gpt-5-2025-08-07',
+        in_service=True,
+        model_type='instruction-tuned',
+        description='GPT 5 model (latest stable).',
+        url='https://platform.openai.com/docs/models/gpt-5',
+        input_modalities=OpenAIModelInfo.INPUT_IMAGE_TYPES,
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=400_000,
+            max_output_tokens=128_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.125,
+            cost_per_1m_input_tokens=1.25,
+            cost_per_1m_output_tokens=10.0,
+        ),
+        # Tier 5 rate limits.
+        rate_limits=lf.ModelInfo.RateLimits(
+            max_requests_per_minute=15_000,
+            max_tokens_per_minute=40_000_000,
+        ),
+    ),
+    OpenAIModelInfo(
+        model_id='gpt-5-mini',
+        alias_for='gpt-5-mini-2025-08-07',
+        in_service=True,
+        model_type='instruction-tuned',
+        description='GPT 5 mini model (latest stable).',
+        url='https://platform.openai.com/docs/models/gpt-5-mini',
+        input_modalities=OpenAIModelInfo.INPUT_IMAGE_TYPES,
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=400_000,
+            max_output_tokens=128_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.025,
+            cost_per_1m_input_tokens=0.25,
+            cost_per_1m_output_tokens=2.0,
+        ),
+        # Tier 5 rate limits.
+        rate_limits=lf.ModelInfo.RateLimits(
+            max_requests_per_minute=180_000_000,
+            max_tokens_per_minute=30_000_000,
+        ),
+    ),
     # GPT-4.1 models
     OpenAIModelInfo(
         model_id='gpt-4.1',
@@ -984,8 +1053,36 @@ _SUPPORTED_MODELS_BY_MODEL_ID = {m.model_id: m for m in SUPPORTED_MODELS}


 @lf.use_init_args(['model'])
-class OpenAI(openai_compatible.
-  """OpenAI
+class OpenAI(openai_compatible.OpenAIResponsesAPI):
+  """OpenAI models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call GPT-4o using API key from environment variable 'OPENAI_API_KEY'.
+  lm = lf.llms.Gpt4o()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The OpenAI API key can be specified in following ways:
+
+  1. At model instantiation:
+
+  ```python
+  lm = lf.llms.Gpt4o(api_key='MY_API_KEY')
+  ```
+  2. via environment variable `OPENAI_API_KEY`.
+
+  **References:**
+
+  * https://platform.openai.com/docs/models
+  * https://platform.openai.com/docs/api-reference
+  """

   model: pg.typing.Annotated[
       pg.typing.Enum(
@@ -994,7 +1091,12 @@ class OpenAI(openai_compatible.OpenAICompatible):
       'The name of the model to use.',
   ]

-
+  # Disable message storage by default.
+  sampling_options = lf.LMSamplingOptions(
+      extras={'store': False}
+  )
+
+  api_endpoint: str = 'https://api.openai.com/v1/responses'

   api_key: Annotated[
       str | None,
@@ -1069,6 +1171,21 @@ class OpenAI(openai_compatible.OpenAICompatible):
     return super()._request_args(options)


+class Gpt51(OpenAI):
+  """GPT-5.1."""
+  model = 'gpt-5.1'
+
+
+class Gpt5(OpenAI):
+  """GPT-5."""
+  model = 'gpt-5'
+
+
+class Gpt5Mini(OpenAI):
+  """GPT-5 mini."""
+  model = 'gpt-5-mini'
+
+
 class Gpt41(OpenAI):
   """GPT-4.1."""
   model = 'gpt-4.1'