langfun 0.1.2.dev202510230805__py3-none-any.whl → 0.1.2.dev202511270805__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
- langfun/core/__init__.py +2 -0
- langfun/core/agentic/__init__.py +4 -1
- langfun/core/agentic/action.py +447 -29
- langfun/core/agentic/action_eval.py +9 -2
- langfun/core/agentic/action_test.py +149 -21
- langfun/core/async_support.py +32 -3
- langfun/core/coding/python/correction.py +19 -9
- langfun/core/coding/python/execution.py +14 -12
- langfun/core/coding/python/generation.py +21 -16
- langfun/core/coding/python/sandboxing.py +23 -3
- langfun/core/component.py +42 -3
- langfun/core/concurrent.py +70 -6
- langfun/core/concurrent_test.py +1 -0
- langfun/core/console.py +1 -1
- langfun/core/data/conversion/anthropic.py +12 -3
- langfun/core/data/conversion/anthropic_test.py +8 -6
- langfun/core/data/conversion/gemini.py +9 -2
- langfun/core/data/conversion/gemini_test.py +12 -9
- langfun/core/data/conversion/openai.py +145 -31
- langfun/core/data/conversion/openai_test.py +161 -17
- langfun/core/eval/base.py +47 -43
- langfun/core/eval/base_test.py +5 -5
- langfun/core/eval/matching.py +5 -2
- langfun/core/eval/patching.py +3 -3
- langfun/core/eval/scoring.py +4 -3
- langfun/core/eval/v2/__init__.py +1 -0
- langfun/core/eval/v2/checkpointing.py +64 -6
- langfun/core/eval/v2/checkpointing_test.py +9 -2
- langfun/core/eval/v2/eval_test_helper.py +103 -2
- langfun/core/eval/v2/evaluation.py +91 -16
- langfun/core/eval/v2/evaluation_test.py +9 -3
- langfun/core/eval/v2/example.py +50 -40
- langfun/core/eval/v2/example_test.py +16 -8
- langfun/core/eval/v2/experiment.py +74 -8
- langfun/core/eval/v2/experiment_test.py +19 -0
- langfun/core/eval/v2/metric_values.py +31 -3
- langfun/core/eval/v2/metric_values_test.py +32 -0
- langfun/core/eval/v2/metrics.py +157 -44
- langfun/core/eval/v2/metrics_test.py +39 -18
- langfun/core/eval/v2/progress.py +30 -1
- langfun/core/eval/v2/progress_test.py +27 -0
- langfun/core/eval/v2/progress_tracking.py +12 -3
- langfun/core/eval/v2/progress_tracking_test.py +6 -1
- langfun/core/eval/v2/reporting.py +90 -71
- langfun/core/eval/v2/reporting_test.py +24 -6
- langfun/core/eval/v2/runners/__init__.py +30 -0
- langfun/core/eval/v2/{runners.py → runners/base.py} +59 -142
- langfun/core/eval/v2/runners/beam.py +341 -0
- langfun/core/eval/v2/runners/beam_test.py +131 -0
- langfun/core/eval/v2/runners/ckpt_monitor.py +294 -0
- langfun/core/eval/v2/runners/ckpt_monitor_test.py +162 -0
- langfun/core/eval/v2/runners/debug.py +40 -0
- langfun/core/eval/v2/runners/debug_test.py +76 -0
- langfun/core/eval/v2/runners/parallel.py +100 -0
- langfun/core/eval/v2/runners/parallel_test.py +95 -0
- langfun/core/eval/v2/runners/sequential.py +47 -0
- langfun/core/eval/v2/runners/sequential_test.py +172 -0
- langfun/core/langfunc.py +45 -130
- langfun/core/langfunc_test.py +7 -5
- langfun/core/language_model.py +141 -21
- langfun/core/language_model_test.py +54 -3
- langfun/core/llms/__init__.py +9 -1
- langfun/core/llms/anthropic.py +157 -2
- langfun/core/llms/azure_openai.py +29 -17
- langfun/core/llms/cache/base.py +25 -3
- langfun/core/llms/cache/in_memory.py +48 -7
- langfun/core/llms/cache/in_memory_test.py +14 -4
- langfun/core/llms/compositional.py +25 -1
- langfun/core/llms/deepseek.py +30 -2
- langfun/core/llms/fake.py +32 -1
- langfun/core/llms/gemini.py +55 -17
- langfun/core/llms/gemini_test.py +84 -0
- langfun/core/llms/google_genai.py +34 -1
- langfun/core/llms/groq.py +28 -3
- langfun/core/llms/llama_cpp.py +23 -4
- langfun/core/llms/openai.py +36 -3
- langfun/core/llms/openai_compatible.py +148 -27
- langfun/core/llms/openai_compatible_test.py +207 -20
- langfun/core/llms/openai_test.py +0 -2
- langfun/core/llms/rest.py +12 -1
- langfun/core/llms/vertexai.py +58 -8
- langfun/core/logging.py +1 -1
- langfun/core/mcp/client.py +77 -22
- langfun/core/mcp/client_test.py +8 -35
- langfun/core/mcp/session.py +94 -29
- langfun/core/mcp/session_test.py +54 -0
- langfun/core/mcp/tool.py +151 -22
- langfun/core/mcp/tool_test.py +197 -0
- langfun/core/memory.py +1 -0
- langfun/core/message.py +160 -55
- langfun/core/message_test.py +65 -81
- langfun/core/modalities/__init__.py +8 -0
- langfun/core/modalities/audio.py +21 -1
- langfun/core/modalities/image.py +19 -1
- langfun/core/modalities/mime.py +64 -3
- langfun/core/modalities/mime_test.py +11 -0
- langfun/core/modalities/pdf.py +19 -1
- langfun/core/modalities/video.py +21 -1
- langfun/core/modality.py +167 -29
- langfun/core/modality_test.py +42 -12
- langfun/core/natural_language.py +1 -1
- langfun/core/sampling.py +4 -4
- langfun/core/sampling_test.py +20 -4
- langfun/core/structured/__init__.py +2 -24
- langfun/core/structured/completion.py +34 -44
- langfun/core/structured/completion_test.py +23 -43
- langfun/core/structured/description.py +54 -50
- langfun/core/structured/function_generation.py +29 -12
- langfun/core/structured/mapping.py +81 -37
- langfun/core/structured/parsing.py +95 -79
- langfun/core/structured/parsing_test.py +0 -3
- langfun/core/structured/querying.py +215 -142
- langfun/core/structured/querying_test.py +65 -29
- langfun/core/structured/schema/__init__.py +49 -0
- langfun/core/structured/schema/base.py +664 -0
- langfun/core/structured/schema/base_test.py +531 -0
- langfun/core/structured/schema/json.py +174 -0
- langfun/core/structured/schema/json_test.py +121 -0
- langfun/core/structured/schema/python.py +316 -0
- langfun/core/structured/schema/python_test.py +410 -0
- langfun/core/structured/schema_generation.py +33 -14
- langfun/core/structured/scoring.py +47 -36
- langfun/core/structured/tokenization.py +26 -11
- langfun/core/subscription.py +2 -2
- langfun/core/template.py +174 -49
- langfun/core/template_test.py +123 -17
- langfun/env/__init__.py +8 -2
- langfun/env/base_environment.py +320 -128
- langfun/env/base_environment_test.py +473 -0
- langfun/env/base_feature.py +92 -15
- langfun/env/base_feature_test.py +228 -0
- langfun/env/base_sandbox.py +84 -361
- langfun/env/base_sandbox_test.py +1235 -0
- langfun/env/event_handlers/__init__.py +1 -1
- langfun/env/event_handlers/chain.py +233 -0
- langfun/env/event_handlers/chain_test.py +253 -0
- langfun/env/event_handlers/event_logger.py +95 -98
- langfun/env/event_handlers/event_logger_test.py +21 -21
- langfun/env/event_handlers/metric_writer.py +225 -140
- langfun/env/event_handlers/metric_writer_test.py +23 -6
- langfun/env/interface.py +854 -40
- langfun/env/interface_test.py +112 -2
- langfun/env/load_balancers_test.py +23 -2
- langfun/env/test_utils.py +126 -84
- {langfun-0.1.2.dev202510230805.dist-info → langfun-0.1.2.dev202511270805.dist-info}/METADATA +1 -1
- langfun-0.1.2.dev202511270805.dist-info/RECORD +215 -0
- langfun/core/eval/v2/runners_test.py +0 -343
- langfun/core/structured/schema.py +0 -987
- langfun/core/structured/schema_test.py +0 -982
- langfun/env/base_test.py +0 -1481
- langfun/env/event_handlers/base.py +0 -350
- langfun-0.1.2.dev202510230805.dist-info/RECORD +0 -195
- {langfun-0.1.2.dev202510230805.dist-info → langfun-0.1.2.dev202511270805.dist-info}/WHEEL +0 -0
- {langfun-0.1.2.dev202510230805.dist-info → langfun-0.1.2.dev202511270805.dist-info}/licenses/LICENSE +0 -0
- {langfun-0.1.2.dev202510230805.dist-info → langfun-0.1.2.dev202511270805.dist-info}/top_level.txt +0 -0
langfun/core/llms/cache/in_memory.py
CHANGED
@@ -24,7 +24,32 @@ import pyglove as pg
 
 @pg.use_init_args(['filename', 'ttl', 'key'])
 class InMemory(base.LMCacheBase):
-  """…
+  """An in-memory cache for language model lookups.
+
+  `InMemory` stores LM prompts and their corresponding responses in memory,
+  providing a simple and fast caching mechanism for a single session.
+  Optionally, it can persist the cache to a JSON file on disk, allowing
+  results to be reused across sessions.
+
+  When a filename is provided, the cache will be loaded from the file upon
+  initialization and saved to the file when `save()` is called. This is
+  useful for caching results in interactive environments like Colab or
+  when running batch jobs.
+
+  Example:
+
+    ```python
+    import langfun as lf
+    # Using in-memory cache without persistence
+    lm = lf.llms.GeminiPro(cache=lf.llms.cache.InMemory())
+    r = lm.query('hello')
+
+    # Using in-memory cache with persistence
+    lm = lf.llms.GeminiPro(cache=lf.llms.cache.InMemory('cache.json'))
+    r = lm.query('hello')
+    lm.cache.save()
+    ```
+  """
 
   filename: Annotated[
       str | None,
@@ -144,17 +169,33 @@ class InMemory(base.LMCacheBase):
 
 @contextlib.contextmanager
 def lm_cache(filename: str | None = None) -> Iterator[InMemory]:
-  """Context manager to enable cache for LMs…
+  """Context manager to enable in-memory cache for LMs in the current context.
+
+  This context manager sets an `InMemory` cache as the default cache for
+  any Langfun language model instantiated within its scope, unless a model
+  is explicitly configured with a different cache.
+
+  If a `filename` is provided, the cache will be loaded from the specified
+  file at the beginning of the context and automatically saved back to the
+  file upon exiting the context. This is a convenient way to manage
+  persistent caching for a block of code.
+
+  Example:
 
-
-
-
+    ```python
+    import langfun as lf
+    with lf.lm_cache('my_cache.json'):
+      # LMs created here will use 'my_cache.json' for caching.
+      lm = lf.llms.GeminiPro()
+      print(lm.query('hello'))
+    ```
 
   Args:
-    filename: If…
+    filename: If provided, specifies the JSON file for loading and saving
+      the cache.
 
   Yields:
-
+    The `InMemory` cache instance created for this context.
   """
   cache = InMemory(filename)
   try:
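The `lm_cache` docstring above uses a real Gemini model; below is a minimal offline sketch of the same flow, pairing the context manager with one of the fake models from `langfun/core/llms/fake.py` so it runs without API keys. The fake-model choice and the cache-reuse comment are assumptions, not part of the diff.

```python
# Offline sketch (assumptions noted above): use the lm_cache context manager
# with a fake model so no API key or network access is required.
import langfun as lf

with lf.lm_cache('my_cache.json'):
  # Models created in this scope use the context cache by default.
  lm = lf.llms.StaticResponse('cached answer')
  print(lm('hello'))
# On exit, the context manager saves the cache back to 'my_cache.json',
# so a later session constructed the same way can reuse the responses.
```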
langfun/core/llms/cache/in_memory_test.py
CHANGED
@@ -175,18 +175,28 @@ class InMemoryLMCacheTest(unittest.TestCase):
 
     cache = in_memory.InMemory()
     lm = fake.StaticSequence(['1', '2', '3', '4', '5', '6'], cache=cache)
-
-
+    image_foo = CustomModality('foo')
+    image_bar = CustomModality('bar')
+    lm(
+        lf.UserMessage(
+            f'hi <<[[{image_foo.id}]]>>', referred_modalities=[image_foo]
+        )
+    )
+    lm(
+        lf.UserMessage(
+            f'hi <<[[{image_bar.id}]]>>', referred_modalities=[image_bar]
+        )
+    )
     self.assertEqual(
         list(cache.keys()),
         [
             (
-                'hi <<[[…
+                f'hi <<[[{image_foo.id}]]>>',
                 (None, None, 1, 40, None, None),
                 0,
             ),
             (
-                'hi <<[[…
+                f'hi <<[[{image_bar.id}]]>>',
                 (None, None, 1, 40, None, None),
                 0,
             ),
langfun/core/llms/compositional.py
CHANGED
@@ -21,7 +21,31 @@ import pyglove as pg
 
 @pg.use_init_args(['candidates', 'seed'])
 class RandomChoice(lf.LanguageModel):
-  """…
+  """A composite language model that randomly selects from a list of candidates.
+
+  `RandomChoice` acts as a proxy that forwards each request (`sample`, `score`,
+  `tokenize`, or `__call__`) to one of the `candidates` selected randomly.
+  This can be useful for load balancing across multiple LLM endpoints,
+  for A/B testing different models, or for ensembling model outputs
+  by calling it multiple times.
+
+  The selection is determined by the provided `seed`, ensuring reproducibility
+  if needed.
+
+  Example:
+
+    ```python
+    import langfun as lf
+
+    lm = lf.llms.RandomChoice([
+        lf.llms.GeminiPro(),
+        lf.llms.GPT4(),
+    ])
+
+    # This call will be handled by either GeminiPro or GPT4, chosen randomly.
+    r = lm.sample('hello')
+    ```
+  """
 
   candidates: Annotated[
       list[lf.LanguageModel],
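A runnable variant of the `RandomChoice` example above, using fake candidate models and an explicit `seed` so it needs no API keys; the specific candidates and seed value are illustrative assumptions rather than anything taken from the diff.

```python
# Sketch: RandomChoice forwards each call to one randomly chosen candidate.
import langfun as lf

lm = lf.llms.RandomChoice(
    candidates=[
        lf.llms.StaticResponse('answer from model A'),
        lf.llms.StaticResponse('answer from model B'),
    ],
    seed=1,  # Fixing the seed makes the candidate selection reproducible.
)
print(lm('hello'))  # Served by one of the two candidates above.
```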
langfun/core/llms/deepseek.py
CHANGED
@@ -93,8 +93,36 @@ _SUPPORTED_MODELS_BY_ID = {m.model_id: m for m in SUPPORTED_MODELS}
 # DeepSeek API uses an API format compatible with OpenAI.
 # Reference: https://api-docs.deepseek.com/
 @lf.use_init_args(['model'])
-class DeepSeek(openai_compatible.…
-  """DeepSeek…
+class DeepSeek(openai_compatible.OpenAIChatCompletionAPI):
+  """DeepSeek models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call DeepSeek-V3 using API key from environment variable
+  # 'DEEPSEEK_API_KEY'.
+  lm = lf.llms.DeepSeekV3()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The DeepSeek API key can be specified in following ways:
+
+  1. At model instantiation:
+
+  ```python
+  lm = lf.llms.DeepSeekV3(api_key='MY_API_KEY')
+  ```
+  2. via environment variable `DEEPSEEK_API_KEY`.
+
+  **References:**
+
+  * https://api-docs.deepseek.com/
+  """
 
   model: pg.typing.Annotated[
       pg.typing.Enum(
langfun/core/llms/fake.py
CHANGED
@@ -20,7 +20,38 @@ import langfun.core as lf
 
 
 class Fake(lf.LanguageModel):
-  """…
+  """Base class for fake language models, used for testing.
+
+  Fake models simulate the behavior of real language models but return
+  pre-defined responses, making them useful for testing prompts,
+  data processing logic, and agent behavior without incurring API costs
+  or relying on external services.
+
+  Langfun provides several fake models:
+  * `lf.llms.Echo`: Echoes the prompt back as the response.
+  * `lf.llms.StaticResponse`: Returns a fixed, pre-defined response for
+    any prompt.
+  * `lf.llms.StaticMapping`: Returns responses based on a prompt-to-response
+    dictionary.
+  * `lf.llms.StaticSequence`: Returns responses from a pre-defined sequence
+    in order.
+
+  **Example:**
+
+  ```python
+  import langfun as lf
+
+  # Use Echo model for testing
+  lm = lf.llms.Echo()
+  response = lm('hello')
+  assert response.text == 'hello'
+
+  # Use StaticResponse model
+  lm = lf.llms.StaticResponse('world')
+  response = lm('hello')
+  assert response.text == 'world'
+  ```
+  """
 
   def _score(self, prompt: lf.Message| list[lf.Message],
              completions: list[lf.Message]):
langfun/core/llms/gemini.py
CHANGED
@@ -151,6 +151,32 @@ SUPPORTED_MODELS = [
     #
     # Production models.
     #
+    # Gemini 3 Pro Preview
+    GeminiModelInfo(
+        model_id='gemini-3-pro-preview',
+        in_service=True,
+        provider=pg.oneof(['Google GenAI', 'VertexAI']),
+        model_type='instruction-tuned',
+        description='Gemini 3 Pro Preview.',
+        release_date=datetime.datetime(2025, 11, 18),
+        input_modalities=GeminiModelInfo.ALL_SUPPORTED_INPUT_TYPES,
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=1_048_576,
+            max_output_tokens=65_536,
+        ),
+        pricing=GeminiModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.2,
+            cost_per_1m_input_tokens=2.0,
+            cost_per_1m_output_tokens=12.0,
+            cost_per_1m_cached_input_tokens_with_prompt_longer_than_128k=0.4,
+            cost_per_1m_input_tokens_with_prompt_longer_than_128k=4.0,
+            cost_per_1m_output_tokens_with_prompt_longer_than_128k=18.0,
+        ),
+        rate_limits=lf.ModelInfo.RateLimits(
+            max_requests_per_minute=2000,
+            max_tokens_per_minute=4_000_000,
+        ),
+    ),
     # Gemini 2.5 Flash
     GeminiModelInfo(
         model_id='gemini-2.5-flash',
@@ -696,7 +722,15 @@ _SUPPORTED_MODELS_BY_ID = {m.model_id: m for m in SUPPORTED_MODELS}
 
 @pg.use_init_args(['model'])
 class Gemini(rest.REST):
-  """…
+  """Base class for Gemini models served on Google GenAI and Vertex AI.
+
+  This class implements the Gemini API protocol, shared by
+  `lf.llms.GoogleGenAI` and `lf.llms.VertexAI`, providing common request
+  formatting and response parsing for Gemini models.
+
+  It is not intended to be used directly. Please use `lf.llms.GoogleGenAI` or
+  `lf.llms.VertexAI` instead.
+  """
 
   model: pg.typing.Annotated[
       pg.typing.Enum(
@@ -752,11 +786,8 @@ class Gemini(rest.REST):
         prompt.as_format('gemini', chunk_preprocessor=modality_conversion)
     )
     request['contents'] = contents
-
-
-    # metadata_gemini_tools=[{'google_search': {}}]
-    if tools := prompt.metadata.get('gemini_tools'):
-      request['tools'] = tools
+    if sampling_options.extras:
+      request.update(sampling_options.extras)
     return request
 
   def _generation_config(
@@ -788,11 +819,14 @@ class Gemini(rest.REST):
          + '\n\n [RESPONSE FORMAT (not part of prompt)]\n'
          + pg.to_json_str(json_schema, json_indent=2)
      )
+    thinking_config_data = {}
     if options.max_thinking_tokens is not None:
-
-
-
-
+      thinking_config_data['includeThoughts'] = options.max_thinking_tokens > 0
+      thinking_config_data['thinkingBudget'] = options.max_thinking_tokens
+    if options.thinking_level is not None:
+      thinking_config_data['thinkingLevel'] = options.thinking_level
+    if thinking_config_data:
+      config['thinkingConfig'] = thinking_config_data
 
     if self.response_modalities:
       config['responseModalities'] = self.response_modalities
@@ -808,10 +842,14 @@ class Gemini(rest.REST):
           'No candidates found in response. This is a Gemini API issue that '
           'happens occasionally, and retrying should fix it. '
       )
-
-
-
-
+
+    messages = []
+    for candidate in candidates:
+      message = lf.Message.from_value(candidate['content'], format='gemini')
+      if finish_reason := candidate.get('finishReason'):
+        message.metadata['finish_reason'] = finish_reason
+      messages.append(message)
+
     usage = json['usageMetadata']
     input_tokens = usage['promptTokenCount']
     # NOTE(daiyip): We saw cases that `candidatesTokenCount` is not present.
@@ -833,9 +871,9 @@
     )
 
   def _error(self, status_code: int, content: str) -> lf.LMError:
-    if (…
-
-
+    if status_code == 400 and (
+        b'exceeds the maximum number of tokens' in content
+        or b'Reduce the input token count and try again.' in content
     ):
       return lf.ContextLimitError(f'{status_code}: {content}')
     return super()._error(status_code, content)
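Based on the request-building hunk above, request-level Gemini fields such as `tools` now appear to flow through `LMSamplingOptions.extras` (merged into the request) rather than the removed `gemini_tools` message metadata. The sketch below shows what that migration might look like; the model class and constructor shape are assumptions, not taken from the diff.

```python
# Hedged sketch: pass raw Gemini request fields via sampling_options.extras.
import langfun as lf

opts = lf.LMSamplingOptions(extras={'tools': [{'google_search': {}}]})
lm = lf.llms.Gemini3ProPreview(sampling_options=opts)  # Assumed constructor.
r = lm('What changed in the Gemini API this month?')
```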
langfun/core/llms/gemini_test.py
CHANGED
@@ -177,6 +177,58 @@ class GeminiTest(unittest.TestCase):
         ),
     )
 
+    # Add test for thinkingConfig with thinking_level.
+    actual = model._generation_config(
+        lf.UserMessage('hi'),
+        lf.LMSamplingOptions(
+            thinking_level='high',
+        ),
+    )
+    self.assertEqual(
+        actual,
+        dict(
+            candidateCount=1,
+            temperature=None,
+            topP=None,
+            topK=40,
+            maxOutputTokens=None,
+            stopSequences=None,
+            responseLogprobs=False,
+            logprobs=None,
+            seed=None,
+            thinkingConfig={'thinkingLevel': 'high'},
+        ),
+    )
+
+    # Add test for thinkingConfig with both max_thinking_tokens and
+    # thinking_level.
+    actual = model._generation_config(
+        lf.UserMessage('hi'),
+        lf.LMSamplingOptions(
+            max_thinking_tokens=100,
+            thinking_level='low',
+        ),
+    )
+    self.assertEqual(
+        actual,
+        dict(
+            candidateCount=1,
+            temperature=None,
+            topP=None,
+            topK=40,
+            maxOutputTokens=None,
+            stopSequences=None,
+            responseLogprobs=False,
+            logprobs=None,
+            seed=None,
+            thinkingConfig={
+                'includeThoughts': True,
+                'thinkingBudget': 100,
+                'thinkingLevel': 'low',
+            },
+        ),
+    )
+
     with self.assertRaisesRegex(
         ValueError, '`json_schema` must be a dict, got'
     ):
@@ -225,6 +277,38 @@ class GeminiTest(unittest.TestCase):
     ):
       lm('hello')
 
+  def test_call_model_with_max_tokens_error(self):
+    def mock_requests_post_error(*args, **kwargs):
+      del args, kwargs
+      response = requests.Response()
+      response.status_code = 200
+      response._content = pg.to_json_str({
+          'candidates': [
+              {
+                  'finishReason': 'MAX_TOKENS',
+                  'content': {
+                      'parts': [
+                          {
+                              'text': 'This is'
+                          }
+                      ]
+                  }
+              },
+          ],
+          'usageMetadata': {
+              'promptTokenCount': 3,
+              'candidatesTokenCount': 4,
+          }
+      }).encode()
+      return response
+
+    with mock.patch('requests.Session.post') as mock_generate:
+      mock_generate.side_effect = mock_requests_post_error
+      lm = gemini.Gemini('gemini-1.5-pro', api_endpoint='')
+      m = lm('hello')
+      self.assertEqual(m.metadata.finish_reason, 'MAX_TOKENS')
+      self.assertEqual(m.text, 'This is')
+
   def test_call_model_with_system_message(self):
     with mock.patch('requests.Session.post') as mock_generate:
       mock_generate.side_effect = mock_requests_post
langfun/core/llms/google_genai.py
CHANGED
@@ -25,7 +25,35 @@ import pyglove as pg
 @lf.use_init_args(['model'])
 @pg.members([('api_endpoint', pg.typing.Str().freeze(''))])
 class GenAI(gemini.Gemini):
-  """…
+  """Google GenAI models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call Gemini 1.5 Flash using API key from environment variable
+  # 'GOOGLE_API_KEY'.
+  lm = lf.llms.Gemini15Flash()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The Google API key can be specified in following ways:
+
+  1. At model instantiation:
+
+  ```python
+  lm = lf.llms.Gemini15Flash(api_key='MY_API_KEY')
+  ```
+  2. via environment variable `GOOGLE_API_KEY`.
+
+  **References:**
+
+  * https://ai.google.dev/docs
+  """
 
   model: pg.typing.Annotated[
       pg.typing.Enum(
@@ -87,9 +115,14 @@ class GenAI(gemini.Gemini):
 
 # pylint: disable=invalid-name
 
+
 #
 # Experimental models.
 #
+class Gemini3ProPreview(GenAI):
+  """Gemini 3 Pro Preview model."""
+
+  model = 'gemini-3-pro-preview'
 
 
 class Gemini25FlashImagePreview(GenAI):
langfun/core/llms/groq.py
CHANGED
@@ -259,10 +259,35 @@ _SUPPORTED_MODELS_BY_ID = {m.model_id: m for m in SUPPORTED_MODELS}
 
 
 @lf.use_init_args(['model'])
-class Groq(openai_compatible.…
-  """Groq…
+class Groq(openai_compatible.OpenAIChatCompletionAPI):
+  """Groq models.
 
-
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call Llama 3.3 70B on Groq using API key from environment variable
+  # 'GROQ_API_KEY'.
+  lm = lf.llms.GroqLlama33_70B_Versatile()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The Groq API key can be specified in following ways:
+
+  1. At model instantiation:
+
+  ```python
+  lm = lf.llms.GroqLlama33_70B_Versatile(api_key='MY_API_KEY')
+  ```
+  2. via environment variable `GROQ_API_KEY`.
+
+  **References:**
+
+  * https://console.groq.com/docs
   """
 
   model: pg.typing.Annotated[
langfun/core/llms/llama_cpp.py
CHANGED
@@ -20,11 +20,30 @@ import pyglove as pg
 
 @pg.use_init_args(['url', 'model'])
 @pg.members([('api_endpoint', pg.typing.Str().freeze(''))])
-class LlamaCppRemote(openai_compatible.…
-  """…
+class LlamaCppRemote(openai_compatible.OpenAIChatCompletionAPI):
+  """LLaMA C++ models served via a remote server.
 
-
-
+  This class provides an interface to interact with language models
+  hosted on a LLaMA C++ server, which is compatible with the OpenAI
+  Chat Completions API format.
+
+  **Quick Start:**
+
+  Assuming a LLaMA C++ server is running at `http://localhost:8080`,
+  you can interact with it as follows:
+
+  ```python
+  import langfun as lf
+
+  # If model name is not specified, it will use server's default.
+  lm = lf.llms.LlamaCppRemote(url='http://localhost:8080')
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **References:**
+
+  * https://github.com/ggerganov/llama.cpp/tree/master/examples/server
   """
   url: Annotated[
       str,
langfun/core/llms/openai.py
CHANGED
@@ -1031,8 +1031,36 @@ _SUPPORTED_MODELS_BY_MODEL_ID = {m.model_id: m for m in SUPPORTED_MODELS}
 
 
 @lf.use_init_args(['model'])
-class OpenAI(openai_compatible.…
-  """OpenAI…
+class OpenAI(openai_compatible.OpenAIResponsesAPI):
+  """OpenAI models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call GPT-4o using API key from environment variable 'OPENAI_API_KEY'.
+  lm = lf.llms.Gpt4o()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The OpenAI API key can be specified in following ways:
+
+  1. At model instantiation:
+
+  ```python
+  lm = lf.llms.Gpt4o(api_key='MY_API_KEY')
+  ```
+  2. via environment variable `OPENAI_API_KEY`.
+
+  **References:**
+
+  * https://platform.openai.com/docs/models
+  * https://platform.openai.com/docs/api-reference
+  """
 
   model: pg.typing.Annotated[
       pg.typing.Enum(
@@ -1041,7 +1069,12 @@ class OpenAI(openai_compatible.OpenAICompatible):
       'The name of the model to use.',
   ]
 
-
+  # Disable message storage by default.
+  sampling_options = lf.LMSamplingOptions(
+      extras={'store': False}
+  )
+
+  api_endpoint: str = 'https://api.openai.com/v1/responses'
 
   api_key: Annotated[
       str | None,