langfun 0.1.2.dev202509120804__py3-none-any.whl → 0.1.2.dev202512040805__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langfun/__init__.py +1 -1
- langfun/core/__init__.py +7 -1
- langfun/core/agentic/__init__.py +8 -1
- langfun/core/agentic/action.py +740 -112
- langfun/core/agentic/action_eval.py +9 -2
- langfun/core/agentic/action_test.py +189 -24
- langfun/core/async_support.py +104 -5
- langfun/core/async_support_test.py +23 -0
- langfun/core/coding/python/correction.py +19 -9
- langfun/core/coding/python/execution.py +14 -12
- langfun/core/coding/python/generation.py +21 -16
- langfun/core/coding/python/sandboxing.py +23 -3
- langfun/core/component.py +42 -3
- langfun/core/concurrent.py +70 -6
- langfun/core/concurrent_test.py +9 -2
- langfun/core/console.py +1 -1
- langfun/core/data/conversion/anthropic.py +12 -3
- langfun/core/data/conversion/anthropic_test.py +8 -6
- langfun/core/data/conversion/gemini.py +11 -2
- langfun/core/data/conversion/gemini_test.py +48 -9
- langfun/core/data/conversion/openai.py +145 -31
- langfun/core/data/conversion/openai_test.py +161 -17
- langfun/core/eval/base.py +48 -44
- langfun/core/eval/base_test.py +5 -5
- langfun/core/eval/matching.py +5 -2
- langfun/core/eval/patching.py +3 -3
- langfun/core/eval/scoring.py +4 -3
- langfun/core/eval/v2/__init__.py +2 -0
- langfun/core/eval/v2/checkpointing.py +76 -7
- langfun/core/eval/v2/checkpointing_test.py +9 -2
- langfun/core/eval/v2/config_saver.py +37 -0
- langfun/core/eval/v2/config_saver_test.py +36 -0
- langfun/core/eval/v2/eval_test_helper.py +104 -3
- langfun/core/eval/v2/evaluation.py +92 -17
- langfun/core/eval/v2/evaluation_test.py +9 -3
- langfun/core/eval/v2/example.py +50 -40
- langfun/core/eval/v2/example_test.py +16 -8
- langfun/core/eval/v2/experiment.py +84 -15
- langfun/core/eval/v2/experiment_test.py +19 -0
- langfun/core/eval/v2/metric_values.py +31 -3
- langfun/core/eval/v2/metric_values_test.py +32 -0
- langfun/core/eval/v2/metrics.py +157 -44
- langfun/core/eval/v2/metrics_test.py +39 -18
- langfun/core/eval/v2/progress.py +31 -1
- langfun/core/eval/v2/progress_test.py +27 -0
- langfun/core/eval/v2/progress_tracking.py +13 -5
- langfun/core/eval/v2/progress_tracking_test.py +9 -1
- langfun/core/eval/v2/reporting.py +90 -71
- langfun/core/eval/v2/reporting_test.py +24 -6
- langfun/core/eval/v2/runners/__init__.py +30 -0
- langfun/core/eval/v2/{runners.py → runners/base.py} +72 -180
- langfun/core/eval/v2/runners/beam.py +354 -0
- langfun/core/eval/v2/runners/beam_test.py +153 -0
- langfun/core/eval/v2/runners/ckpt_monitor.py +294 -0
- langfun/core/eval/v2/runners/ckpt_monitor_test.py +162 -0
- langfun/core/eval/v2/runners/debug.py +40 -0
- langfun/core/eval/v2/runners/debug_test.py +76 -0
- langfun/core/eval/v2/runners/parallel.py +243 -0
- langfun/core/eval/v2/runners/parallel_test.py +182 -0
- langfun/core/eval/v2/runners/sequential.py +47 -0
- langfun/core/eval/v2/runners/sequential_test.py +169 -0
- langfun/core/langfunc.py +45 -130
- langfun/core/langfunc_test.py +7 -5
- langfun/core/language_model.py +189 -36
- langfun/core/language_model_test.py +54 -3
- langfun/core/llms/__init__.py +12 -1
- langfun/core/llms/anthropic.py +157 -2
- langfun/core/llms/azure_openai.py +29 -17
- langfun/core/llms/cache/base.py +25 -3
- langfun/core/llms/cache/in_memory.py +48 -7
- langfun/core/llms/cache/in_memory_test.py +14 -4
- langfun/core/llms/compositional.py +25 -1
- langfun/core/llms/deepseek.py +30 -2
- langfun/core/llms/fake.py +32 -1
- langfun/core/llms/gemini.py +64 -12
- langfun/core/llms/gemini_test.py +110 -0
- langfun/core/llms/google_genai.py +34 -1
- langfun/core/llms/groq.py +28 -3
- langfun/core/llms/llama_cpp.py +23 -4
- langfun/core/llms/openai.py +120 -3
- langfun/core/llms/openai_compatible.py +148 -27
- langfun/core/llms/openai_compatible_test.py +207 -20
- langfun/core/llms/openai_test.py +0 -2
- langfun/core/llms/rest.py +16 -1
- langfun/core/llms/vertexai.py +58 -8
- langfun/core/logging.py +1 -1
- langfun/core/mcp/__init__.py +10 -0
- langfun/core/mcp/client.py +177 -0
- langfun/core/mcp/client_test.py +71 -0
- langfun/core/mcp/session.py +241 -0
- langfun/core/mcp/session_test.py +54 -0
- langfun/core/mcp/testing/simple_mcp_client.py +33 -0
- langfun/core/mcp/testing/simple_mcp_server.py +33 -0
- langfun/core/mcp/tool.py +254 -0
- langfun/core/mcp/tool_test.py +197 -0
- langfun/core/memory.py +1 -0
- langfun/core/message.py +160 -55
- langfun/core/message_test.py +65 -81
- langfun/core/modalities/__init__.py +8 -0
- langfun/core/modalities/audio.py +21 -1
- langfun/core/modalities/image.py +73 -3
- langfun/core/modalities/image_test.py +116 -0
- langfun/core/modalities/mime.py +64 -3
- langfun/core/modalities/mime_test.py +11 -0
- langfun/core/modalities/pdf.py +19 -1
- langfun/core/modalities/video.py +21 -1
- langfun/core/modality.py +167 -29
- langfun/core/modality_test.py +42 -12
- langfun/core/natural_language.py +1 -1
- langfun/core/sampling.py +4 -4
- langfun/core/sampling_test.py +20 -4
- langfun/core/structured/__init__.py +2 -24
- langfun/core/structured/completion.py +34 -44
- langfun/core/structured/completion_test.py +23 -43
- langfun/core/structured/description.py +54 -50
- langfun/core/structured/function_generation.py +29 -12
- langfun/core/structured/mapping.py +81 -37
- langfun/core/structured/parsing.py +95 -79
- langfun/core/structured/parsing_test.py +0 -3
- langfun/core/structured/querying.py +230 -154
- langfun/core/structured/querying_test.py +69 -33
- langfun/core/structured/schema/__init__.py +49 -0
- langfun/core/structured/schema/base.py +664 -0
- langfun/core/structured/schema/base_test.py +531 -0
- langfun/core/structured/schema/json.py +174 -0
- langfun/core/structured/schema/json_test.py +121 -0
- langfun/core/structured/schema/python.py +316 -0
- langfun/core/structured/schema/python_test.py +410 -0
- langfun/core/structured/schema_generation.py +33 -14
- langfun/core/structured/scoring.py +47 -36
- langfun/core/structured/tokenization.py +26 -11
- langfun/core/subscription.py +2 -2
- langfun/core/template.py +175 -50
- langfun/core/template_test.py +123 -17
- langfun/env/__init__.py +43 -0
- langfun/env/base_environment.py +827 -0
- langfun/env/base_environment_test.py +473 -0
- langfun/env/base_feature.py +304 -0
- langfun/env/base_feature_test.py +228 -0
- langfun/env/base_sandbox.py +842 -0
- langfun/env/base_sandbox_test.py +1235 -0
- langfun/env/event_handlers/__init__.py +14 -0
- langfun/env/event_handlers/chain.py +233 -0
- langfun/env/event_handlers/chain_test.py +253 -0
- langfun/env/event_handlers/event_logger.py +472 -0
- langfun/env/event_handlers/event_logger_test.py +304 -0
- langfun/env/event_handlers/metric_writer.py +726 -0
- langfun/env/event_handlers/metric_writer_test.py +214 -0
- langfun/env/interface.py +1640 -0
- langfun/env/interface_test.py +153 -0
- langfun/env/load_balancers.py +59 -0
- langfun/env/load_balancers_test.py +141 -0
- langfun/env/test_utils.py +507 -0
- {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512040805.dist-info}/METADATA +7 -3
- langfun-0.1.2.dev202512040805.dist-info/RECORD +217 -0
- langfun/core/eval/v2/runners_test.py +0 -343
- langfun/core/structured/schema.py +0 -987
- langfun/core/structured/schema_test.py +0 -982
- langfun-0.1.2.dev202509120804.dist-info/RECORD +0 -172
- {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512040805.dist-info}/WHEEL +0 -0
- {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512040805.dist-info}/licenses/LICENSE +0 -0
- {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512040805.dist-info}/top_level.txt +0 -0
langfun/core/llms/anthropic.py
CHANGED
@@ -59,6 +59,60 @@ class AnthropicModelInfo(lf.ModelInfo):
 
 
 SUPPORTED_MODELS = [
+    AnthropicModelInfo(
+        model_id='claude-haiku-4-5-20251001',
+        provider='Anthropic',
+        in_service=True,
+        description='Claude 4.5 Haiku model (10/15/2025).',
+        release_date=datetime.datetime(2025, 10, 15),
+        input_modalities=(
+            AnthropicModelInfo.INPUT_IMAGE_TYPES
+            + AnthropicModelInfo.INPUT_DOC_TYPES
+        ),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=200_000,
+            max_output_tokens=64_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.1,
+            cost_per_1m_input_tokens=1,
+            cost_per_1m_output_tokens=5,
+        ),
+        rate_limits=AnthropicModelInfo.RateLimits(
+            # Tier 4 rate limits
+            max_requests_per_minute=4000,
+            max_input_tokens_per_minute=4_000_000,
+            max_output_tokens_per_minute=800_000,
+        ),
+    ),
+    AnthropicModelInfo(
+        model_id='claude-sonnet-4-5-20250929',
+        provider='Anthropic',
+        in_service=True,
+        description='Claude 4.5 Sonnet model (9/29/2025).',
+        release_date=datetime.datetime(2025, 9, 29),
+        input_modalities=(
+            AnthropicModelInfo.INPUT_IMAGE_TYPES
+            + AnthropicModelInfo.INPUT_DOC_TYPES
+        ),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=200_000,
+            max_output_tokens=64_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.3,
+            cost_per_1m_input_tokens=3,
+            cost_per_1m_output_tokens=15,
+        ),
+        rate_limits=AnthropicModelInfo.RateLimits(
+            # Tier 4 rate limits
+            # This rate limit is a total limit that applies to combined traffic
+            # across both Sonnet 4 and Sonnet 4.5.
+            max_requests_per_minute=4000,
+            max_input_tokens_per_minute=2_000_000,
+            max_output_tokens_per_minute=400_000,
+        ),
+    ),
     AnthropicModelInfo(
         model_id='claude-4-opus-20250514',
         provider='Anthropic',
@@ -190,6 +244,62 @@ SUPPORTED_MODELS = [
             max_output_tokens_per_minute=80_000,
         ),
     ),
+    AnthropicModelInfo(
+        model_id='claude-haiku-4-5@20251001',
+        alias_for='claude-haiku-4-5-20251001',
+        provider='VertexAI',
+        in_service=True,
+        description='Claude 4.5 Haiku model served on VertexAI (10/15/2025).',
+        release_date=datetime.datetime(2025, 10, 15),
+        input_modalities=(
+            AnthropicModelInfo.INPUT_IMAGE_TYPES
+            + AnthropicModelInfo.INPUT_DOC_TYPES
+        ),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=200_000,
+            max_output_tokens=64_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            # For global endpoint
+            cost_per_1m_cached_input_tokens=0.1,
+            cost_per_1m_input_tokens=1,
+            cost_per_1m_output_tokens=5,
+        ),
+        rate_limits=AnthropicModelInfo.RateLimits(
+            # For global endpoint
+            max_requests_per_minute=2500,
+            max_input_tokens_per_minute=200_000,
+            max_output_tokens_per_minute=0,
+        ),
+    ),
+    AnthropicModelInfo(
+        model_id='claude-sonnet-4-5@20250929',
+        alias_for='claude-sonnet-4-5-20250929',
+        provider='VertexAI',
+        in_service=True,
+        description='Claude 4.5 Sonnet model (9/29/2025).',
+        release_date=datetime.datetime(2025, 9, 29),
+        input_modalities=(
+            AnthropicModelInfo.INPUT_IMAGE_TYPES
+            + AnthropicModelInfo.INPUT_DOC_TYPES
+        ),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=200_000,
+            max_output_tokens=64_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            # For global endpoint
+            cost_per_1m_cached_input_tokens=0.3,
+            cost_per_1m_input_tokens=3,
+            cost_per_1m_output_tokens=15,
+        ),
+        rate_limits=AnthropicModelInfo.RateLimits(
+            # For global endpoint
+            max_requests_per_minute=1500,
+            max_input_tokens_per_minute=200_000,
+            max_output_tokens_per_minute=0,
+        ),
+    ),
     AnthropicModelInfo(
         model_id='claude-opus-4@20250514',
         alias_for='claude-opus-4-20250514',
@@ -540,9 +650,34 @@ _SUPPORTED_MODELS_BY_MODEL_ID = {m.model_id: m for m in SUPPORTED_MODELS}
 
 @lf.use_init_args(['model'])
 class Anthropic(rest.REST):
-  """Anthropic
+  """Anthropic Claude models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call Claude 3.5 Sonnet using API key from environment variable
+  # 'ANTHROPIC_API_KEY'.
+  lm = lf.llms.Claude35Sonnet()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The Anthropic API key can be specified in following ways:
+
+  1. At model instantiation:
+
+    ```python
+    lm = lf.llms.Claude35Sonnet(api_key='MY_API_KEY')
+
+  2. via environment variable `ANTHROPIC_API_KEY`.
 
-
+  **References:**
+
+  * https://docs.anthropic.com/claude/reference/messages_post
   """
 
   model: pg.typing.Annotated[
@@ -658,6 +793,8 @@ class Anthropic(rest.REST):
     args.pop('temperature', None)
     args.pop('top_k', None)
    args.pop('top_p', None)
+    if options.extras:
+      args.update(options.extras)
     return args
 
   def result(self, json: dict[str, Any]) -> lf.LMSamplingResult:
@@ -679,6 +816,24 @@ class Anthropic(rest.REST):
     return super()._error(status_code, content)
 
 
+class Claude45(Anthropic):
+  """Base class for Claude 4.5 models."""
+
+
+# pylint: disable=invalid-name
+class Claude45Haiku_20251001(Claude45):
+  """Claude 4.5 Haiku model 20251001."""
+
+  model = 'claude-haiku-4-5-20251001'
+
+
+# pylint: disable=invalid-name
+class Claude45Sonnet_20250929(Claude45):
+  """Claude 4.5 Sonnet model 20250929."""
+
+  model = 'claude-sonnet-4-5-20250929'
+
+
 class Claude4(Anthropic):
   """Base class for Claude 4 models."""
 
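The hunks above register the Claude 4.5 Haiku/Sonnet model infos, add `Claude45*` convenience classes, and forward `options.extras` into the request payload. A minimal usage sketch, assuming the class names behave exactly as added in this diff (API key and model availability are the caller's responsibility):

```python
import langfun as lf

# Classes added in this diff; the API key is read from ANTHROPIC_API_KEY,
# or passed explicitly as shown for the Haiku variant.
sonnet = lf.llms.Claude45Sonnet_20250929()
haiku = lf.llms.Claude45Haiku_20251001(api_key='MY_API_KEY')

# Mirrors the Quick Start example added to the Anthropic docstring.
print(sonnet('Who are you?'))
```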
langfun/core/llms/azure_openai.py
CHANGED
@@ -23,23 +23,35 @@ import pyglove as pg
 @lf.use_init_args(['model', 'deployment_name'])
 @pg.members([('api_endpoint', pg.typing.Str().freeze(''))])
 class AzureOpenAI(openai.OpenAI):
-  """Azure OpenAI
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+  """Azure OpenAI models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call GPT-4o on Azure using API key from environment variable
+  # 'AZURE_OPENAI_API_KEY'.
+  lm = lf.llms.AzureOpenAI(
+      model='gpt-4o',
+      deployment_name='my-gpt4o-deployment',
+      api_version='2024-08-01-preview',
+      azure_endpoint='https://my-resource.openai.azure.com/',
+  )
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The Azure OpenAI API key can be specified in following ways:
+
+  1. At model instantiation:
+
+    ```python
+    lm = lf.llms.AzureOpenAI(..., api_key='MY_API_KEY')
+    ```
+  2. via environment variable `AZURE_OPENAI_API_KEY`.
   """
 
   deployment_name: Annotated[
langfun/core/llms/cache/base.py
CHANGED
@@ -22,13 +22,33 @@ import langfun.core as lf
 
 @dataclasses.dataclass(frozen=True)
 class LMCacheEntry:
-  """
+  """Represents a single entry in the language model cache.
+
+  An `LMCacheEntry` stores the result of a language model sampling operation
+  and an optional expiration timestamp.
+  """
   result: lf.LMSamplingResult
   expire: datetime.datetime | None = None
 
 
 class LMCacheBase(lf.LMCache):
-  """
+  """Base class for language model cache implementations.
+
+  `LMCacheBase` provides the core logic for a key-value based cache,
+  handling key generation, expiration (TTL), and statistics tracking.
+  Subclasses must implement the abstract methods `_get`, `_put`, and `_delete`
+  to provide the specific storage mechanism (e.g., in-memory, file-based).
+
+  **Key Features:**
+
+  * **Customizable Keying**: Allows specifying a custom function to generate
+    cache keys based on the language model, prompt, and seed. If not provided,
+    a default key based on prompt text, sampling options, and seed is used.
+  * **Time-to-Live (TTL)**: Supports setting an expiration time for cache
+    entries, after which they are considered invalid and removed upon access.
+  * **Cache Statistics**: Tracks metrics like hits, misses, updates,
+    deletions, and expired hits through the `stats` property.
+  """
 
   key: Annotated[
       Callable[[lf.LanguageModel, lf.Message, int], Any] | None,
@@ -121,4 +141,6 @@ class LMCacheBase(lf.LMCache):
 
 def default_key(lm: lf.LanguageModel, prompt: lf.Message, seed: int) -> Any:
   """Default key for LM cache."""
-
+  # prompt text already contains the modality id for referenced modality
+  # objects, so no need to include them in the key.
+  return (prompt.text, lm.sampling_options.cache_key(), seed)
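The `key` field retained in the context above accepts a `Callable[[lf.LanguageModel, lf.Message, int], Any]`, and `default_key` now keys on the prompt text (which already embeds modality ids), the sampling options, and the seed. A small sketch of plugging in a custom key function, assuming only the signature shown in the diff:

```python
import langfun as lf

def model_aware_key(lm: lf.LanguageModel, prompt: lf.Message, seed: int):
  # Include the model id so different models never share cache entries.
  # The trailing elements follow the default_key shown in the diff.
  return (lm.model_id, prompt.text, lm.sampling_options.cache_key(), seed)

cache = lf.llms.cache.InMemory(key=model_aware_key)
lm = lf.llms.Echo(cache=cache)   # Echo is one of the fake test models.
print(lm('hello'))
```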
langfun/core/llms/cache/in_memory.py
CHANGED
@@ -24,7 +24,32 @@ import pyglove as pg
 
 @pg.use_init_args(['filename', 'ttl', 'key'])
 class InMemory(base.LMCacheBase):
-  """
+  """An in-memory cache for language model lookups.
+
+  `InMemory` stores LM prompts and their corresponding responses in memory,
+  providing a simple and fast caching mechanism for a single session.
+  Optionally, it can persist the cache to a JSON file on disk, allowing
+  results to be reused across sessions.
+
+  When a filename is provided, the cache will be loaded from the file upon
+  initialization and saved to the file when `save()` is called. This is
+  useful for caching results in interactive environments like Colab or
+  when running batch jobs.
+
+  Example:
+
+  ```python
+  import langfun as lf
+  # Using in-memory cache without persistence
+  lm = lf.llms.GeminiPro(cache=lf.llms.cache.InMemory())
+  r = lm.query('hello')
+
+  # Using in-memory cache with persistence
+  lm = lf.llms.GeminiPro(cache=lf.llms.cache.InMemory('cache.json'))
+  r = lm.query('hello')
+  lm.cache.save()
+  ```
+  """
 
   filename: Annotated[
       str | None,
@@ -144,17 +169,33 @@ class InMemory(base.LMCacheBase):
 
 @contextlib.contextmanager
 def lm_cache(filename: str | None = None) -> Iterator[InMemory]:
-  """Context manager to enable cache for LMs
+  """Context manager to enable in-memory cache for LMs in the current context.
+
+  This context manager sets an `InMemory` cache as the default cache for
+  any Langfun language model instantiated within its scope, unless a model
+  is explicitly configured with a different cache.
+
+  If a `filename` is provided, the cache will be loaded from the specified
+  file at the beginning of the context and automatically saved back to the
+  file upon exiting the context. This is a convenient way to manage
+  persistent caching for a block of code.
+
+  Example:
 
-
-
-
+  ```python
+  import langfun as lf
+  with lf.lm_cache('my_cache.json'):
+    # LMs created here will use 'my_cache.json' for caching.
+    lm = lf.llms.GeminiPro()
+    print(lm.query('hello'))
+  ```
 
   Args:
-    filename: If
+    filename: If provided, specifies the JSON file for loading and saving
+      the cache.
 
   Yields:
-
+    The `InMemory` cache instance created for this context.
   """
   cache = InMemory(filename)
   try:
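Besides `filename` and `key`, the retained `@pg.use_init_args(['filename', 'ttl', 'key'])` decorator exposes a `ttl` init argument for the entry expiration described in `LMCacheBase`. A short sketch, assuming `ttl` is given in seconds:

```python
import langfun as lf

# Cache entries expire after one hour (assuming `ttl` is in seconds).
cache = lf.llms.cache.InMemory('cache.json', ttl=3600)
lm = lf.llms.StaticResponse('world', cache=cache)

print(lm('hello'))   # Miss: stored with an expiration timestamp.
print(lm('hello'))   # Hit, as long as the entry has not expired.
cache.save()         # Persist to 'cache.json' for reuse across sessions.
```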
langfun/core/llms/cache/in_memory_test.py
CHANGED
@@ -175,18 +175,28 @@ class InMemoryLMCacheTest(unittest.TestCase):
 
     cache = in_memory.InMemory()
     lm = fake.StaticSequence(['1', '2', '3', '4', '5', '6'], cache=cache)
-
-
+    image_foo = CustomModality('foo')
+    image_bar = CustomModality('bar')
+    lm(
+        lf.UserMessage(
+            f'hi <<[[{image_foo.id}]]>>', referred_modalities=[image_foo]
+        )
+    )
+    lm(
+        lf.UserMessage(
+            f'hi <<[[{image_bar.id}]]>>', referred_modalities=[image_bar]
+        )
+    )
     self.assertEqual(
         list(cache.keys()),
         [
            (
-                'hi <<[[
+                f'hi <<[[{image_foo.id}]]>>',
                (None, None, 1, 40, None, None),
                0,
            ),
            (
-                'hi <<[[
+                f'hi <<[[{image_bar.id}]]>>',
                (None, None, 1, 40, None, None),
                0,
            ),
langfun/core/llms/compositional.py
CHANGED
@@ -21,7 +21,31 @@ import pyglove as pg
 
 @pg.use_init_args(['candidates', 'seed'])
 class RandomChoice(lf.LanguageModel):
-  """
+  """A composite language model that randomly selects from a list of candidates.
+
+  `RandomChoice` acts as a proxy that forwards each request (`sample`, `score`,
+  `tokenize`, or `__call__`) to one of the `candidates` selected randomly.
+  This can be useful for load balancing across multiple LLM endpoints,
+  for A/B testing different models, or for ensembling model outputs
+  by calling it multiple times.
+
+  The selection is determined by the provided `seed`, ensuring reproducibility
+  if needed.
+
+  Example:
+
+  ```python
+  import langfun as lf
+
+  lm = lf.llms.RandomChoice([
+      lf.llms.GeminiPro(),
+      lf.llms.GPT4(),
+  ])
+
+  # This call will be handled by either GeminiPro or GPT4, chosen randomly.
+  r = lm.sample('hello')
+  ```
+  """
 
   candidates: Annotated[
       list[lf.LanguageModel],
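Since `seed` is the second positional init argument per the retained `@pg.use_init_args(['candidates', 'seed'])`, the random routing can be pinned for reproducible tests. A sketch using the fake models documented later in this diff:

```python
import langfun as lf

# Fixing `seed` makes the candidate selection deterministic across runs.
lm = lf.llms.RandomChoice(
    [lf.llms.StaticResponse('from A'), lf.llms.StaticResponse('from B')],
    seed=1,
)
print(lm('hello'))  # Always routed to the same candidate for seed=1.
```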
langfun/core/llms/deepseek.py
CHANGED
@@ -93,8 +93,36 @@ _SUPPORTED_MODELS_BY_ID = {m.model_id: m for m in SUPPORTED_MODELS}
 # DeepSeek API uses an API format compatible with OpenAI.
 # Reference: https://api-docs.deepseek.com/
 @lf.use_init_args(['model'])
-class DeepSeek(openai_compatible.
-  """DeepSeek
+class DeepSeek(openai_compatible.OpenAIChatCompletionAPI):
+  """DeepSeek models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call DeepSeek-V3 using API key from environment variable
+  # 'DEEPSEEK_API_KEY'.
+  lm = lf.llms.DeepSeekV3()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The DeepSeek API key can be specified in following ways:
+
+  1. At model instantiation:
+
+    ```python
+    lm = lf.llms.DeepSeekV3(api_key='MY_API_KEY')
+    ```
+  2. via environment variable `DEEPSEEK_API_KEY`.
+
+  **References:**
+
+  * https://api-docs.deepseek.com/
+  """
 
   model: pg.typing.Annotated[
       pg.typing.Enum(
langfun/core/llms/fake.py
CHANGED
@@ -20,7 +20,38 @@ import langfun.core as lf
 
 
 class Fake(lf.LanguageModel):
-  """
+  """Base class for fake language models, used for testing.
+
+  Fake models simulate the behavior of real language models but return
+  pre-defined responses, making them useful for testing prompts,
+  data processing logic, and agent behavior without incurring API costs
+  or relying on external services.
+
+  Langfun provides several fake models:
+  * `lf.llms.Echo`: Echoes the prompt back as the response.
+  * `lf.llms.StaticResponse`: Returns a fixed, pre-defined response for
+    any prompt.
+  * `lf.llms.StaticMapping`: Returns responses based on a prompt-to-response
+    dictionary.
+  * `lf.llms.StaticSequence`: Returns responses from a pre-defined sequence
+    in order.
+
+  **Example:**
+
+  ```python
+  import langfun as lf
+
+  # Use Echo model for testing
+  lm = lf.llms.Echo()
+  response = lm('hello')
+  assert response.text == 'hello'
+
+  # Use StaticResponse model
+  lm = lf.llms.StaticResponse('world')
+  response = lm('hello')
+  assert response.text == 'world'
+  ```
+  """
 
   def _score(self, prompt: lf.Message| list[lf.Message],
              completions: list[lf.Message]):
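The docstring above only demonstrates `Echo` and `StaticResponse`; the other two fakes it lists can be sketched the same way. The constructor shapes below are inferred from their descriptions and from the `StaticSequence` call visible in the in-memory cache test above, so treat them as illustrative:

```python
import langfun as lf

# StaticSequence: responses are returned in order, one per call
# (same constructor shape as used in in_memory_test.py above).
seq = lf.llms.StaticSequence(['first', 'second'])
assert seq('a').text == 'first'
assert seq('b').text == 'second'

# StaticMapping: responses keyed by the prompt (illustrative).
mapping = lf.llms.StaticMapping({'ping': 'pong'})
assert mapping('ping').text == 'pong'
```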
langfun/core/llms/gemini.py
CHANGED
@@ -151,6 +151,32 @@ SUPPORTED_MODELS = [
     #
     # Production models.
     #
+    # Gemini 3 Pro Preview
+    GeminiModelInfo(
+        model_id='gemini-3-pro-preview',
+        in_service=True,
+        provider=pg.oneof(['Google GenAI', 'VertexAI']),
+        model_type='instruction-tuned',
+        description='Gemini 3 Pro Preview.',
+        release_date=datetime.datetime(2025, 11, 18),
+        input_modalities=GeminiModelInfo.ALL_SUPPORTED_INPUT_TYPES,
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=1_048_576,
+            max_output_tokens=65_536,
+        ),
+        pricing=GeminiModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.2,
+            cost_per_1m_input_tokens=2.0,
+            cost_per_1m_output_tokens=12.0,
+            cost_per_1m_cached_input_tokens_with_prompt_longer_than_128k=0.4,
+            cost_per_1m_input_tokens_with_prompt_longer_than_128k=4.0,
+            cost_per_1m_output_tokens_with_prompt_longer_than_128k=18.0,
+        ),
+        rate_limits=lf.ModelInfo.RateLimits(
+            max_requests_per_minute=2000,
+            max_tokens_per_minute=4_000_000,
+        ),
+    ),
     # Gemini 2.5 Flash
     GeminiModelInfo(
         model_id='gemini-2.5-flash',
@@ -696,7 +722,15 @@ _SUPPORTED_MODELS_BY_ID = {m.model_id: m for m in SUPPORTED_MODELS}
 
 @pg.use_init_args(['model'])
 class Gemini(rest.REST):
-  """
+  """Base class for Gemini models served on Google GenAI and Vertex AI.
+
+  This class implements the Gemini API protocol, shared by
+  `lf.llms.GoogleGenAI` and `lf.llms.VertexAI`, providing common request
+  formatting and response parsing for Gemini models.
+
+  It is not intended to be used directly. Please use `lf.llms.GoogleGenAI` or
+  `lf.llms.VertexAI` instead.
+  """
 
   model: pg.typing.Annotated[
       pg.typing.Enum(
@@ -752,6 +786,13 @@ class Gemini(rest.REST):
         prompt.as_format('gemini', chunk_preprocessor=modality_conversion)
     )
     request['contents'] = contents
+    request['toolConfig'] = {
+        'functionCallingConfig': {
+            'mode': 'NONE',
+        }
+    }
+    if sampling_options.extras:
+      request.update(sampling_options.extras)
     return request
 
   def _generation_config(
@@ -783,11 +824,18 @@ class Gemini(rest.REST):
           + '\n\n [RESPONSE FORMAT (not part of prompt)]\n'
           + pg.to_json_str(json_schema, json_indent=2)
       )
+    thinking_config_data = {}
     if options.max_thinking_tokens is not None:
-
-
-
-
+      thinking_config_data['includeThoughts'] = options.max_thinking_tokens > 0
+      thinking_config_data['thinkingBudget'] = options.max_thinking_tokens
+    if options.thinking_level is not None:
+      thinking_config_data['thinkingLevel'] = options.thinking_level
+    if thinking_config_data:
+      config['thinkingConfig'] = thinking_config_data
+
+    # This is the new feature since Gemini 3.
+    if self.model_id.startswith('gemini-3'):
+      config['mediaResolution'] = 'MEDIA_RESOLUTION_HIGH'
 
     if self.response_modalities:
       config['responseModalities'] = self.response_modalities
@@ -803,10 +851,14 @@ class Gemini(rest.REST):
           'No candidates found in response. This is a Gemini API issue that '
          'happens occasionally, and retrying should fix it. '
       )
-
-
-
-
+
+    messages = []
+    for candidate in candidates:
+      message = lf.Message.from_value(candidate['content'], format='gemini')
+      if finish_reason := candidate.get('finishReason'):
+        message.metadata['finish_reason'] = finish_reason
+      messages.append(message)
+
     usage = json['usageMetadata']
     input_tokens = usage['promptTokenCount']
     # NOTE(daiyip): We saw cases that `candidatesTokenCount` is not present.
@@ -828,9 +880,9 @@ class Gemini(rest.REST):
     )
 
   def _error(self, status_code: int, content: str) -> lf.LMError:
-    if (
-
-
+    if status_code == 400 and (
+        b'exceeds the maximum number of tokens' in content
+        or b'Reduce the input token count and try again.' in content
     ):
       return lf.ContextLimitError(f'{status_code}: {content}')
     return super()._error(status_code, content)