langfun 0.1.2.dev202509120804__py3-none-any.whl → 0.1.2.dev202512040805__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162)
  1. langfun/__init__.py +1 -1
  2. langfun/core/__init__.py +7 -1
  3. langfun/core/agentic/__init__.py +8 -1
  4. langfun/core/agentic/action.py +740 -112
  5. langfun/core/agentic/action_eval.py +9 -2
  6. langfun/core/agentic/action_test.py +189 -24
  7. langfun/core/async_support.py +104 -5
  8. langfun/core/async_support_test.py +23 -0
  9. langfun/core/coding/python/correction.py +19 -9
  10. langfun/core/coding/python/execution.py +14 -12
  11. langfun/core/coding/python/generation.py +21 -16
  12. langfun/core/coding/python/sandboxing.py +23 -3
  13. langfun/core/component.py +42 -3
  14. langfun/core/concurrent.py +70 -6
  15. langfun/core/concurrent_test.py +9 -2
  16. langfun/core/console.py +1 -1
  17. langfun/core/data/conversion/anthropic.py +12 -3
  18. langfun/core/data/conversion/anthropic_test.py +8 -6
  19. langfun/core/data/conversion/gemini.py +11 -2
  20. langfun/core/data/conversion/gemini_test.py +48 -9
  21. langfun/core/data/conversion/openai.py +145 -31
  22. langfun/core/data/conversion/openai_test.py +161 -17
  23. langfun/core/eval/base.py +48 -44
  24. langfun/core/eval/base_test.py +5 -5
  25. langfun/core/eval/matching.py +5 -2
  26. langfun/core/eval/patching.py +3 -3
  27. langfun/core/eval/scoring.py +4 -3
  28. langfun/core/eval/v2/__init__.py +2 -0
  29. langfun/core/eval/v2/checkpointing.py +76 -7
  30. langfun/core/eval/v2/checkpointing_test.py +9 -2
  31. langfun/core/eval/v2/config_saver.py +37 -0
  32. langfun/core/eval/v2/config_saver_test.py +36 -0
  33. langfun/core/eval/v2/eval_test_helper.py +104 -3
  34. langfun/core/eval/v2/evaluation.py +92 -17
  35. langfun/core/eval/v2/evaluation_test.py +9 -3
  36. langfun/core/eval/v2/example.py +50 -40
  37. langfun/core/eval/v2/example_test.py +16 -8
  38. langfun/core/eval/v2/experiment.py +84 -15
  39. langfun/core/eval/v2/experiment_test.py +19 -0
  40. langfun/core/eval/v2/metric_values.py +31 -3
  41. langfun/core/eval/v2/metric_values_test.py +32 -0
  42. langfun/core/eval/v2/metrics.py +157 -44
  43. langfun/core/eval/v2/metrics_test.py +39 -18
  44. langfun/core/eval/v2/progress.py +31 -1
  45. langfun/core/eval/v2/progress_test.py +27 -0
  46. langfun/core/eval/v2/progress_tracking.py +13 -5
  47. langfun/core/eval/v2/progress_tracking_test.py +9 -1
  48. langfun/core/eval/v2/reporting.py +90 -71
  49. langfun/core/eval/v2/reporting_test.py +24 -6
  50. langfun/core/eval/v2/runners/__init__.py +30 -0
  51. langfun/core/eval/v2/{runners.py → runners/base.py} +72 -180
  52. langfun/core/eval/v2/runners/beam.py +354 -0
  53. langfun/core/eval/v2/runners/beam_test.py +153 -0
  54. langfun/core/eval/v2/runners/ckpt_monitor.py +294 -0
  55. langfun/core/eval/v2/runners/ckpt_monitor_test.py +162 -0
  56. langfun/core/eval/v2/runners/debug.py +40 -0
  57. langfun/core/eval/v2/runners/debug_test.py +76 -0
  58. langfun/core/eval/v2/runners/parallel.py +243 -0
  59. langfun/core/eval/v2/runners/parallel_test.py +182 -0
  60. langfun/core/eval/v2/runners/sequential.py +47 -0
  61. langfun/core/eval/v2/runners/sequential_test.py +169 -0
  62. langfun/core/langfunc.py +45 -130
  63. langfun/core/langfunc_test.py +7 -5
  64. langfun/core/language_model.py +189 -36
  65. langfun/core/language_model_test.py +54 -3
  66. langfun/core/llms/__init__.py +12 -1
  67. langfun/core/llms/anthropic.py +157 -2
  68. langfun/core/llms/azure_openai.py +29 -17
  69. langfun/core/llms/cache/base.py +25 -3
  70. langfun/core/llms/cache/in_memory.py +48 -7
  71. langfun/core/llms/cache/in_memory_test.py +14 -4
  72. langfun/core/llms/compositional.py +25 -1
  73. langfun/core/llms/deepseek.py +30 -2
  74. langfun/core/llms/fake.py +32 -1
  75. langfun/core/llms/gemini.py +64 -12
  76. langfun/core/llms/gemini_test.py +110 -0
  77. langfun/core/llms/google_genai.py +34 -1
  78. langfun/core/llms/groq.py +28 -3
  79. langfun/core/llms/llama_cpp.py +23 -4
  80. langfun/core/llms/openai.py +120 -3
  81. langfun/core/llms/openai_compatible.py +148 -27
  82. langfun/core/llms/openai_compatible_test.py +207 -20
  83. langfun/core/llms/openai_test.py +0 -2
  84. langfun/core/llms/rest.py +16 -1
  85. langfun/core/llms/vertexai.py +58 -8
  86. langfun/core/logging.py +1 -1
  87. langfun/core/mcp/__init__.py +10 -0
  88. langfun/core/mcp/client.py +177 -0
  89. langfun/core/mcp/client_test.py +71 -0
  90. langfun/core/mcp/session.py +241 -0
  91. langfun/core/mcp/session_test.py +54 -0
  92. langfun/core/mcp/testing/simple_mcp_client.py +33 -0
  93. langfun/core/mcp/testing/simple_mcp_server.py +33 -0
  94. langfun/core/mcp/tool.py +254 -0
  95. langfun/core/mcp/tool_test.py +197 -0
  96. langfun/core/memory.py +1 -0
  97. langfun/core/message.py +160 -55
  98. langfun/core/message_test.py +65 -81
  99. langfun/core/modalities/__init__.py +8 -0
  100. langfun/core/modalities/audio.py +21 -1
  101. langfun/core/modalities/image.py +73 -3
  102. langfun/core/modalities/image_test.py +116 -0
  103. langfun/core/modalities/mime.py +64 -3
  104. langfun/core/modalities/mime_test.py +11 -0
  105. langfun/core/modalities/pdf.py +19 -1
  106. langfun/core/modalities/video.py +21 -1
  107. langfun/core/modality.py +167 -29
  108. langfun/core/modality_test.py +42 -12
  109. langfun/core/natural_language.py +1 -1
  110. langfun/core/sampling.py +4 -4
  111. langfun/core/sampling_test.py +20 -4
  112. langfun/core/structured/__init__.py +2 -24
  113. langfun/core/structured/completion.py +34 -44
  114. langfun/core/structured/completion_test.py +23 -43
  115. langfun/core/structured/description.py +54 -50
  116. langfun/core/structured/function_generation.py +29 -12
  117. langfun/core/structured/mapping.py +81 -37
  118. langfun/core/structured/parsing.py +95 -79
  119. langfun/core/structured/parsing_test.py +0 -3
  120. langfun/core/structured/querying.py +230 -154
  121. langfun/core/structured/querying_test.py +69 -33
  122. langfun/core/structured/schema/__init__.py +49 -0
  123. langfun/core/structured/schema/base.py +664 -0
  124. langfun/core/structured/schema/base_test.py +531 -0
  125. langfun/core/structured/schema/json.py +174 -0
  126. langfun/core/structured/schema/json_test.py +121 -0
  127. langfun/core/structured/schema/python.py +316 -0
  128. langfun/core/structured/schema/python_test.py +410 -0
  129. langfun/core/structured/schema_generation.py +33 -14
  130. langfun/core/structured/scoring.py +47 -36
  131. langfun/core/structured/tokenization.py +26 -11
  132. langfun/core/subscription.py +2 -2
  133. langfun/core/template.py +175 -50
  134. langfun/core/template_test.py +123 -17
  135. langfun/env/__init__.py +43 -0
  136. langfun/env/base_environment.py +827 -0
  137. langfun/env/base_environment_test.py +473 -0
  138. langfun/env/base_feature.py +304 -0
  139. langfun/env/base_feature_test.py +228 -0
  140. langfun/env/base_sandbox.py +842 -0
  141. langfun/env/base_sandbox_test.py +1235 -0
  142. langfun/env/event_handlers/__init__.py +14 -0
  143. langfun/env/event_handlers/chain.py +233 -0
  144. langfun/env/event_handlers/chain_test.py +253 -0
  145. langfun/env/event_handlers/event_logger.py +472 -0
  146. langfun/env/event_handlers/event_logger_test.py +304 -0
  147. langfun/env/event_handlers/metric_writer.py +726 -0
  148. langfun/env/event_handlers/metric_writer_test.py +214 -0
  149. langfun/env/interface.py +1640 -0
  150. langfun/env/interface_test.py +153 -0
  151. langfun/env/load_balancers.py +59 -0
  152. langfun/env/load_balancers_test.py +141 -0
  153. langfun/env/test_utils.py +507 -0
  154. {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512040805.dist-info}/METADATA +7 -3
  155. langfun-0.1.2.dev202512040805.dist-info/RECORD +217 -0
  156. langfun/core/eval/v2/runners_test.py +0 -343
  157. langfun/core/structured/schema.py +0 -987
  158. langfun/core/structured/schema_test.py +0 -982
  159. langfun-0.1.2.dev202509120804.dist-info/RECORD +0 -172
  160. {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512040805.dist-info}/WHEEL +0 -0
  161. {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512040805.dist-info}/licenses/LICENSE +0 -0
  162. {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512040805.dist-info}/top_level.txt +0 -0
langfun/core/llms/anthropic.py CHANGED
@@ -59,6 +59,60 @@ class AnthropicModelInfo(lf.ModelInfo):
 
 
 SUPPORTED_MODELS = [
+    AnthropicModelInfo(
+        model_id='claude-haiku-4-5-20251001',
+        provider='Anthropic',
+        in_service=True,
+        description='Claude 4.5 Haiku model (10/15/2025).',
+        release_date=datetime.datetime(2025, 10, 15),
+        input_modalities=(
+            AnthropicModelInfo.INPUT_IMAGE_TYPES
+            + AnthropicModelInfo.INPUT_DOC_TYPES
+        ),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=200_000,
+            max_output_tokens=64_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.1,
+            cost_per_1m_input_tokens=1,
+            cost_per_1m_output_tokens=5,
+        ),
+        rate_limits=AnthropicModelInfo.RateLimits(
+            # Tier 4 rate limits
+            max_requests_per_minute=4000,
+            max_input_tokens_per_minute=4_000_000,
+            max_output_tokens_per_minute=800_000,
+        ),
+    ),
+    AnthropicModelInfo(
+        model_id='claude-sonnet-4-5-20250929',
+        provider='Anthropic',
+        in_service=True,
+        description='Claude 4.5 Sonnet model (9/29/2025).',
+        release_date=datetime.datetime(2025, 9, 29),
+        input_modalities=(
+            AnthropicModelInfo.INPUT_IMAGE_TYPES
+            + AnthropicModelInfo.INPUT_DOC_TYPES
+        ),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=200_000,
+            max_output_tokens=64_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.3,
+            cost_per_1m_input_tokens=3,
+            cost_per_1m_output_tokens=15,
+        ),
+        rate_limits=AnthropicModelInfo.RateLimits(
+            # Tier 4 rate limits
+            # This rate limit is a total limit that applies to combined traffic
+            # across both Sonnet 4 and Sonnet 4.5.
+            max_requests_per_minute=4000,
+            max_input_tokens_per_minute=2_000_000,
+            max_output_tokens_per_minute=400_000,
+        ),
+    ),
     AnthropicModelInfo(
         model_id='claude-4-opus-20250514',
         provider='Anthropic',
@@ -190,6 +244,62 @@ SUPPORTED_MODELS = [
             max_output_tokens_per_minute=80_000,
         ),
     ),
+    AnthropicModelInfo(
+        model_id='claude-haiku-4-5@20251001',
+        alias_for='claude-haiku-4-5-20251001',
+        provider='VertexAI',
+        in_service=True,
+        description='Claude 4.5 Haiku model served on VertexAI (10/15/2025).',
+        release_date=datetime.datetime(2025, 10, 15),
+        input_modalities=(
+            AnthropicModelInfo.INPUT_IMAGE_TYPES
+            + AnthropicModelInfo.INPUT_DOC_TYPES
+        ),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=200_000,
+            max_output_tokens=64_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            # For global endpoint
+            cost_per_1m_cached_input_tokens=0.1,
+            cost_per_1m_input_tokens=1,
+            cost_per_1m_output_tokens=5,
+        ),
+        rate_limits=AnthropicModelInfo.RateLimits(
+            # For global endpoint
+            max_requests_per_minute=2500,
+            max_input_tokens_per_minute=200_000,
+            max_output_tokens_per_minute=0,
+        ),
+    ),
+    AnthropicModelInfo(
+        model_id='claude-sonnet-4-5@20250929',
+        alias_for='claude-sonnet-4-5-20250929',
+        provider='VertexAI',
+        in_service=True,
+        description='Claude 4.5 Sonnet model (9/29/2025).',
+        release_date=datetime.datetime(2025, 9, 29),
+        input_modalities=(
+            AnthropicModelInfo.INPUT_IMAGE_TYPES
+            + AnthropicModelInfo.INPUT_DOC_TYPES
+        ),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=200_000,
+            max_output_tokens=64_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            # For global endpoint
+            cost_per_1m_cached_input_tokens=0.3,
+            cost_per_1m_input_tokens=3,
+            cost_per_1m_output_tokens=15,
+        ),
+        rate_limits=AnthropicModelInfo.RateLimits(
+            # For global endpoint
+            max_requests_per_minute=1500,
+            max_input_tokens_per_minute=200_000,
+            max_output_tokens_per_minute=0,
+        ),
+    ),
     AnthropicModelInfo(
         model_id='claude-opus-4@20250514',
         alias_for='claude-opus-4-20250514',
@@ -540,9 +650,34 @@ _SUPPORTED_MODELS_BY_MODEL_ID = {m.model_id: m for m in SUPPORTED_MODELS}
 
 @lf.use_init_args(['model'])
 class Anthropic(rest.REST):
-  """Anthropic LLMs (Claude) through REST APIs.
+  """Anthropic Claude models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call Claude 3.5 Sonnet using API key from environment variable
+  # 'ANTHROPIC_API_KEY'.
+  lm = lf.llms.Claude35Sonnet()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The Anthropic API key can be specified in following ways:
+
+  1. At model instantiation:
+
+  ```python
+  lm = lf.llms.Claude35Sonnet(api_key='MY_API_KEY')
+
+  2. via environment variable `ANTHROPIC_API_KEY`.
 
-  See https://docs.anthropic.com/claude/reference/messages_post
+  **References:**
+
+  * https://docs.anthropic.com/claude/reference/messages_post
   """
 
   model: pg.typing.Annotated[
@@ -658,6 +793,8 @@ class Anthropic(rest.REST):
       args.pop('temperature', None)
       args.pop('top_k', None)
       args.pop('top_p', None)
+    if options.extras:
+      args.update(options.extras)
     return args
 
   def result(self, json: dict[str, Any]) -> lf.LMSamplingResult:
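
Illustrative usage (not part of the diff): the `options.extras` pass-through added above lets provider-specific request fields ride along with the sampling options. The sketch below assumes `extras` is a constructor-settable field of `lf.LMSamplingOptions` (the hunk only shows it being read), and that the `Claude45Sonnet_20250929` alias added later in this file is exported from `lf.llms` like the other Claude aliases.

```python
import langfun as lf

# Hedged sketch: `extras` is merged into the Anthropic request body verbatim
# by the code added above; the field value here is purely illustrative.
lm = lf.llms.Claude45Sonnet_20250929(
    sampling_options=lf.LMSamplingOptions(
        extras={'metadata': {'user_id': 'example-user'}},
    ),
)
r = lm('Who are you?')
print(r)
```
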
@@ -679,6 +816,24 @@
     return super()._error(status_code, content)
 
 
+class Claude45(Anthropic):
+  """Base class for Claude 4.5 models."""
+
+
+# pylint: disable=invalid-name
+class Claude45Haiku_20251001(Claude45):
+  """Claude 4.5 Haiku model 20251001."""
+
+  model = 'claude-haiku-4-5-20251001'
+
+
+# pylint: disable=invalid-name
+class Claude45Sonnet_20250929(Claude45):
+  """Claude 4.5 Sonnet model 20250929."""
+
+  model = 'claude-sonnet-4-5-20250929'
+
+
 class Claude4(Anthropic):
   """Base class for Claude 4 models."""
 
langfun/core/llms/azure_openai.py CHANGED
@@ -23,23 +23,35 @@ import pyglove as pg
 @lf.use_init_args(['model', 'deployment_name'])
 @pg.members([('api_endpoint', pg.typing.Str().freeze(''))])
 class AzureOpenAI(openai.OpenAI):
-  """Azure OpenAI model service.
-
-  This service interacts with the Azure OpenAI API to generate chat completions.
-  It uses the deployment_name and API version to construct the endpoint, and
-  authenticates using an API key provided via parameter or the
-  AZURE_OPENAI_API_KEY environment variable.
-
-  Example:
-    lm = AzureOpenAI(
-        model='gpt-4o',
-        deployment_name='gpt-4o',
-        api_version='2024-08-01-preview',
-        azure_endpoint='https://trackname.openai.azure.com/',
-        api_key='token'
-    )
-    response = lf.query(prompt="what the capital of France", lm=lm)
-    print(response)
+  """Azure OpenAI models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call GPT-4o on Azure using API key from environment variable
+  # 'AZURE_OPENAI_API_KEY'.
+  lm = lf.llms.AzureOpenAI(
+      model='gpt-4o',
+      deployment_name='my-gpt4o-deployment',
+      api_version='2024-08-01-preview',
+      azure_endpoint='https://my-resource.openai.azure.com/',
+  )
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The Azure OpenAI API key can be specified in following ways:
+
+  1. At model instantiation:
+
+  ```python
+  lm = lf.llms.AzureOpenAI(..., api_key='MY_API_KEY')
+  ```
+  2. via environment variable `AZURE_OPENAI_API_KEY`.
   """
 
   deployment_name: Annotated[
langfun/core/llms/cache/base.py CHANGED
@@ -22,13 +22,33 @@ import langfun.core as lf
 
 @dataclasses.dataclass(frozen=True)
 class LMCacheEntry:
-  """LM cache entry."""
+  """Represents a single entry in the language model cache.
+
+  An `LMCacheEntry` stores the result of a language model sampling operation
+  and an optional expiration timestamp.
+  """
   result: lf.LMSamplingResult
   expire: datetime.datetime | None = None
 
 
 class LMCacheBase(lf.LMCache):
-  """The common LMCache base."""
+  """Base class for language model cache implementations.
+
+  `LMCacheBase` provides the core logic for a key-value based cache,
+  handling key generation, expiration (TTL), and statistics tracking.
+  Subclasses must implement the abstract methods `_get`, `_put`, and `_delete`
+  to provide the specific storage mechanism (e.g., in-memory, file-based).
+
+  **Key Features:**
+
+  * **Customizable Keying**: Allows specifying a custom function to generate
+    cache keys based on the language model, prompt, and seed. If not provided,
+    a default key based on prompt text, sampling options, and seed is used.
+  * **Time-to-Live (TTL)**: Supports setting an expiration time for cache
+    entries, after which they are considered invalid and removed upon access.
+  * **Cache Statistics**: Tracks metrics like hits, misses, updates,
+    deletions, and expired hits through the `stats` property.
+  """
 
   key: Annotated[
       Callable[[lf.LanguageModel, lf.Message, int], Any] | None,
@@ -121,4 +141,6 @@ class LMCacheBase(lf.LMCache):
 
 def default_key(lm: lf.LanguageModel, prompt: lf.Message, seed: int) -> Any:
   """Default key for LM cache."""
-  return (prompt.text_with_modality_hash, lm.sampling_options.cache_key(), seed)
+  # prompt text already contains the modality id for referenced modality
+  # objects, so no need to include them in the key.
+  return (prompt.text, lm.sampling_options.cache_key(), seed)
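
Illustrative sketch (not part of the diff) of what the revised `default_key` returns. The import path mirrors the module being patched; treat the exact path and the use of the `Echo` fake as assumptions made for demonstration only.

```python
import langfun as lf
from langfun.core.llms.cache import base

lm = lf.llms.Echo()
prompt = lf.UserMessage('What is 1 + 1?')

# Per the change above, the key is (prompt text, sampling-options cache key,
# seed). Modality hashes are no longer appended, because modality ids already
# appear in the prompt text for referenced modality objects.
key = base.default_key(lm, prompt, 0)
print(key)
```
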
langfun/core/llms/cache/in_memory.py CHANGED
@@ -24,7 +24,32 @@ import pyglove as pg
 
 @pg.use_init_args(['filename', 'ttl', 'key'])
 class InMemory(base.LMCacheBase):
-  """In memory cache."""
+  """An in-memory cache for language model lookups.
+
+  `InMemory` stores LM prompts and their corresponding responses in memory,
+  providing a simple and fast caching mechanism for a single session.
+  Optionally, it can persist the cache to a JSON file on disk, allowing
+  results to be reused across sessions.
+
+  When a filename is provided, the cache will be loaded from the file upon
+  initialization and saved to the file when `save()` is called. This is
+  useful for caching results in interactive environments like Colab or
+  when running batch jobs.
+
+  Example:
+
+  ```python
+  import langfun as lf
+  # Using in-memory cache without persistence
+  lm = lf.llms.GeminiPro(cache=lf.llms.cache.InMemory())
+  r = lm.query('hello')
+
+  # Using in-memory cache with persistence
+  lm = lf.llms.GeminiPro(cache=lf.llms.cache.InMemory('cache.json'))
+  r = lm.query('hello')
+  lm.cache.save()
+  ```
+  """
 
   filename: Annotated[
       str | None,
@@ -144,17 +169,33 @@ class InMemory(base.LMCacheBase):
 
 @contextlib.contextmanager
 def lm_cache(filename: str | None = None) -> Iterator[InMemory]:
-  """Context manager to enable cache for LMs under the context.
+  """Context manager to enable in-memory cache for LMs in the current context.
+
+  This context manager sets an `InMemory` cache as the default cache for
+  any Langfun language model instantiated within its scope, unless a model
+  is explicitly configured with a different cache.
+
+  If a `filename` is provided, the cache will be loaded from the specified
+  file at the beginning of the context and automatically saved back to the
+  file upon exiting the context. This is a convenient way to manage
+  persistent caching for a block of code.
+
+  Example:
 
-  If LMs under the context manager have explicitly specified cache, they will
-  use their own cache. Otherwise they will use the cache created by the context
-  manager.
+  ```python
+  import langfun as lf
+  with lf.lm_cache('my_cache.json'):
+    # LMs created here will use 'my_cache.json' for caching.
+    lm = lf.llms.GeminiPro()
+    print(lm.query('hello'))
+  ```
 
   Args:
-    filename: If not None, JSON file to load and save the cache.
+    filename: If provided, specifies the JSON file for loading and saving
+      the cache.
 
   Yields:
-    A cache object created.
+    The `InMemory` cache instance created for this context.
   """
   cache = InMemory(filename)
   try:
langfun/core/llms/cache/in_memory_test.py CHANGED
@@ -175,18 +175,28 @@ class InMemoryLMCacheTest(unittest.TestCase):
 
     cache = in_memory.InMemory()
     lm = fake.StaticSequence(['1', '2', '3', '4', '5', '6'], cache=cache)
-    lm(lf.UserMessage('hi <<[[image]]>>', image=CustomModality('foo')))
-    lm(lf.UserMessage('hi <<[[image]]>>', image=CustomModality('bar')))
+    image_foo = CustomModality('foo')
+    image_bar = CustomModality('bar')
+    lm(
+        lf.UserMessage(
+            f'hi <<[[{image_foo.id}]]>>', referred_modalities=[image_foo]
+        )
+    )
+    lm(
+        lf.UserMessage(
+            f'hi <<[[{image_bar.id}]]>>', referred_modalities=[image_bar]
+        )
+    )
     self.assertEqual(
         list(cache.keys()),
         [
            (
-                'hi <<[[image]]>><image>acbd18db</image>',
+                f'hi <<[[{image_foo.id}]]>>',
                (None, None, 1, 40, None, None),
                0,
            ),
            (
-                'hi <<[[image]]>><image>37b51d19</image>',
+                f'hi <<[[{image_bar.id}]]>>',
                (None, None, 1, 40, None, None),
                0,
            ),
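
Illustrative sketch (not part of the diff): the modality-referencing pattern the updated test exercises, where a modality is interpolated by its `id` and passed through `referred_modalities`. `lf.Image.from_bytes` is an assumed convenience constructor here; the test itself uses a custom modality class.

```python
import langfun as lf

# Assumed: lf.Image.from_bytes builds an image modality from raw bytes.
image = lf.Image.from_bytes(b'...')
message = lf.UserMessage(
    f'What is in this picture? <<[[{image.id}]]>>',
    referred_modalities=[image],
)
```
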
langfun/core/llms/compositional.py CHANGED
@@ -21,7 +21,31 @@ import pyglove as pg
 
 @pg.use_init_args(['candidates', 'seed'])
 class RandomChoice(lf.LanguageModel):
-  """Random choice of a list of LLM models."""
+  """A composite language model that randomly selects from a list of candidates.
+
+  `RandomChoice` acts as a proxy that forwards each request (`sample`, `score`,
+  `tokenize`, or `__call__`) to one of the `candidates` selected randomly.
+  This can be useful for load balancing across multiple LLM endpoints,
+  for A/B testing different models, or for ensembling model outputs
+  by calling it multiple times.
+
+  The selection is determined by the provided `seed`, ensuring reproducibility
+  if needed.
+
+  Example:
+
+  ```python
+  import langfun as lf
+
+  lm = lf.llms.RandomChoice([
+      lf.llms.GeminiPro(),
+      lf.llms.GPT4(),
+  ])
+
+  # This call will be handled by either GeminiPro or GPT4, chosen randomly.
+  r = lm.sample('hello')
+  ```
+  """
 
   candidates: Annotated[
       list[lf.LanguageModel],
langfun/core/llms/deepseek.py CHANGED
@@ -93,8 +93,36 @@ _SUPPORTED_MODELS_BY_ID = {m.model_id: m for m in SUPPORTED_MODELS}
 # DeepSeek API uses an API format compatible with OpenAI.
 # Reference: https://api-docs.deepseek.com/
 @lf.use_init_args(['model'])
-class DeepSeek(openai_compatible.OpenAICompatible):
-  """DeepSeek model."""
+class DeepSeek(openai_compatible.OpenAIChatCompletionAPI):
+  """DeepSeek models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call DeepSeek-V3 using API key from environment variable
+  # 'DEEPSEEK_API_KEY'.
+  lm = lf.llms.DeepSeekV3()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The DeepSeek API key can be specified in following ways:
+
+  1. At model instantiation:
+
+  ```python
+  lm = lf.llms.DeepSeekV3(api_key='MY_API_KEY')
+  ```
+  2. via environment variable `DEEPSEEK_API_KEY`.
+
+  **References:**
+
+  * https://api-docs.deepseek.com/
+  """
 
   model: pg.typing.Annotated[
       pg.typing.Enum(
langfun/core/llms/fake.py CHANGED
@@ -20,7 +20,38 @@ import langfun.core as lf
 
 
 class Fake(lf.LanguageModel):
-  """The base class for all fake language models."""
+  """Base class for fake language models, used for testing.
+
+  Fake models simulate the behavior of real language models but return
+  pre-defined responses, making them useful for testing prompts,
+  data processing logic, and agent behavior without incurring API costs
+  or relying on external services.
+
+  Langfun provides several fake models:
+  * `lf.llms.Echo`: Echoes the prompt back as the response.
+  * `lf.llms.StaticResponse`: Returns a fixed, pre-defined response for
+    any prompt.
+  * `lf.llms.StaticMapping`: Returns responses based on a prompt-to-response
+    dictionary.
+  * `lf.llms.StaticSequence`: Returns responses from a pre-defined sequence
+    in order.
+
+  **Example:**
+
+  ```python
+  import langfun as lf
+
+  # Use Echo model for testing
+  lm = lf.llms.Echo()
+  response = lm('hello')
+  assert response.text == 'hello'
+
+  # Use StaticResponse model
+  lm = lf.llms.StaticResponse('world')
+  response = lm('hello')
+  assert response.text == 'world'
+  ```
+  """
 
   def _score(self, prompt: lf.Message| list[lf.Message],
              completions: list[lf.Message]):
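
Illustrative sketch (not part of the diff): the mapping/sequence fakes listed in the docstring above but not demonstrated there. The constructor shapes are assumed to mirror the `StaticSequence(...)` usage seen in the cache test earlier in this diff.

```python
import langfun as lf

# StaticSequence returns canned responses in order.
lm = lf.llms.StaticSequence(['first reply', 'second reply'])
assert lm('a').text == 'first reply'
assert lm('b').text == 'second reply'

# StaticMapping looks responses up by prompt (dict-style constructor assumed).
lm = lf.llms.StaticMapping({'ping': 'pong'})
assert lm('ping').text == 'pong'
```
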
langfun/core/llms/gemini.py CHANGED
@@ -151,6 +151,32 @@ SUPPORTED_MODELS = [
     #
     # Production models.
     #
+    # Gemini 3 Pro Preview
+    GeminiModelInfo(
+        model_id='gemini-3-pro-preview',
+        in_service=True,
+        provider=pg.oneof(['Google GenAI', 'VertexAI']),
+        model_type='instruction-tuned',
+        description='Gemini 3 Pro Preview.',
+        release_date=datetime.datetime(2025, 11, 18),
+        input_modalities=GeminiModelInfo.ALL_SUPPORTED_INPUT_TYPES,
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=1_048_576,
+            max_output_tokens=65_536,
+        ),
+        pricing=GeminiModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.2,
+            cost_per_1m_input_tokens=2.0,
+            cost_per_1m_output_tokens=12.0,
+            cost_per_1m_cached_input_tokens_with_prompt_longer_than_128k=0.4,
+            cost_per_1m_input_tokens_with_prompt_longer_than_128k=4.0,
+            cost_per_1m_output_tokens_with_prompt_longer_than_128k=18.0,
+        ),
+        rate_limits=lf.ModelInfo.RateLimits(
+            max_requests_per_minute=2000,
+            max_tokens_per_minute=4_000_000,
+        ),
+    ),
     # Gemini 2.5 Flash
     GeminiModelInfo(
         model_id='gemini-2.5-flash',
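
Illustrative sketch (not part of the diff): selecting the newly listed `gemini-3-pro-preview` model id. Passing the id as the first argument is an assumption based on `@pg.use_init_args(['model'])` on the shared `Gemini` base class shown below; API key handling follows the usual `GOOGLE_API_KEY` convention.

```python
import langfun as lf

# Assumed: the new model id is accepted like any other Gemini id.
lm = lf.llms.GoogleGenAI('gemini-3-pro-preview')
print(lm('Summarize this change log in one sentence.'))
```
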
@@ -696,7 +722,15 @@ _SUPPORTED_MODELS_BY_ID = {m.model_id: m for m in SUPPORTED_MODELS}
 
 @pg.use_init_args(['model'])
 class Gemini(rest.REST):
-  """Language models provided by Google GenAI."""
+  """Base class for Gemini models served on Google GenAI and Vertex AI.
+
+  This class implements the Gemini API protocol, shared by
+  `lf.llms.GoogleGenAI` and `lf.llms.VertexAI`, providing common request
+  formatting and response parsing for Gemini models.
+
+  It is not intended to be used directly. Please use `lf.llms.GoogleGenAI` or
+  `lf.llms.VertexAI` instead.
+  """
 
   model: pg.typing.Annotated[
       pg.typing.Enum(
@@ -752,6 +786,13 @@ class Gemini(rest.REST):
         prompt.as_format('gemini', chunk_preprocessor=modality_conversion)
     )
     request['contents'] = contents
+    request['toolConfig'] = {
+        'functionCallingConfig': {
+            'mode': 'NONE',
+        }
+    }
+    if sampling_options.extras:
+      request.update(sampling_options.extras)
     return request
 
   def _generation_config(
@@ -783,11 +824,18 @@
           + '\n\n [RESPONSE FORMAT (not part of prompt)]\n'
           + pg.to_json_str(json_schema, json_indent=2)
       )
+    thinking_config_data = {}
     if options.max_thinking_tokens is not None:
-      config['thinkingConfig'] = {
-          'includeThoughts': options.max_thinking_tokens > 0,
-          'thinkingBudget': options.max_thinking_tokens,
-      }
+      thinking_config_data['includeThoughts'] = options.max_thinking_tokens > 0
+      thinking_config_data['thinkingBudget'] = options.max_thinking_tokens
+    if options.thinking_level is not None:
+      thinking_config_data['thinkingLevel'] = options.thinking_level
+    if thinking_config_data:
+      config['thinkingConfig'] = thinking_config_data
+
+    # This is the new feature since Gemini 3.
+    if self.model_id.startswith('gemini-3'):
+      config['mediaResolution'] = 'MEDIA_RESOLUTION_HIGH'
 
     if self.response_modalities:
       config['responseModalities'] = self.response_modalities
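
A standalone sketch (plain Python, not langfun API) restating the `thinkingConfig` assembly added above: budget and level are set independently, and the config is only attached to the request when at least one of them is present.

```python
def build_thinking_config(max_thinking_tokens=None, thinking_level=None):
  """Mirrors the thinkingConfig assembly in the hunk above."""
  cfg = {}
  if max_thinking_tokens is not None:
    cfg['includeThoughts'] = max_thinking_tokens > 0
    cfg['thinkingBudget'] = max_thinking_tokens
  if thinking_level is not None:
    cfg['thinkingLevel'] = thinking_level
  return cfg or None  # omitted from the request when neither option is set


assert build_thinking_config() is None
assert build_thinking_config(0) == {'includeThoughts': False, 'thinkingBudget': 0}
assert build_thinking_config(thinking_level='high')['thinkingLevel'] == 'high'
```
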
@@ -803,10 +851,14 @@
           'No candidates found in response. This is a Gemini API issue that '
           'happens occasionally, and retrying should fix it. '
       )
-    messages = [
-        lf.Message.from_value(candidate['content'], format='gemini')
-        for candidate in candidates
-    ]
+
+    messages = []
+    for candidate in candidates:
+      message = lf.Message.from_value(candidate['content'], format='gemini')
+      if finish_reason := candidate.get('finishReason'):
+        message.metadata['finish_reason'] = finish_reason
+      messages.append(message)
+
     usage = json['usageMetadata']
     input_tokens = usage['promptTokenCount']
     # NOTE(daiyip): We saw cases that `candidatesTokenCount` is not present.
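
Illustrative sketch (not part of the diff): reading back the `finish_reason` that each candidate message now carries. Dict-style metadata access is assumed, and the key is only populated when the API returned a `finishReason`.

```python
import langfun as lf

lm = lf.llms.GoogleGenAI('gemini-2.5-flash')
r = lm('Tell me a short story.')
# Populated from the candidate's finishReason by the code added above.
print(r.metadata.get('finish_reason'))
```
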
@@ -828,9 +880,9 @@
     )
 
   def _error(self, status_code: int, content: str) -> lf.LMError:
-    if (
-        status_code == 400
-        and b'exceeds the maximum number of tokens' in content
+    if status_code == 400 and (
+        b'exceeds the maximum number of tokens' in content
+        or b'Reduce the input token count and try again.' in content
     ):
       return lf.ContextLimitError(f'{status_code}: {content}')
     return super()._error(status_code, content)
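
Illustrative sketch (not part of the diff): with the broadened match above, both Gemini error texts now surface as `lf.ContextLimitError`, so callers can handle context overflow uniformly.

```python
import langfun as lf

lm = lf.llms.GoogleGenAI('gemini-2.5-flash')
try:
  r = lm('word ' * 2_000_000)  # deliberately oversized prompt
except lf.ContextLimitError as e:
  print('Context limit exceeded:', e)
```
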