langfun 0.1.2.dev202510230805__py3-none-any.whl → 0.1.2.dev202511270805__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.

Potentially problematic release.



Files changed (155)
  1. langfun/core/__init__.py +2 -0
  2. langfun/core/agentic/__init__.py +4 -1
  3. langfun/core/agentic/action.py +447 -29
  4. langfun/core/agentic/action_eval.py +9 -2
  5. langfun/core/agentic/action_test.py +149 -21
  6. langfun/core/async_support.py +32 -3
  7. langfun/core/coding/python/correction.py +19 -9
  8. langfun/core/coding/python/execution.py +14 -12
  9. langfun/core/coding/python/generation.py +21 -16
  10. langfun/core/coding/python/sandboxing.py +23 -3
  11. langfun/core/component.py +42 -3
  12. langfun/core/concurrent.py +70 -6
  13. langfun/core/concurrent_test.py +1 -0
  14. langfun/core/console.py +1 -1
  15. langfun/core/data/conversion/anthropic.py +12 -3
  16. langfun/core/data/conversion/anthropic_test.py +8 -6
  17. langfun/core/data/conversion/gemini.py +9 -2
  18. langfun/core/data/conversion/gemini_test.py +12 -9
  19. langfun/core/data/conversion/openai.py +145 -31
  20. langfun/core/data/conversion/openai_test.py +161 -17
  21. langfun/core/eval/base.py +47 -43
  22. langfun/core/eval/base_test.py +5 -5
  23. langfun/core/eval/matching.py +5 -2
  24. langfun/core/eval/patching.py +3 -3
  25. langfun/core/eval/scoring.py +4 -3
  26. langfun/core/eval/v2/__init__.py +1 -0
  27. langfun/core/eval/v2/checkpointing.py +64 -6
  28. langfun/core/eval/v2/checkpointing_test.py +9 -2
  29. langfun/core/eval/v2/eval_test_helper.py +103 -2
  30. langfun/core/eval/v2/evaluation.py +91 -16
  31. langfun/core/eval/v2/evaluation_test.py +9 -3
  32. langfun/core/eval/v2/example.py +50 -40
  33. langfun/core/eval/v2/example_test.py +16 -8
  34. langfun/core/eval/v2/experiment.py +74 -8
  35. langfun/core/eval/v2/experiment_test.py +19 -0
  36. langfun/core/eval/v2/metric_values.py +31 -3
  37. langfun/core/eval/v2/metric_values_test.py +32 -0
  38. langfun/core/eval/v2/metrics.py +157 -44
  39. langfun/core/eval/v2/metrics_test.py +39 -18
  40. langfun/core/eval/v2/progress.py +30 -1
  41. langfun/core/eval/v2/progress_test.py +27 -0
  42. langfun/core/eval/v2/progress_tracking.py +12 -3
  43. langfun/core/eval/v2/progress_tracking_test.py +6 -1
  44. langfun/core/eval/v2/reporting.py +90 -71
  45. langfun/core/eval/v2/reporting_test.py +24 -6
  46. langfun/core/eval/v2/runners/__init__.py +30 -0
  47. langfun/core/eval/v2/{runners.py → runners/base.py} +59 -142
  48. langfun/core/eval/v2/runners/beam.py +341 -0
  49. langfun/core/eval/v2/runners/beam_test.py +131 -0
  50. langfun/core/eval/v2/runners/ckpt_monitor.py +294 -0
  51. langfun/core/eval/v2/runners/ckpt_monitor_test.py +162 -0
  52. langfun/core/eval/v2/runners/debug.py +40 -0
  53. langfun/core/eval/v2/runners/debug_test.py +76 -0
  54. langfun/core/eval/v2/runners/parallel.py +100 -0
  55. langfun/core/eval/v2/runners/parallel_test.py +95 -0
  56. langfun/core/eval/v2/runners/sequential.py +47 -0
  57. langfun/core/eval/v2/runners/sequential_test.py +172 -0
  58. langfun/core/langfunc.py +45 -130
  59. langfun/core/langfunc_test.py +7 -5
  60. langfun/core/language_model.py +141 -21
  61. langfun/core/language_model_test.py +54 -3
  62. langfun/core/llms/__init__.py +9 -1
  63. langfun/core/llms/anthropic.py +157 -2
  64. langfun/core/llms/azure_openai.py +29 -17
  65. langfun/core/llms/cache/base.py +25 -3
  66. langfun/core/llms/cache/in_memory.py +48 -7
  67. langfun/core/llms/cache/in_memory_test.py +14 -4
  68. langfun/core/llms/compositional.py +25 -1
  69. langfun/core/llms/deepseek.py +30 -2
  70. langfun/core/llms/fake.py +32 -1
  71. langfun/core/llms/gemini.py +55 -17
  72. langfun/core/llms/gemini_test.py +84 -0
  73. langfun/core/llms/google_genai.py +34 -1
  74. langfun/core/llms/groq.py +28 -3
  75. langfun/core/llms/llama_cpp.py +23 -4
  76. langfun/core/llms/openai.py +36 -3
  77. langfun/core/llms/openai_compatible.py +148 -27
  78. langfun/core/llms/openai_compatible_test.py +207 -20
  79. langfun/core/llms/openai_test.py +0 -2
  80. langfun/core/llms/rest.py +12 -1
  81. langfun/core/llms/vertexai.py +58 -8
  82. langfun/core/logging.py +1 -1
  83. langfun/core/mcp/client.py +77 -22
  84. langfun/core/mcp/client_test.py +8 -35
  85. langfun/core/mcp/session.py +94 -29
  86. langfun/core/mcp/session_test.py +54 -0
  87. langfun/core/mcp/tool.py +151 -22
  88. langfun/core/mcp/tool_test.py +197 -0
  89. langfun/core/memory.py +1 -0
  90. langfun/core/message.py +160 -55
  91. langfun/core/message_test.py +65 -81
  92. langfun/core/modalities/__init__.py +8 -0
  93. langfun/core/modalities/audio.py +21 -1
  94. langfun/core/modalities/image.py +19 -1
  95. langfun/core/modalities/mime.py +64 -3
  96. langfun/core/modalities/mime_test.py +11 -0
  97. langfun/core/modalities/pdf.py +19 -1
  98. langfun/core/modalities/video.py +21 -1
  99. langfun/core/modality.py +167 -29
  100. langfun/core/modality_test.py +42 -12
  101. langfun/core/natural_language.py +1 -1
  102. langfun/core/sampling.py +4 -4
  103. langfun/core/sampling_test.py +20 -4
  104. langfun/core/structured/__init__.py +2 -24
  105. langfun/core/structured/completion.py +34 -44
  106. langfun/core/structured/completion_test.py +23 -43
  107. langfun/core/structured/description.py +54 -50
  108. langfun/core/structured/function_generation.py +29 -12
  109. langfun/core/structured/mapping.py +81 -37
  110. langfun/core/structured/parsing.py +95 -79
  111. langfun/core/structured/parsing_test.py +0 -3
  112. langfun/core/structured/querying.py +215 -142
  113. langfun/core/structured/querying_test.py +65 -29
  114. langfun/core/structured/schema/__init__.py +49 -0
  115. langfun/core/structured/schema/base.py +664 -0
  116. langfun/core/structured/schema/base_test.py +531 -0
  117. langfun/core/structured/schema/json.py +174 -0
  118. langfun/core/structured/schema/json_test.py +121 -0
  119. langfun/core/structured/schema/python.py +316 -0
  120. langfun/core/structured/schema/python_test.py +410 -0
  121. langfun/core/structured/schema_generation.py +33 -14
  122. langfun/core/structured/scoring.py +47 -36
  123. langfun/core/structured/tokenization.py +26 -11
  124. langfun/core/subscription.py +2 -2
  125. langfun/core/template.py +174 -49
  126. langfun/core/template_test.py +123 -17
  127. langfun/env/__init__.py +8 -2
  128. langfun/env/base_environment.py +320 -128
  129. langfun/env/base_environment_test.py +473 -0
  130. langfun/env/base_feature.py +92 -15
  131. langfun/env/base_feature_test.py +228 -0
  132. langfun/env/base_sandbox.py +84 -361
  133. langfun/env/base_sandbox_test.py +1235 -0
  134. langfun/env/event_handlers/__init__.py +1 -1
  135. langfun/env/event_handlers/chain.py +233 -0
  136. langfun/env/event_handlers/chain_test.py +253 -0
  137. langfun/env/event_handlers/event_logger.py +95 -98
  138. langfun/env/event_handlers/event_logger_test.py +21 -21
  139. langfun/env/event_handlers/metric_writer.py +225 -140
  140. langfun/env/event_handlers/metric_writer_test.py +23 -6
  141. langfun/env/interface.py +854 -40
  142. langfun/env/interface_test.py +112 -2
  143. langfun/env/load_balancers_test.py +23 -2
  144. langfun/env/test_utils.py +126 -84
  145. {langfun-0.1.2.dev202510230805.dist-info → langfun-0.1.2.dev202511270805.dist-info}/METADATA +1 -1
  146. langfun-0.1.2.dev202511270805.dist-info/RECORD +215 -0
  147. langfun/core/eval/v2/runners_test.py +0 -343
  148. langfun/core/structured/schema.py +0 -987
  149. langfun/core/structured/schema_test.py +0 -982
  150. langfun/env/base_test.py +0 -1481
  151. langfun/env/event_handlers/base.py +0 -350
  152. langfun-0.1.2.dev202510230805.dist-info/RECORD +0 -195
  153. {langfun-0.1.2.dev202510230805.dist-info → langfun-0.1.2.dev202511270805.dist-info}/WHEEL +0 -0
  154. {langfun-0.1.2.dev202510230805.dist-info → langfun-0.1.2.dev202511270805.dist-info}/licenses/LICENSE +0 -0
  155. {langfun-0.1.2.dev202510230805.dist-info → langfun-0.1.2.dev202511270805.dist-info}/top_level.txt +0 -0
@@ -24,7 +24,32 @@ import pyglove as pg
 
 @pg.use_init_args(['filename', 'ttl', 'key'])
 class InMemory(base.LMCacheBase):
-  """In memory cache."""
+  """An in-memory cache for language model lookups.
+
+  `InMemory` stores LM prompts and their corresponding responses in memory,
+  providing a simple and fast caching mechanism for a single session.
+  Optionally, it can persist the cache to a JSON file on disk, allowing
+  results to be reused across sessions.
+
+  When a filename is provided, the cache will be loaded from the file upon
+  initialization and saved to the file when `save()` is called. This is
+  useful for caching results in interactive environments like Colab or
+  when running batch jobs.
+
+  Example:
+
+  ```python
+  import langfun as lf
+  # Using in-memory cache without persistence
+  lm = lf.llms.GeminiPro(cache=lf.llms.cache.InMemory())
+  r = lm.query('hello')
+
+  # Using in-memory cache with persistence
+  lm = lf.llms.GeminiPro(cache=lf.llms.cache.InMemory('cache.json'))
+  r = lm.query('hello')
+  lm.cache.save()
+  ```
+  """
 
   filename: Annotated[
       str | None,
@@ -144,17 +169,33 @@ class InMemory(base.LMCacheBase):
 
 @contextlib.contextmanager
 def lm_cache(filename: str | None = None) -> Iterator[InMemory]:
-  """Context manager to enable cache for LMs under the context.
+  """Context manager to enable in-memory cache for LMs in the current context.
+
+  This context manager sets an `InMemory` cache as the default cache for
+  any Langfun language model instantiated within its scope, unless a model
+  is explicitly configured with a different cache.
+
+  If a `filename` is provided, the cache will be loaded from the specified
+  file at the beginning of the context and automatically saved back to the
+  file upon exiting the context. This is a convenient way to manage
+  persistent caching for a block of code.
+
+  Example:
 
-  If LMs under the context manager have explicitly specified cache, they will
-  use their own cache. Otherwise they will use the cache created by the context
-  manager.
+  ```python
+  import langfun as lf
+  with lf.lm_cache('my_cache.json'):
+    # LMs created here will use 'my_cache.json' for caching.
+    lm = lf.llms.GeminiPro()
+    print(lm.query('hello'))
+  ```
 
   Args:
-    filename: If not None, JSON file to load and save the cache.
+    filename: If provided, specifies the JSON file for loading and saving
+      the cache.
 
   Yields:
-    A cache object created.
+    The `InMemory` cache instance created for this context.
   """
   cache = InMemory(filename)
   try:
@@ -175,18 +175,28 @@ class InMemoryLMCacheTest(unittest.TestCase):
 
     cache = in_memory.InMemory()
     lm = fake.StaticSequence(['1', '2', '3', '4', '5', '6'], cache=cache)
-    lm(lf.UserMessage('hi <<[[image]]>>', image=CustomModality('foo')))
-    lm(lf.UserMessage('hi <<[[image]]>>', image=CustomModality('bar')))
+    image_foo = CustomModality('foo')
+    image_bar = CustomModality('bar')
+    lm(
+        lf.UserMessage(
+            f'hi <<[[{image_foo.id}]]>>', referred_modalities=[image_foo]
+        )
+    )
+    lm(
+        lf.UserMessage(
+            f'hi <<[[{image_bar.id}]]>>', referred_modalities=[image_bar]
+        )
+    )
     self.assertEqual(
         list(cache.keys()),
         [
            (
-               'hi <<[[image]]>><image>acbd18db</image>',
+               f'hi <<[[{image_foo.id}]]>>',
                (None, None, 1, 40, None, None),
                0,
            ),
            (
-               'hi <<[[image]]>><image>37b51d19</image>',
+               f'hi <<[[{image_bar.id}]]>>',
                (None, None, 1, 40, None, None),
                0,
            ),
@@ -21,7 +21,31 @@ import pyglove as pg
 
 @pg.use_init_args(['candidates', 'seed'])
 class RandomChoice(lf.LanguageModel):
-  """Random choice of a list of LLM models."""
+  """A composite language model that randomly selects from a list of candidates.
+
+  `RandomChoice` acts as a proxy that forwards each request (`sample`, `score`,
+  `tokenize`, or `__call__`) to one of the `candidates` selected randomly.
+  This can be useful for load balancing across multiple LLM endpoints,
+  for A/B testing different models, or for ensembling model outputs
+  by calling it multiple times.
+
+  The selection is determined by the provided `seed`, ensuring reproducibility
+  if needed.
+
+  Example:
+
+  ```python
+  import langfun as lf
+
+  lm = lf.llms.RandomChoice([
+      lf.llms.GeminiPro(),
+      lf.llms.GPT4(),
+  ])
+
+  # This call will be handled by either GeminiPro or GPT4, chosen randomly.
+  r = lm.sample('hello')
+  ```
+  """
 
   candidates: Annotated[
       list[lf.LanguageModel],
@@ -93,8 +93,36 @@ _SUPPORTED_MODELS_BY_ID = {m.model_id: m for m in SUPPORTED_MODELS}
 # DeepSeek API uses an API format compatible with OpenAI.
 # Reference: https://api-docs.deepseek.com/
 @lf.use_init_args(['model'])
-class DeepSeek(openai_compatible.OpenAICompatible):
-  """DeepSeek model."""
+class DeepSeek(openai_compatible.OpenAIChatCompletionAPI):
+  """DeepSeek models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call DeepSeek-V3 using API key from environment variable
+  # 'DEEPSEEK_API_KEY'.
+  lm = lf.llms.DeepSeekV3()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The DeepSeek API key can be specified in following ways:
+
+  1. At model instantiation:
+
+  ```python
+  lm = lf.llms.DeepSeekV3(api_key='MY_API_KEY')
+  ```
+  2. via environment variable `DEEPSEEK_API_KEY`.
+
+  **References:**
+
+  * https://api-docs.deepseek.com/
+  """
 
   model: pg.typing.Annotated[
       pg.typing.Enum(
langfun/core/llms/fake.py CHANGED
@@ -20,7 +20,38 @@ import langfun.core as lf
 
 
 class Fake(lf.LanguageModel):
-  """The base class for all fake language models."""
+  """Base class for fake language models, used for testing.
+
+  Fake models simulate the behavior of real language models but return
+  pre-defined responses, making them useful for testing prompts,
+  data processing logic, and agent behavior without incurring API costs
+  or relying on external services.
+
+  Langfun provides several fake models:
+  * `lf.llms.Echo`: Echoes the prompt back as the response.
+  * `lf.llms.StaticResponse`: Returns a fixed, pre-defined response for
+    any prompt.
+  * `lf.llms.StaticMapping`: Returns responses based on a prompt-to-response
+    dictionary.
+  * `lf.llms.StaticSequence`: Returns responses from a pre-defined sequence
+    in order.
+
+  **Example:**
+
+  ```python
+  import langfun as lf
+
+  # Use Echo model for testing
+  lm = lf.llms.Echo()
+  response = lm('hello')
+  assert response.text == 'hello'
+
+  # Use StaticResponse model
+  lm = lf.llms.StaticResponse('world')
+  response = lm('hello')
+  assert response.text == 'world'
+  ```
+  """
 
   def _score(self, prompt: lf.Message| list[lf.Message],
              completions: list[lf.Message]):
@@ -151,6 +151,32 @@ SUPPORTED_MODELS = [
     #
     # Production models.
     #
+    # Gemini 3 Pro Preview
+    GeminiModelInfo(
+        model_id='gemini-3-pro-preview',
+        in_service=True,
+        provider=pg.oneof(['Google GenAI', 'VertexAI']),
+        model_type='instruction-tuned',
+        description='Gemini 3 Pro Preview.',
+        release_date=datetime.datetime(2025, 11, 18),
+        input_modalities=GeminiModelInfo.ALL_SUPPORTED_INPUT_TYPES,
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=1_048_576,
+            max_output_tokens=65_536,
+        ),
+        pricing=GeminiModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.2,
+            cost_per_1m_input_tokens=2.0,
+            cost_per_1m_output_tokens=12.0,
+            cost_per_1m_cached_input_tokens_with_prompt_longer_than_128k=0.4,
+            cost_per_1m_input_tokens_with_prompt_longer_than_128k=4.0,
+            cost_per_1m_output_tokens_with_prompt_longer_than_128k=18.0,
+        ),
+        rate_limits=lf.ModelInfo.RateLimits(
+            max_requests_per_minute=2000,
+            max_tokens_per_minute=4_000_000,
+        ),
+    ),
     # Gemini 2.5 Flash
     GeminiModelInfo(
         model_id='gemini-2.5-flash',
@@ -696,7 +722,15 @@ _SUPPORTED_MODELS_BY_ID = {m.model_id: m for m in SUPPORTED_MODELS}
 
 @pg.use_init_args(['model'])
 class Gemini(rest.REST):
-  """Language models provided by Google GenAI."""
+  """Base class for Gemini models served on Google GenAI and Vertex AI.
+
+  This class implements the Gemini API protocol, shared by
+  `lf.llms.GoogleGenAI` and `lf.llms.VertexAI`, providing common request
+  formatting and response parsing for Gemini models.
+
+  It is not intended to be used directly. Please use `lf.llms.GoogleGenAI` or
+  `lf.llms.VertexAI` instead.
+  """
 
   model: pg.typing.Annotated[
       pg.typing.Enum(
@@ -752,11 +786,8 @@ class Gemini(rest.REST):
         prompt.as_format('gemini', chunk_preprocessor=modality_conversion)
     )
     request['contents'] = contents
-    # Users could use `metadata_gemini_tools` to pass Gemini tools. For example,
-    # for enabling Search Grounding, users could pass:
-    #   metadata_gemini_tools=[{'google_search': {}}]
-    if tools := prompt.metadata.get('gemini_tools'):
-      request['tools'] = tools
+    if sampling_options.extras:
+      request.update(sampling_options.extras)
     return request
 
   def _generation_config(
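
Note that this hunk drops the documented `gemini_tools` metadata hook in favor of merging `sampling_options.extras` into the request body. As a hedged sketch (my construction, not from the package docs), Search Grounding tools would then be passed roughly like this, assuming `extras` is forwarded verbatim as the code above suggests:

```python
import langfun as lf

# Sketch: entries in `extras` are merged into the Gemini request body by the
# updated `request()` method, so request-level fields such as `tools` can be
# supplied through sampling options.
lm = lf.llms.GoogleGenAI(
    'gemini-2.5-flash',
    sampling_options=lf.LMSamplingOptions(
        extras={'tools': [{'google_search': {}}]}
    ),
)
r = lm('What is new in the latest langfun release?')
```
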
@@ -788,11 +819,14 @@ class Gemini(rest.REST):
           + '\n\n [RESPONSE FORMAT (not part of prompt)]\n'
           + pg.to_json_str(json_schema, json_indent=2)
       )
+    thinking_config_data = {}
     if options.max_thinking_tokens is not None:
-      config['thinkingConfig'] = {
-          'includeThoughts': options.max_thinking_tokens > 0,
-          'thinkingBudget': options.max_thinking_tokens,
-      }
+      thinking_config_data['includeThoughts'] = options.max_thinking_tokens > 0
+      thinking_config_data['thinkingBudget'] = options.max_thinking_tokens
+    if options.thinking_level is not None:
+      thinking_config_data['thinkingLevel'] = options.thinking_level
+    if thinking_config_data:
+      config['thinkingConfig'] = thinking_config_data
 
     if self.response_modalities:
       config['responseModalities'] = self.response_modalities
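
The tests added further below exercise the new `thinking_level` sampling option. As a usage sketch (the model name is illustrative, not prescribed by the diff), the option rides on `LMSamplingOptions` like any other and is translated into `thinkingConfig` by `_generation_config()`:

```python
import langfun as lf

# Sketch: `thinking_level` maps to `thinkingConfig.thinkingLevel`; it can be
# combined with `max_thinking_tokens`, which maps to `thinkingBudget`.
lm = lf.llms.GoogleGenAI(
    'gemini-3-pro-preview',
    sampling_options=lf.LMSamplingOptions(thinking_level='high'),
)
r = lm('Outline a migration plan for a large test suite.')
```
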
@@ -808,10 +842,14 @@ class Gemini(rest.REST):
           'No candidates found in response. This is a Gemini API issue that '
           'happens occasionally, and retrying should fix it. '
       )
-    messages = [
-        lf.Message.from_value(candidate['content'], format='gemini')
-        for candidate in candidates
-    ]
+
+    messages = []
+    for candidate in candidates:
+      message = lf.Message.from_value(candidate['content'], format='gemini')
+      if finish_reason := candidate.get('finishReason'):
+        message.metadata['finish_reason'] = finish_reason
+      messages.append(message)
+
     usage = json['usageMetadata']
     input_tokens = usage['promptTokenCount']
     # NOTE(daiyip): We saw cases that `candidatesTokenCount` is not present.
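
With this hunk, each candidate's `finishReason` is surfaced as `finish_reason` on the returned message's metadata. A brief sketch of how a caller might detect a truncated response (mirroring the `test_call_model_with_max_tokens_error` test added further below):

```python
import langfun as lf

lm = lf.llms.GoogleGenAI('gemini-2.5-flash')
r = lm('Write a very long essay about build systems.')
# Sketch: 'MAX_TOKENS' is the Gemini finish reason for responses cut off by
# the output token limit.
if r.metadata.get('finish_reason') == 'MAX_TOKENS':
  print('Response was truncated; consider raising max_tokens.')
```
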
@@ -833,9 +871,9 @@ class Gemini(rest.REST):
     )
 
   def _error(self, status_code: int, content: str) -> lf.LMError:
-    if (
-        status_code == 400
-        and b'exceeds the maximum number of tokens' in content
+    if status_code == 400 and (
+        b'exceeds the maximum number of tokens' in content
+        or b'Reduce the input token count and try again.' in content
     ):
       return lf.ContextLimitError(f'{status_code}: {content}')
     return super()._error(status_code, content)
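
Both 400-status messages now map to `lf.ContextLimitError`. A hedged sketch of the caller-side handling this enables, assuming the error class is exported at the package top level as the module code above implies:

```python
import langfun as lf

lm = lf.llms.GoogleGenAI('gemini-2.5-flash')
too_long = 'repeat this sentence. ' * 2_000_000  # Deliberately oversized.
try:
  r = lm(too_long)
except lf.ContextLimitError as e:
  # Sketch: both 'exceeds the maximum number of tokens' and 'Reduce the
  # input token count and try again.' responses surface here.
  print(f'Prompt exceeds the model context window: {e}')
```
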
@@ -177,6 +177,58 @@ class GeminiTest(unittest.TestCase):
         ),
     )
 
+    # Add test for thinkingConfig with thinking_level.
+    actual = model._generation_config(
+        lf.UserMessage('hi'),
+        lf.LMSamplingOptions(
+            thinking_level='high',
+        ),
+    )
+    self.assertEqual(
+        actual,
+        dict(
+            candidateCount=1,
+            temperature=None,
+            topP=None,
+            topK=40,
+            maxOutputTokens=None,
+            stopSequences=None,
+            responseLogprobs=False,
+            logprobs=None,
+            seed=None,
+            thinkingConfig={'thinkingLevel': 'high'},
+        ),
+    )
+
+    # Add test for thinkingConfig with both max_thinking_tokens and
+    # thinking_level.
+    actual = model._generation_config(
+        lf.UserMessage('hi'),
+        lf.LMSamplingOptions(
+            max_thinking_tokens=100,
+            thinking_level='low',
+        ),
+    )
+    self.assertEqual(
+        actual,
+        dict(
+            candidateCount=1,
+            temperature=None,
+            topP=None,
+            topK=40,
+            maxOutputTokens=None,
+            stopSequences=None,
+            responseLogprobs=False,
+            logprobs=None,
+            seed=None,
+            thinkingConfig={
+                'includeThoughts': True,
+                'thinkingBudget': 100,
+                'thinkingLevel': 'low',
+            },
+        ),
+    )
+
     with self.assertRaisesRegex(
         ValueError, '`json_schema` must be a dict, got'
     ):
@@ -225,6 +277,38 @@ class GeminiTest(unittest.TestCase):
     ):
       lm('hello')
 
+  def test_call_model_with_max_tokens_error(self):
+    def mock_requests_post_error(*args, **kwargs):
+      del args, kwargs
+      response = requests.Response()
+      response.status_code = 200
+      response._content = pg.to_json_str({
+          'candidates': [
+              {
+                  'finishReason': 'MAX_TOKENS',
+                  'content': {
+                      'parts': [
+                          {
+                              'text': 'This is'
+                          }
+                      ]
+                  }
+              },
+          ],
+          'usageMetadata': {
+              'promptTokenCount': 3,
+              'candidatesTokenCount': 4,
+          }
+      }).encode()
+      return response
+
+    with mock.patch('requests.Session.post') as mock_generate:
+      mock_generate.side_effect = mock_requests_post_error
+      lm = gemini.Gemini('gemini-1.5-pro', api_endpoint='')
+      m = lm('hello')
+      self.assertEqual(m.metadata.finish_reason, 'MAX_TOKENS')
+      self.assertEqual(m.text, 'This is')
+
   def test_call_model_with_system_message(self):
     with mock.patch('requests.Session.post') as mock_generate:
       mock_generate.side_effect = mock_requests_post
@@ -25,7 +25,35 @@ import pyglove as pg
 @lf.use_init_args(['model'])
 @pg.members([('api_endpoint', pg.typing.Str().freeze(''))])
 class GenAI(gemini.Gemini):
-  """Language models provided by Google GenAI."""
+  """Google GenAI models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call Gemini 1.5 Flash using API key from environment variable
+  # 'GOOGLE_API_KEY'.
+  lm = lf.llms.Gemini15Flash()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The Google API key can be specified in following ways:
+
+  1. At model instantiation:
+
+  ```python
+  lm = lf.llms.Gemini15Flash(api_key='MY_API_KEY')
+  ```
+  2. via environment variable `GOOGLE_API_KEY`.
+
+  **References:**
+
+  * https://ai.google.dev/docs
+  """
 
   model: pg.typing.Annotated[
       pg.typing.Enum(
@@ -87,9 +115,14 @@ class GenAI(gemini.Gemini):
 
 # pylint: disable=invalid-name
 
+
 #
 # Experimental models.
 #
+class Gemini3ProPreview(GenAI):
+  """Gemini 3 Pro Preview model."""
+
+  model = 'gemini-3-pro-preview'
 
 
 class Gemini25FlashImagePreview(GenAI):
langfun/core/llms/groq.py CHANGED
@@ -259,10 +259,35 @@ _SUPPORTED_MODELS_BY_ID = {m.model_id: m for m in SUPPORTED_MODELS}
 
 
 @lf.use_init_args(['model'])
-class Groq(openai_compatible.OpenAICompatible):
-  """Groq LLMs through REST APIs (OpenAI compatible).
+class Groq(openai_compatible.OpenAIChatCompletionAPI):
+  """Groq models.
 
-  See https://platform.openai.com/docs/api-reference/chat
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call Llama 3.3 70B on Groq using API key from environment variable
+  # 'GROQ_API_KEY'.
+  lm = lf.llms.GroqLlama33_70B_Versatile()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The Groq API key can be specified in following ways:
+
+  1. At model instantiation:
+
+  ```python
+  lm = lf.llms.GroqLlama33_70B_Versatile(api_key='MY_API_KEY')
+  ```
+  2. via environment variable `GROQ_API_KEY`.
+
+  **References:**
+
+  * https://console.groq.com/docs
   """
 
   model: pg.typing.Annotated[
@@ -20,11 +20,30 @@ import pyglove as pg
 
 @pg.use_init_args(['url', 'model'])
 @pg.members([('api_endpoint', pg.typing.Str().freeze(''))])
-class LlamaCppRemote(openai_compatible.OpenAICompatible):
-  """The remote LLaMA C++ model.
+class LlamaCppRemote(openai_compatible.OpenAIChatCompletionAPI):
+  """LLaMA C++ models served via a remote server.
 
-  The Remote LLaMA C++ models can be launched via
-  https://github.com/ggerganov/llama.cpp/tree/master/examples/server
+  This class provides an interface to interact with language models
+  hosted on a LLaMA C++ server, which is compatible with the OpenAI
+  Chat Completions API format.
+
+  **Quick Start:**
+
+  Assuming a LLaMA C++ server is running at `http://localhost:8080`,
+  you can interact with it as follows:
+
+  ```python
+  import langfun as lf
+
+  # If model name is not specified, it will use server's default.
+  lm = lf.llms.LlamaCppRemote(url='http://localhost:8080')
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **References:**
+
+  * https://github.com/ggerganov/llama.cpp/tree/master/examples/server
   """
   url: Annotated[
       str,
@@ -1031,8 +1031,36 @@ _SUPPORTED_MODELS_BY_MODEL_ID = {m.model_id: m for m in SUPPORTED_MODELS}
 
 
 @lf.use_init_args(['model'])
-class OpenAI(openai_compatible.OpenAICompatible):
-  """OpenAI model."""
+class OpenAI(openai_compatible.OpenAIResponsesAPI):
+  """OpenAI models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call GPT-4o using API key from environment variable 'OPENAI_API_KEY'.
+  lm = lf.llms.Gpt4o()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The OpenAI API key can be specified in following ways:
+
+  1. At model instantiation:
+
+  ```python
+  lm = lf.llms.Gpt4o(api_key='MY_API_KEY')
+  ```
+  2. via environment variable `OPENAI_API_KEY`.
+
+  **References:**
+
+  * https://platform.openai.com/docs/models
+  * https://platform.openai.com/docs/api-reference
+  """
 
   model: pg.typing.Annotated[
       pg.typing.Enum(
@@ -1041,7 +1069,12 @@ class OpenAI(openai_compatible.OpenAICompatible):
           'The name of the model to use.',
       ]
 
-  api_endpoint: str = 'https://api.openai.com/v1/chat/completions'
+  # Disable message storage by default.
+  sampling_options = lf.LMSamplingOptions(
+      extras={'store': False}
+  )
+
+  api_endpoint: str = 'https://api.openai.com/v1/responses'
 
   api_key: Annotated[
       str | None,