langfun 0.1.2.dev202508250805__py3-none-any.whl → 0.1.2.dev202511110805__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (133)
  1. langfun/__init__.py +1 -1
  2. langfun/core/__init__.py +6 -1
  3. langfun/core/agentic/__init__.py +4 -0
  4. langfun/core/agentic/action.py +412 -103
  5. langfun/core/agentic/action_eval.py +9 -2
  6. langfun/core/agentic/action_test.py +68 -6
  7. langfun/core/async_support.py +104 -5
  8. langfun/core/async_support_test.py +23 -0
  9. langfun/core/coding/python/correction.py +19 -9
  10. langfun/core/coding/python/execution.py +14 -12
  11. langfun/core/coding/python/generation.py +21 -16
  12. langfun/core/coding/python/sandboxing.py +23 -3
  13. langfun/core/component.py +42 -3
  14. langfun/core/concurrent.py +70 -6
  15. langfun/core/concurrent_test.py +9 -2
  16. langfun/core/console.py +1 -1
  17. langfun/core/data/conversion/anthropic.py +12 -3
  18. langfun/core/data/conversion/anthropic_test.py +8 -6
  19. langfun/core/data/conversion/gemini.py +9 -2
  20. langfun/core/data/conversion/gemini_test.py +12 -9
  21. langfun/core/data/conversion/openai.py +145 -31
  22. langfun/core/data/conversion/openai_test.py +161 -17
  23. langfun/core/eval/base.py +47 -43
  24. langfun/core/eval/base_test.py +4 -4
  25. langfun/core/eval/matching.py +5 -2
  26. langfun/core/eval/patching.py +3 -3
  27. langfun/core/eval/scoring.py +4 -3
  28. langfun/core/eval/v2/__init__.py +1 -0
  29. langfun/core/eval/v2/checkpointing.py +30 -4
  30. langfun/core/eval/v2/eval_test_helper.py +1 -1
  31. langfun/core/eval/v2/evaluation.py +60 -14
  32. langfun/core/eval/v2/example.py +22 -11
  33. langfun/core/eval/v2/experiment.py +51 -8
  34. langfun/core/eval/v2/metric_values.py +31 -3
  35. langfun/core/eval/v2/metric_values_test.py +32 -0
  36. langfun/core/eval/v2/metrics.py +39 -4
  37. langfun/core/eval/v2/metrics_test.py +14 -0
  38. langfun/core/eval/v2/progress.py +30 -1
  39. langfun/core/eval/v2/progress_test.py +27 -0
  40. langfun/core/eval/v2/progress_tracking_test.py +6 -0
  41. langfun/core/eval/v2/reporting.py +90 -71
  42. langfun/core/eval/v2/reporting_test.py +20 -6
  43. langfun/core/eval/v2/runners.py +27 -7
  44. langfun/core/eval/v2/runners_test.py +3 -0
  45. langfun/core/langfunc.py +45 -130
  46. langfun/core/langfunc_test.py +6 -4
  47. langfun/core/language_model.py +151 -31
  48. langfun/core/language_model_test.py +9 -3
  49. langfun/core/llms/__init__.py +12 -1
  50. langfun/core/llms/anthropic.py +157 -2
  51. langfun/core/llms/azure_openai.py +29 -17
  52. langfun/core/llms/cache/base.py +25 -3
  53. langfun/core/llms/cache/in_memory.py +48 -7
  54. langfun/core/llms/cache/in_memory_test.py +14 -4
  55. langfun/core/llms/compositional.py +25 -1
  56. langfun/core/llms/deepseek.py +30 -2
  57. langfun/core/llms/fake.py +39 -1
  58. langfun/core/llms/fake_test.py +9 -0
  59. langfun/core/llms/gemini.py +43 -7
  60. langfun/core/llms/google_genai.py +34 -1
  61. langfun/core/llms/groq.py +28 -3
  62. langfun/core/llms/llama_cpp.py +23 -4
  63. langfun/core/llms/openai.py +93 -3
  64. langfun/core/llms/openai_compatible.py +148 -27
  65. langfun/core/llms/openai_compatible_test.py +207 -20
  66. langfun/core/llms/openai_test.py +0 -2
  67. langfun/core/llms/rest.py +16 -1
  68. langfun/core/llms/vertexai.py +59 -8
  69. langfun/core/logging.py +1 -1
  70. langfun/core/mcp/__init__.py +10 -0
  71. langfun/core/mcp/client.py +177 -0
  72. langfun/core/mcp/client_test.py +71 -0
  73. langfun/core/mcp/session.py +241 -0
  74. langfun/core/mcp/session_test.py +54 -0
  75. langfun/core/mcp/testing/simple_mcp_client.py +33 -0
  76. langfun/core/mcp/testing/simple_mcp_server.py +33 -0
  77. langfun/core/mcp/tool.py +256 -0
  78. langfun/core/mcp/tool_test.py +197 -0
  79. langfun/core/memory.py +1 -0
  80. langfun/core/message.py +160 -55
  81. langfun/core/message_test.py +65 -81
  82. langfun/core/modalities/__init__.py +8 -0
  83. langfun/core/modalities/audio.py +21 -1
  84. langfun/core/modalities/image.py +19 -1
  85. langfun/core/modalities/mime.py +62 -3
  86. langfun/core/modalities/pdf.py +19 -1
  87. langfun/core/modalities/video.py +21 -1
  88. langfun/core/modality.py +167 -29
  89. langfun/core/modality_test.py +42 -12
  90. langfun/core/natural_language.py +1 -1
  91. langfun/core/sampling.py +4 -4
  92. langfun/core/sampling_test.py +20 -4
  93. langfun/core/structured/completion.py +34 -44
  94. langfun/core/structured/completion_test.py +23 -43
  95. langfun/core/structured/description.py +54 -50
  96. langfun/core/structured/function_generation.py +29 -12
  97. langfun/core/structured/mapping.py +74 -28
  98. langfun/core/structured/parsing.py +90 -74
  99. langfun/core/structured/parsing_test.py +0 -3
  100. langfun/core/structured/querying.py +242 -156
  101. langfun/core/structured/querying_test.py +95 -64
  102. langfun/core/structured/schema.py +70 -10
  103. langfun/core/structured/schema_generation.py +33 -14
  104. langfun/core/structured/scoring.py +45 -34
  105. langfun/core/structured/tokenization.py +24 -9
  106. langfun/core/subscription.py +2 -2
  107. langfun/core/template.py +175 -50
  108. langfun/core/template_test.py +123 -17
  109. langfun/env/__init__.py +43 -0
  110. langfun/env/base_environment.py +827 -0
  111. langfun/env/base_environment_test.py +473 -0
  112. langfun/env/base_feature.py +304 -0
  113. langfun/env/base_feature_test.py +228 -0
  114. langfun/env/base_sandbox.py +842 -0
  115. langfun/env/base_sandbox_test.py +1235 -0
  116. langfun/env/event_handlers/__init__.py +14 -0
  117. langfun/env/event_handlers/chain.py +233 -0
  118. langfun/env/event_handlers/chain_test.py +253 -0
  119. langfun/env/event_handlers/event_logger.py +472 -0
  120. langfun/env/event_handlers/event_logger_test.py +304 -0
  121. langfun/env/event_handlers/metric_writer.py +726 -0
  122. langfun/env/event_handlers/metric_writer_test.py +214 -0
  123. langfun/env/interface.py +1640 -0
  124. langfun/env/interface_test.py +151 -0
  125. langfun/env/load_balancers.py +59 -0
  126. langfun/env/load_balancers_test.py +139 -0
  127. langfun/env/test_utils.py +497 -0
  128. {langfun-0.1.2.dev202508250805.dist-info → langfun-0.1.2.dev202511110805.dist-info}/METADATA +7 -3
  129. langfun-0.1.2.dev202511110805.dist-info/RECORD +200 -0
  130. langfun-0.1.2.dev202508250805.dist-info/RECORD +0 -172
  131. {langfun-0.1.2.dev202508250805.dist-info → langfun-0.1.2.dev202511110805.dist-info}/WHEEL +0 -0
  132. {langfun-0.1.2.dev202508250805.dist-info → langfun-0.1.2.dev202511110805.dist-info}/licenses/LICENSE +0 -0
  133. {langfun-0.1.2.dev202508250805.dist-info → langfun-0.1.2.dev202511110805.dist-info}/top_level.txt +0 -0
langfun/core/llms/cache/in_memory.py CHANGED
@@ -24,7 +24,32 @@ import pyglove as pg
 
 @pg.use_init_args(['filename', 'ttl', 'key'])
 class InMemory(base.LMCacheBase):
-  """In memory cache."""
+  """An in-memory cache for language model lookups.
+
+  `InMemory` stores LM prompts and their corresponding responses in memory,
+  providing a simple and fast caching mechanism for a single session.
+  Optionally, it can persist the cache to a JSON file on disk, allowing
+  results to be reused across sessions.
+
+  When a filename is provided, the cache will be loaded from the file upon
+  initialization and saved to the file when `save()` is called. This is
+  useful for caching results in interactive environments like Colab or
+  when running batch jobs.
+
+  Example:
+
+  ```python
+  import langfun as lf
+  # Using in-memory cache without persistence
+  lm = lf.llms.GeminiPro(cache=lf.llms.cache.InMemory())
+  r = lm.query('hello')
+
+  # Using in-memory cache with persistence
+  lm = lf.llms.GeminiPro(cache=lf.llms.cache.InMemory('cache.json'))
+  r = lm.query('hello')
+  lm.cache.save()
+  ```
+  """
 
   filename: Annotated[
       str | None,
@@ -144,17 +169,33 @@ class InMemory(base.LMCacheBase):
 
 @contextlib.contextmanager
 def lm_cache(filename: str | None = None) -> Iterator[InMemory]:
-  """Context manager to enable cache for LMs under the context.
+  """Context manager to enable in-memory cache for LMs in the current context.
+
+  This context manager sets an `InMemory` cache as the default cache for
+  any Langfun language model instantiated within its scope, unless a model
+  is explicitly configured with a different cache.
+
+  If a `filename` is provided, the cache will be loaded from the specified
+  file at the beginning of the context and automatically saved back to the
+  file upon exiting the context. This is a convenient way to manage
+  persistent caching for a block of code.
+
+  Example:
 
-  If LMs under the context manager have explicitly specified cache, they will
-  use their own cache. Otherwise they will use the cache created by the context
-  manager.
+  ```python
+  import langfun as lf
+  with lf.lm_cache('my_cache.json'):
+    # LMs created here will use 'my_cache.json' for caching.
+    lm = lf.llms.GeminiPro()
+    print(lm.query('hello'))
+  ```
 
   Args:
-    filename: If not None, JSON file to load and save the cache.
+    filename: If provided, specifies the JSON file for loading and saving
+      the cache.
 
   Yields:
-    A cache object created.
+    The `InMemory` cache instance created for this context.
   """
   cache = InMemory(filename)
   try:
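A short usage sketch of the cache-precedence behavior described in the new `lm_cache` docstring above. It assumes the top-level `lf.lm_cache` alias and the `lf.llms.Echo` fake model; `cache.keys()` is used the same way the unit tests below use it.

```python
import langfun as lf

# Models created inside the context pick up the context cache, while a model
# constructed with its own cache keeps it (an assumption based on the
# docstring above, not a verified behavior of this exact build).
with lf.lm_cache('my_cache.json') as cache:
  lm_default = lf.llms.Echo()                             # uses `cache`
  lm_own = lf.llms.Echo(cache=lf.llms.cache.InMemory())   # keeps its own cache
  lm_default('hello')
  lm_own('hello')
  print(list(cache.keys()))  # only the first call is recorded here
```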
langfun/core/llms/cache/in_memory_test.py CHANGED
@@ -175,18 +175,28 @@ class InMemoryLMCacheTest(unittest.TestCase):
 
     cache = in_memory.InMemory()
     lm = fake.StaticSequence(['1', '2', '3', '4', '5', '6'], cache=cache)
-    lm(lf.UserMessage('hi <<[[image]]>>', image=CustomModality('foo')))
-    lm(lf.UserMessage('hi <<[[image]]>>', image=CustomModality('bar')))
+    image_foo = CustomModality('foo')
+    image_bar = CustomModality('bar')
+    lm(
+        lf.UserMessage(
+            f'hi <<[[{image_foo.id}]]>>', referred_modalities=[image_foo]
+        )
+    )
+    lm(
+        lf.UserMessage(
+            f'hi <<[[{image_bar.id}]]>>', referred_modalities=[image_bar]
+        )
+    )
     self.assertEqual(
         list(cache.keys()),
         [
             (
-                'hi <<[[image]]>><image>acbd18db</image>',
+                f'hi <<[[{image_foo.id}]]>>',
                 (None, None, 1, 40, None, None),
                 0,
             ),
             (
-                'hi <<[[image]]>><image>37b51d19</image>',
+                f'hi <<[[{image_bar.id}]]>>',
                 (None, None, 1, 40, None, None),
                 0,
             ),
langfun/core/llms/compositional.py CHANGED
@@ -21,7 +21,31 @@ import pyglove as pg
 
 @pg.use_init_args(['candidates', 'seed'])
 class RandomChoice(lf.LanguageModel):
-  """Random choice of a list of LLM models."""
+  """A composite language model that randomly selects from a list of candidates.
+
+  `RandomChoice` acts as a proxy that forwards each request (`sample`, `score`,
+  `tokenize`, or `__call__`) to one of the `candidates` selected randomly.
+  This can be useful for load balancing across multiple LLM endpoints,
+  for A/B testing different models, or for ensembling model outputs
+  by calling it multiple times.
+
+  The selection is determined by the provided `seed`, ensuring reproducibility
+  if needed.
+
+  Example:
+
+  ```python
+  import langfun as lf
+
+  lm = lf.llms.RandomChoice([
+      lf.llms.GeminiPro(),
+      lf.llms.GPT4(),
+  ])
+
+  # This call will be handled by either GeminiPro or GPT4, chosen randomly.
+  r = lm.sample('hello')
+  ```
+  """
 
   candidates: Annotated[
       list[lf.LanguageModel],
langfun/core/llms/deepseek.py CHANGED
@@ -93,8 +93,36 @@ _SUPPORTED_MODELS_BY_ID = {m.model_id: m for m in SUPPORTED_MODELS}
 # DeepSeek API uses an API format compatible with OpenAI.
 # Reference: https://api-docs.deepseek.com/
 @lf.use_init_args(['model'])
-class DeepSeek(openai_compatible.OpenAICompatible):
-  """DeepSeek model."""
+class DeepSeek(openai_compatible.OpenAIChatCompletionAPI):
+  """DeepSeek models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call DeepSeek-V3 using API key from environment variable
+  # 'DEEPSEEK_API_KEY'.
+  lm = lf.llms.DeepSeekV3()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The DeepSeek API key can be specified in following ways:
+
+  1. At model instantiation:
+
+     ```python
+     lm = lf.llms.DeepSeekV3(api_key='MY_API_KEY')
+     ```
+  2. via environment variable `DEEPSEEK_API_KEY`.
+
+  **References:**
+
+  * https://api-docs.deepseek.com/
+  """
 
   model: pg.typing.Annotated[
       pg.typing.Enum(
langfun/core/llms/fake.py CHANGED
@@ -20,7 +20,38 @@ import langfun.core as lf
 
 
 class Fake(lf.LanguageModel):
-  """The base class for all fake language models."""
+  """Base class for fake language models, used for testing.
+
+  Fake models simulate the behavior of real language models but return
+  pre-defined responses, making them useful for testing prompts,
+  data processing logic, and agent behavior without incurring API costs
+  or relying on external services.
+
+  Langfun provides several fake models:
+  * `lf.llms.Echo`: Echoes the prompt back as the response.
+  * `lf.llms.StaticResponse`: Returns a fixed, pre-defined response for
+    any prompt.
+  * `lf.llms.StaticMapping`: Returns responses based on a prompt-to-response
+    dictionary.
+  * `lf.llms.StaticSequence`: Returns responses from a pre-defined sequence
+    in order.
+
+  **Example:**
+
+  ```python
+  import langfun as lf
+
+  # Use Echo model for testing
+  lm = lf.llms.Echo()
+  response = lm('hello')
+  assert response.text == 'hello'
+
+  # Use StaticResponse model
+  lm = lf.llms.StaticResponse('world')
+  response = lm('hello')
+  assert response.text == 'world'
+  ```
+  """
 
   def _score(self, prompt: lf.Message| list[lf.Message],
              completions: list[lf.Message]):
@@ -62,6 +93,13 @@ class Echo(Fake):
     return lf.AIMessage(prompt.text)
 
 
+class Pseudo(Fake):
+  """A pseudo language model that should never be called."""
+
+  def _response_from(self, prompt: lf.Message) -> lf.Message:
+    raise ValueError('Pseudo language model should never be called.')
+
+
 @lf.use_init_args(['response'])
 class StaticResponse(Fake):
   """Language model that always gives the same canned response."""
langfun/core/llms/fake_test.py CHANGED
@@ -20,6 +20,15 @@ import langfun.core as lf
 from langfun.core.llms import fake as fakelm
 
 
+class PseudoTest(unittest.TestCase):
+
+  def test_sample(self):
+    lm = fakelm.Pseudo()
+    self.assertEqual(lm.model_id, 'Pseudo')
+    with self.assertRaises(ValueError):
+      _ = lm.sample(['hi'])
+
+
 class EchoTest(unittest.TestCase):
 
   def test_sample(self):
langfun/core/llms/gemini.py CHANGED
@@ -195,7 +195,7 @@ SUPPORTED_MODELS = [
         rate_limits=lf.ModelInfo.RateLimits(
             max_requests_per_minute=2000,
             max_tokens_per_minute=4_000_000,
-        )
+        ),
     ),
     # Gemini 2.5 Pro 0605
     GeminiModelInfo(
@@ -218,7 +218,7 @@ SUPPORTED_MODELS = [
         rate_limits=lf.ModelInfo.RateLimits(
            max_requests_per_minute=2000,
            max_tokens_per_minute=4_000_000,
-        )
+        ),
     ),
     # Gemini 2.5 Flash Preview 0520
     GeminiModelInfo(
@@ -264,7 +264,7 @@ SUPPORTED_MODELS = [
         rate_limits=lf.ModelInfo.RateLimits(
            max_requests_per_minute=2000,
            max_tokens_per_minute=4_000_000,
-        )
+        ),
     ),
     # Gemini 2.5 Flash Preview
     GeminiModelInfo(
@@ -614,6 +614,21 @@ SUPPORTED_MODELS = [
     #
     # Experimental models.
     #
+    GeminiModelInfo(
+        model_id='gemini-2.5-flash-image-preview',
+        in_service=True,
+        experimental=True,
+        provider=pg.oneof(['Google GenAI', 'VertexAI']),
+        model_type='instruction-tuned',
+        description='Gemini 2.5 Flash Image Preview model.',
+        release_date=datetime.datetime(2025, 8, 17),
+        input_modalities=GeminiModelInfo.INPUT_IMAGE_TYPES
+        + GeminiModelInfo.INPUT_DOC_TYPES,
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=32_768,
+            max_output_tokens=32_768,
+        ),
+    ),
     GeminiModelInfo(
         model_id='gemini-2.0-pro-exp-02-05',
         in_service=True,
@@ -681,7 +696,15 @@ _SUPPORTED_MODELS_BY_ID = {m.model_id: m for m in SUPPORTED_MODELS}
 
 @pg.use_init_args(['model'])
 class Gemini(rest.REST):
-  """Language models provided by Google GenAI."""
+  """Base class for Gemini models served on Google GenAI and Vertex AI.
+
+  This class implements the Gemini API protocol, shared by
+  `lf.llms.GoogleGenAI` and `lf.llms.VertexAI`, providing common request
+  formatting and response parsing for Gemini models.
+
+  It is not intended to be used directly. Please use `lf.llms.GoogleGenAI` or
+  `lf.llms.VertexAI` instead.
+  """
 
   model: pg.typing.Annotated[
       pg.typing.Enum(
@@ -690,6 +713,12 @@ class Gemini(rest.REST):
       'The name of the model to use.',
   ]
 
+  response_modalities: pg.typing.Annotated[
+      list[str] | None,
+      'Response modalities. It is needed for models whose response modalities '
+      + 'are more than plain text.',
+  ] = None
+
   @functools.cached_property
   def model_info(self) -> GeminiModelInfo:
     return _SUPPORTED_MODELS_BY_ID[self.model]
@@ -731,6 +760,8 @@ class Gemini(rest.REST):
         prompt.as_format('gemini', chunk_preprocessor=modality_conversion)
     )
     request['contents'] = contents
+    if sampling_options.extras:
+      request.update(sampling_options.extras)
     return request
 
   def _generation_config(
@@ -768,6 +799,11 @@ class Gemini(rest.REST):
           'thinkingBudget': options.max_thinking_tokens,
       }
 
+    if self.response_modalities:
+      config['responseModalities'] = self.response_modalities
+      if 'IMAGE' in self.response_modalities:
+        config.pop('responseLogprobs', None)
+        config.pop('logprobs', None)
     return config
 
   def result(self, json: dict[str, Any]) -> lf.LMSamplingResult:
@@ -802,9 +838,9 @@ class Gemini(rest.REST):
     )
 
   def _error(self, status_code: int, content: str) -> lf.LMError:
-    if (
-        status_code == 400
-        and b'exceeds the maximum number of tokens' in content
+    if status_code == 400 and (
+        b'exceeds the maximum number of tokens' in content
+        or b'Reduce the input token count and try again.' in content
     ):
      return lf.ContextLimitError(f'{status_code}: {content}')
    return super()._error(status_code, content)
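A hedged sketch of the new request-extras path added above (`request.update(sampling_options.extras)`): key/value pairs placed in `LMSamplingOptions.extras` are merged into the outgoing Gemini request body verbatim. The positional model id and the `labels` payload below are illustrative assumptions, not usage documented by this diff.

```python
import langfun as lf
from langfun.core.llms import google_genai

# Any extras are forwarded as-is into the request JSON; 'labels' here is only
# an example payload and may not be accepted by every Gemini endpoint.
lm = google_genai.GenAI(
    'gemini-2.5-flash',
    sampling_options=lf.LMSamplingOptions(
        extras={'labels': {'team': 'demo'}}
    ),
)
```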
langfun/core/llms/google_genai.py CHANGED
@@ -25,7 +25,35 @@ import pyglove as pg
 @lf.use_init_args(['model'])
 @pg.members([('api_endpoint', pg.typing.Str().freeze(''))])
 class GenAI(gemini.Gemini):
-  """Language models provided by Google GenAI."""
+  """Google GenAI models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call Gemini 1.5 Flash using API key from environment variable
+  # 'GOOGLE_API_KEY'.
+  lm = lf.llms.Gemini15Flash()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The Google API key can be specified in following ways:
+
+  1. At model instantiation:
+
+     ```python
+     lm = lf.llms.Gemini15Flash(api_key='MY_API_KEY')
+     ```
+  2. via environment variable `GOOGLE_API_KEY`.
+
+  **References:**
+
+  * https://ai.google.dev/docs
+  """
 
   model: pg.typing.Annotated[
       pg.typing.Enum(
@@ -92,6 +120,11 @@ class GenAI(gemini.Gemini):
 #
 
 
+class Gemini25FlashImagePreview(GenAI):
+  """Gemini 2.5 Flash Image Preview model."""
+  model = 'gemini-2.5-flash-image-preview'
+
+
 class Gemini25Pro(GenAI):
   """Gemini 2.5 Pro GA model."""
 
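A sketch combining the new `Gemini25FlashImagePreview` class with the `response_modalities` field introduced on `Gemini` above. The `['TEXT', 'IMAGE']` value is an assumption based on the Gemini API's modality names; only the `'IMAGE'` check appears in this diff.

```python
from langfun.core.llms import google_genai

# Requesting image output: when 'IMAGE' is present, the base class drops the
# logprobs-related options from the generation config (see gemini.py above).
lm = google_genai.Gemini25FlashImagePreview(
    response_modalities=['TEXT', 'IMAGE'],
)
r = lm('Draw a cat wearing a hat.')
print(r)
```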
langfun/core/llms/groq.py CHANGED
@@ -259,10 +259,35 @@ _SUPPORTED_MODELS_BY_ID = {m.model_id: m for m in SUPPORTED_MODELS}
 
 
 @lf.use_init_args(['model'])
-class Groq(openai_compatible.OpenAICompatible):
-  """Groq LLMs through REST APIs (OpenAI compatible).
+class Groq(openai_compatible.OpenAIChatCompletionAPI):
+  """Groq models.
 
-  See https://platform.openai.com/docs/api-reference/chat
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call Llama 3.3 70B on Groq using API key from environment variable
+  # 'GROQ_API_KEY'.
+  lm = lf.llms.GroqLlama33_70B_Versatile()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The Groq API key can be specified in following ways:
+
+  1. At model instantiation:
+
+     ```python
+     lm = lf.llms.GroqLlama33_70B_Versatile(api_key='MY_API_KEY')
+     ```
+  2. via environment variable `GROQ_API_KEY`.
+
+  **References:**
+
+  * https://console.groq.com/docs
   """
 
   model: pg.typing.Annotated[
langfun/core/llms/llama_cpp.py CHANGED
@@ -20,11 +20,30 @@ import pyglove as pg
 
 @pg.use_init_args(['url', 'model'])
 @pg.members([('api_endpoint', pg.typing.Str().freeze(''))])
-class LlamaCppRemote(openai_compatible.OpenAICompatible):
-  """The remote LLaMA C++ model.
+class LlamaCppRemote(openai_compatible.OpenAIChatCompletionAPI):
+  """LLaMA C++ models served via a remote server.
 
-  The Remote LLaMA C++ models can be launched via
-  https://github.com/ggerganov/llama.cpp/tree/master/examples/server
+  This class provides an interface to interact with language models
+  hosted on a LLaMA C++ server, which is compatible with the OpenAI
+  Chat Completions API format.
+
+  **Quick Start:**
+
+  Assuming a LLaMA C++ server is running at `http://localhost:8080`,
+  you can interact with it as follows:
+
+  ```python
+  import langfun as lf
+
+  # If model name is not specified, it will use server's default.
+  lm = lf.llms.LlamaCppRemote(url='http://localhost:8080')
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **References:**
+
+  * https://github.com/ggerganov/llama.cpp/tree/master/examples/server
   """
   url: Annotated[
       str,
langfun/core/llms/openai.py CHANGED
@@ -49,6 +49,53 @@ class OpenAIModelInfo(lf.ModelInfo):
 #
 
 SUPPORTED_MODELS = [
+    # GPT-5 models
+    OpenAIModelInfo(
+        model_id='gpt-5',
+        alias_for='gpt-5-2025-08-07',
+        in_service=True,
+        model_type='instruction-tuned',
+        description='GPT 5 model (latest stable).',
+        url='https://platform.openai.com/docs/models/gpt-5',
+        input_modalities=OpenAIModelInfo.INPUT_IMAGE_TYPES,
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=400_000,
+            max_output_tokens=128_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.125,
+            cost_per_1m_input_tokens=1.25,
+            cost_per_1m_output_tokens=10.0,
+        ),
+        # Tier 5 rate limits.
+        rate_limits=lf.ModelInfo.RateLimits(
+            max_requests_per_minute=15_000,
+            max_tokens_per_minute=40_000_000,
+        ),
+    ),
+    OpenAIModelInfo(
+        model_id='gpt-5-mini',
+        alias_for='gpt-5-mini-2025-08-07',
+        in_service=True,
+        model_type='instruction-tuned',
+        description='GPT 5 mini model (latest stable).',
+        url='https://platform.openai.com/docs/models/gpt-5-mini',
+        input_modalities=OpenAIModelInfo.INPUT_IMAGE_TYPES,
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=400_000,
+            max_output_tokens=128_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.025,
+            cost_per_1m_input_tokens=0.25,
+            cost_per_1m_output_tokens=2.0,
+        ),
+        # Tier 5 rate limits.
+        rate_limits=lf.ModelInfo.RateLimits(
+            max_requests_per_minute=180_000_000,
+            max_tokens_per_minute=30_000_000,
+        ),
+    ),
     # GPT-4.1 models
     OpenAIModelInfo(
         model_id='gpt-4.1',
@@ -984,8 +1031,36 @@ _SUPPORTED_MODELS_BY_MODEL_ID = {m.model_id: m for m in SUPPORTED_MODELS}
 
 
 @lf.use_init_args(['model'])
-class OpenAI(openai_compatible.OpenAICompatible):
-  """OpenAI model."""
+class OpenAI(openai_compatible.OpenAIResponsesAPI):
+  """OpenAI models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call GPT-4o using API key from environment variable 'OPENAI_API_KEY'.
+  lm = lf.llms.Gpt4o()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The OpenAI API key can be specified in following ways:
+
+  1. At model instantiation:
+
+     ```python
+     lm = lf.llms.Gpt4o(api_key='MY_API_KEY')
+     ```
+  2. via environment variable `OPENAI_API_KEY`.
+
+  **References:**
+
+  * https://platform.openai.com/docs/models
+  * https://platform.openai.com/docs/api-reference
+  """
 
   model: pg.typing.Annotated[
       pg.typing.Enum(
@@ -994,7 +1069,12 @@ class OpenAI(openai_compatible.OpenAICompatible):
       'The name of the model to use.',
   ]
 
-  api_endpoint: str = 'https://api.openai.com/v1/chat/completions'
+  # Disable message storage by default.
+  sampling_options = lf.LMSamplingOptions(
+      extras={'store': False}
+  )
+
+  api_endpoint: str = 'https://api.openai.com/v1/responses'
 
   api_key: Annotated[
       str | None,
@@ -1069,6 +1149,16 @@ class OpenAI(openai_compatible.OpenAICompatible):
     return super()._request_args(options)
 
 
+class Gpt5(OpenAI):
+  """GPT-5."""
+  model = 'gpt-5'
+
+
+class Gpt5Mini(OpenAI):
+  """GPT-5 mini."""
+  model = 'gpt-5-mini'
+
+
 class Gpt41(OpenAI):
   """GPT-4.1."""
   model = 'gpt-4.1'
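A usage sketch for the new GPT-5 aliases and the `store` default added above. It assumes `Gpt5Mini` is re-exported under `lf.llms` like the other OpenAI aliases, and that the `extras={'store': False}` default can be overridden by passing `sampling_options` at construction time.

```python
import langfun as lf

# Default behavior after this change: requests are sent with store=False.
lm = lf.llms.Gpt5Mini(api_key='MY_API_KEY')
print(lm('Who are you?'))

# Opting back into stored responses (assumed, not documented by this diff).
lm_stored = lf.llms.Gpt5Mini(
    api_key='MY_API_KEY',
    sampling_options=lf.LMSamplingOptions(extras={'store': True}),
)
```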