langfun 0.1.2.dev202509120804__py3-none-any.whl → 0.1.2.dev202512150805__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (162)
  1. langfun/__init__.py +1 -1
  2. langfun/core/__init__.py +7 -1
  3. langfun/core/agentic/__init__.py +8 -1
  4. langfun/core/agentic/action.py +740 -112
  5. langfun/core/agentic/action_eval.py +9 -2
  6. langfun/core/agentic/action_test.py +189 -24
  7. langfun/core/async_support.py +104 -5
  8. langfun/core/async_support_test.py +23 -0
  9. langfun/core/coding/python/correction.py +19 -9
  10. langfun/core/coding/python/execution.py +14 -12
  11. langfun/core/coding/python/generation.py +21 -16
  12. langfun/core/coding/python/sandboxing.py +23 -3
  13. langfun/core/component.py +42 -3
  14. langfun/core/concurrent.py +70 -6
  15. langfun/core/concurrent_test.py +9 -2
  16. langfun/core/console.py +1 -1
  17. langfun/core/data/conversion/anthropic.py +12 -3
  18. langfun/core/data/conversion/anthropic_test.py +8 -6
  19. langfun/core/data/conversion/gemini.py +11 -2
  20. langfun/core/data/conversion/gemini_test.py +48 -9
  21. langfun/core/data/conversion/openai.py +145 -31
  22. langfun/core/data/conversion/openai_test.py +161 -17
  23. langfun/core/eval/base.py +48 -44
  24. langfun/core/eval/base_test.py +5 -5
  25. langfun/core/eval/matching.py +5 -2
  26. langfun/core/eval/patching.py +3 -3
  27. langfun/core/eval/scoring.py +4 -3
  28. langfun/core/eval/v2/__init__.py +3 -0
  29. langfun/core/eval/v2/checkpointing.py +148 -46
  30. langfun/core/eval/v2/checkpointing_test.py +9 -2
  31. langfun/core/eval/v2/config_saver.py +37 -0
  32. langfun/core/eval/v2/config_saver_test.py +36 -0
  33. langfun/core/eval/v2/eval_test_helper.py +104 -3
  34. langfun/core/eval/v2/evaluation.py +102 -19
  35. langfun/core/eval/v2/evaluation_test.py +9 -3
  36. langfun/core/eval/v2/example.py +50 -40
  37. langfun/core/eval/v2/example_test.py +16 -8
  38. langfun/core/eval/v2/experiment.py +95 -20
  39. langfun/core/eval/v2/experiment_test.py +19 -0
  40. langfun/core/eval/v2/metric_values.py +31 -3
  41. langfun/core/eval/v2/metric_values_test.py +32 -0
  42. langfun/core/eval/v2/metrics.py +157 -44
  43. langfun/core/eval/v2/metrics_test.py +39 -18
  44. langfun/core/eval/v2/progress.py +31 -1
  45. langfun/core/eval/v2/progress_test.py +27 -0
  46. langfun/core/eval/v2/progress_tracking.py +13 -5
  47. langfun/core/eval/v2/progress_tracking_test.py +9 -1
  48. langfun/core/eval/v2/reporting.py +88 -71
  49. langfun/core/eval/v2/reporting_test.py +24 -6
  50. langfun/core/eval/v2/runners/__init__.py +30 -0
  51. langfun/core/eval/v2/{runners.py → runners/base.py} +73 -180
  52. langfun/core/eval/v2/runners/beam.py +354 -0
  53. langfun/core/eval/v2/runners/beam_test.py +153 -0
  54. langfun/core/eval/v2/runners/ckpt_monitor.py +350 -0
  55. langfun/core/eval/v2/runners/ckpt_monitor_test.py +213 -0
  56. langfun/core/eval/v2/runners/debug.py +40 -0
  57. langfun/core/eval/v2/runners/debug_test.py +76 -0
  58. langfun/core/eval/v2/runners/parallel.py +243 -0
  59. langfun/core/eval/v2/runners/parallel_test.py +182 -0
  60. langfun/core/eval/v2/runners/sequential.py +47 -0
  61. langfun/core/eval/v2/runners/sequential_test.py +169 -0
  62. langfun/core/langfunc.py +45 -130
  63. langfun/core/langfunc_test.py +7 -5
  64. langfun/core/language_model.py +189 -36
  65. langfun/core/language_model_test.py +54 -3
  66. langfun/core/llms/__init__.py +14 -1
  67. langfun/core/llms/anthropic.py +157 -2
  68. langfun/core/llms/azure_openai.py +29 -17
  69. langfun/core/llms/cache/base.py +25 -3
  70. langfun/core/llms/cache/in_memory.py +48 -7
  71. langfun/core/llms/cache/in_memory_test.py +14 -4
  72. langfun/core/llms/compositional.py +25 -1
  73. langfun/core/llms/deepseek.py +30 -2
  74. langfun/core/llms/fake.py +32 -1
  75. langfun/core/llms/gemini.py +90 -12
  76. langfun/core/llms/gemini_test.py +110 -0
  77. langfun/core/llms/google_genai.py +52 -1
  78. langfun/core/llms/groq.py +28 -3
  79. langfun/core/llms/llama_cpp.py +23 -4
  80. langfun/core/llms/openai.py +120 -3
  81. langfun/core/llms/openai_compatible.py +148 -27
  82. langfun/core/llms/openai_compatible_test.py +207 -20
  83. langfun/core/llms/openai_test.py +0 -2
  84. langfun/core/llms/rest.py +16 -1
  85. langfun/core/llms/vertexai.py +78 -8
  86. langfun/core/logging.py +1 -1
  87. langfun/core/mcp/__init__.py +10 -0
  88. langfun/core/mcp/client.py +177 -0
  89. langfun/core/mcp/client_test.py +71 -0
  90. langfun/core/mcp/session.py +241 -0
  91. langfun/core/mcp/session_test.py +54 -0
  92. langfun/core/mcp/testing/simple_mcp_client.py +33 -0
  93. langfun/core/mcp/testing/simple_mcp_server.py +33 -0
  94. langfun/core/mcp/tool.py +254 -0
  95. langfun/core/mcp/tool_test.py +197 -0
  96. langfun/core/memory.py +1 -0
  97. langfun/core/message.py +160 -55
  98. langfun/core/message_test.py +65 -81
  99. langfun/core/modalities/__init__.py +8 -0
  100. langfun/core/modalities/audio.py +21 -1
  101. langfun/core/modalities/image.py +73 -3
  102. langfun/core/modalities/image_test.py +116 -0
  103. langfun/core/modalities/mime.py +78 -4
  104. langfun/core/modalities/mime_test.py +59 -0
  105. langfun/core/modalities/pdf.py +19 -1
  106. langfun/core/modalities/video.py +21 -1
  107. langfun/core/modality.py +167 -29
  108. langfun/core/modality_test.py +42 -12
  109. langfun/core/natural_language.py +1 -1
  110. langfun/core/sampling.py +4 -4
  111. langfun/core/sampling_test.py +20 -4
  112. langfun/core/structured/__init__.py +2 -24
  113. langfun/core/structured/completion.py +34 -44
  114. langfun/core/structured/completion_test.py +23 -43
  115. langfun/core/structured/description.py +54 -50
  116. langfun/core/structured/function_generation.py +29 -12
  117. langfun/core/structured/mapping.py +81 -37
  118. langfun/core/structured/parsing.py +95 -79
  119. langfun/core/structured/parsing_test.py +0 -3
  120. langfun/core/structured/querying.py +230 -154
  121. langfun/core/structured/querying_test.py +69 -33
  122. langfun/core/structured/schema/__init__.py +49 -0
  123. langfun/core/structured/schema/base.py +664 -0
  124. langfun/core/structured/schema/base_test.py +531 -0
  125. langfun/core/structured/schema/json.py +174 -0
  126. langfun/core/structured/schema/json_test.py +121 -0
  127. langfun/core/structured/schema/python.py +316 -0
  128. langfun/core/structured/schema/python_test.py +410 -0
  129. langfun/core/structured/schema_generation.py +33 -14
  130. langfun/core/structured/scoring.py +47 -36
  131. langfun/core/structured/tokenization.py +26 -11
  132. langfun/core/subscription.py +2 -2
  133. langfun/core/template.py +175 -50
  134. langfun/core/template_test.py +123 -17
  135. langfun/env/__init__.py +43 -0
  136. langfun/env/base_environment.py +827 -0
  137. langfun/env/base_environment_test.py +473 -0
  138. langfun/env/base_feature.py +304 -0
  139. langfun/env/base_feature_test.py +228 -0
  140. langfun/env/base_sandbox.py +842 -0
  141. langfun/env/base_sandbox_test.py +1235 -0
  142. langfun/env/event_handlers/__init__.py +14 -0
  143. langfun/env/event_handlers/chain.py +233 -0
  144. langfun/env/event_handlers/chain_test.py +253 -0
  145. langfun/env/event_handlers/event_logger.py +472 -0
  146. langfun/env/event_handlers/event_logger_test.py +304 -0
  147. langfun/env/event_handlers/metric_writer.py +726 -0
  148. langfun/env/event_handlers/metric_writer_test.py +214 -0
  149. langfun/env/interface.py +1640 -0
  150. langfun/env/interface_test.py +153 -0
  151. langfun/env/load_balancers.py +59 -0
  152. langfun/env/load_balancers_test.py +141 -0
  153. langfun/env/test_utils.py +507 -0
  154. {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/METADATA +7 -3
  155. langfun-0.1.2.dev202512150805.dist-info/RECORD +217 -0
  156. langfun/core/eval/v2/runners_test.py +0 -343
  157. langfun/core/structured/schema.py +0 -987
  158. langfun/core/structured/schema_test.py +0 -982
  159. langfun-0.1.2.dev202509120804.dist-info/RECORD +0 -172
  160. {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/WHEEL +0 -0
  161. {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/licenses/LICENSE +0 -0
  162. {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/top_level.txt +0 -0
langfun/core/llms/gemini.py CHANGED
@@ -151,6 +151,55 @@ SUPPORTED_MODELS = [
     #
     # Production models.
     #
+    # Gemini 3 Pro Preview
+    GeminiModelInfo(
+        model_id='gemini-3-pro-preview',
+        in_service=True,
+        provider=pg.oneof(['Google GenAI', 'VertexAI']),
+        model_type='instruction-tuned',
+        description='Gemini 3 Pro Preview.',
+        release_date=datetime.datetime(2025, 11, 18),
+        input_modalities=GeminiModelInfo.ALL_SUPPORTED_INPUT_TYPES,
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=1_048_576,
+            max_output_tokens=65_536,
+        ),
+        pricing=GeminiModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.2,
+            cost_per_1m_input_tokens=2.0,
+            cost_per_1m_output_tokens=12.0,
+            cost_per_1m_cached_input_tokens_with_prompt_longer_than_128k=0.4,
+            cost_per_1m_input_tokens_with_prompt_longer_than_128k=4.0,
+            cost_per_1m_output_tokens_with_prompt_longer_than_128k=18.0,
+        ),
+        rate_limits=lf.ModelInfo.RateLimits(
+            max_requests_per_minute=2000,
+            max_tokens_per_minute=4_000_000,
+        ),
+    ),
+    # Gemini 3 Pro Image Preview
+    GeminiModelInfo(
+        model_id='gemini-3-pro-image-preview',
+        in_service=True,
+        experimental=True,
+        provider=pg.oneof(['Google GenAI', 'VertexAI']),
+        model_type='instruction-tuned',
+        description=(
+            'Gemini 3 Pro Image Preview for high-fidelity image generation,'
+            ' editing, and visual reasoning.'
+        ),
+        release_date=datetime.datetime(2025, 12, 9),
+        input_modalities=GeminiModelInfo.INPUT_IMAGE_TYPES
+        + GeminiModelInfo.INPUT_DOC_TYPES,
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=65_536,
+            max_output_tokens=32_768,
+        ),
+        rate_limits=lf.ModelInfo.RateLimits(
+            max_requests_per_minute=200,
+            max_tokens_per_minute=1_000_000,
+        ),
+    ),
     # Gemini 2.5 Flash
     GeminiModelInfo(
         model_id='gemini-2.5-flash',
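The two new entries register context-window, pricing, and rate-limit metadata for the preview models. A minimal sketch of reading that metadata back, assuming a `model_info` property is exposed on model instances (the API key is a placeholder; expected values come from the entries above):

```python
import langfun as lf

# Inspect the registered metadata for the new model.
lm = lf.llms.GoogleGenAI('gemini-3-pro-preview', api_key='MY_API_KEY')
print(lm.model_info.context_length.max_input_tokens)    # 1048576
print(lm.model_info.pricing.cost_per_1m_output_tokens)  # 12.0
```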
@@ -696,7 +745,15 @@ _SUPPORTED_MODELS_BY_ID = {m.model_id: m for m in SUPPORTED_MODELS}

 @pg.use_init_args(['model'])
 class Gemini(rest.REST):
-  """Language models provided by Google GenAI."""
+  """Base class for Gemini models served on Google GenAI and Vertex AI.
+
+  This class implements the Gemini API protocol, shared by
+  `lf.llms.GoogleGenAI` and `lf.llms.VertexAI`, providing common request
+  formatting and response parsing for Gemini models.
+
+  It is not intended to be used directly. Please use `lf.llms.GoogleGenAI` or
+  `lf.llms.VertexAI` instead.
+  """

   model: pg.typing.Annotated[
       pg.typing.Enum(
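Per the reworked docstring, `Gemini` now holds only the shared protocol, and the two subclasses are the supported entry points. A minimal sketch of both (credentials are placeholders; the `project`/`location` parameters follow the usual Vertex AI setup and are assumptions here):

```python
import langfun as lf

# Gemini API protocol served via Google GenAI (API-key based).
lm = lf.llms.GoogleGenAI('gemini-3-pro-preview', api_key='MY_API_KEY')

# The same protocol served via Vertex AI (project/location based).
lm = lf.llms.VertexAI(
    'gemini-3-pro-preview', project='my-project', location='us-central1'
)
```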
@@ -752,6 +809,13 @@ class Gemini(rest.REST):
         prompt.as_format('gemini', chunk_preprocessor=modality_conversion)
     )
     request['contents'] = contents
+    request['toolConfig'] = {
+        'functionCallingConfig': {
+            'mode': 'NONE',
+        }
+    }
+    if sampling_options.extras:
+      request.update(sampling_options.extras)
     return request

   def _generation_config(
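Two behavior changes land in `request()`: function calling is now pinned to `NONE` by default, and `sampling_options.extras` is merged into the request afterwards, so extras can override any top-level request field. A minimal sketch of a hypothetical override (`extras` keys are raw Gemini API request fields, passed through verbatim):

```python
import langfun as lf

# Hypothetical: turn automatic function calling back on by overriding the
# 'toolConfig' default; extras are applied after it is set (see hunk above).
lm = lf.llms.GoogleGenAI(
    'gemini-3-pro-preview',
    api_key='MY_API_KEY',
    sampling_options=lf.LMSamplingOptions(
        extras={'toolConfig': {'functionCallingConfig': {'mode': 'AUTO'}}},
    ),
)
```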
@@ -783,11 +847,21 @@ class Gemini(rest.REST):
           + '\n\n [RESPONSE FORMAT (not part of prompt)]\n'
           + pg.to_json_str(json_schema, json_indent=2)
       )
+    thinking_config_data = {}
     if options.max_thinking_tokens is not None:
-      config['thinkingConfig'] = {
-          'includeThoughts': options.max_thinking_tokens > 0,
-          'thinkingBudget': options.max_thinking_tokens,
-      }
+      thinking_config_data['includeThoughts'] = options.max_thinking_tokens > 0
+      thinking_config_data['thinkingBudget'] = options.max_thinking_tokens
+    if options.thinking_level is not None:
+      thinking_config_data['thinkingLevel'] = options.thinking_level
+    if thinking_config_data:
+      config['thinkingConfig'] = thinking_config_data
+
+    # This is a new feature since Gemini 3.
+    # Skip for image generation models as they don't support mediaResolution.
+    if self.model_id.startswith('gemini-3') and not (
+        self.response_modalities and 'IMAGE' in self.response_modalities
+    ):
+      config['mediaResolution'] = 'MEDIA_RESOLUTION_HIGH'

     if self.response_modalities:
       config['responseModalities'] = self.response_modalities
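`thinkingConfig` is now assembled incrementally, so the new `thinking_level` option can be set with or without `max_thinking_tokens`, and Gemini 3 text models are pinned to `MEDIA_RESOLUTION_HIGH`. A minimal sketch of the new knobs (option names are confirmed by the tests further down; the API key is a placeholder):

```python
import langfun as lf

lm = lf.llms.GoogleGenAI(
    'gemini-3-pro-preview',
    api_key='MY_API_KEY',
    sampling_options=lf.LMSamplingOptions(
        max_thinking_tokens=100,  # -> includeThoughts=True, thinkingBudget=100
        thinking_level='low',     # -> thinkingLevel='low' (new in this release)
    ),
)
```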
@@ -803,10 +877,14 @@ class Gemini(rest.REST):
           'No candidates found in response. This is a Gemini API issue that '
           'happens occasionally, and retrying should fix it. '
       )
-    messages = [
-        lf.Message.from_value(candidate['content'], format='gemini')
-        for candidate in candidates
-    ]
+
+    messages = []
+    for candidate in candidates:
+      message = lf.Message.from_value(candidate['content'], format='gemini')
+      if finish_reason := candidate.get('finishReason'):
+        message.metadata['finish_reason'] = finish_reason
+      messages.append(message)
+
     usage = json['usageMetadata']
     input_tokens = usage['promptTokenCount']
     # NOTE(daiyip): We saw cases that `candidatesTokenCount` is not present.
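Each candidate's `finishReason` now surfaces as `finish_reason` on the message metadata, so truncation is detectable without inspecting the raw response. A minimal sketch (the 'MAX_TOKENS' value is exercised by the new test below; the API key is a placeholder):

```python
import langfun as lf

lm = lf.llms.GoogleGenAI('gemini-3-pro-preview', api_key='MY_API_KEY')
r = lm('Who are you?')
if r.metadata.get('finish_reason') == 'MAX_TOKENS':
  print('Response was truncated; consider raising max_tokens.')
```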
@@ -828,9 +906,9 @@ class Gemini(rest.REST):
     )

   def _error(self, status_code: int, content: str) -> lf.LMError:
-    if (
-        status_code == 400
-        and b'exceeds the maximum number of tokens' in content
+    if status_code == 400 and (
+        b'exceeds the maximum number of tokens' in content
+        or b'Reduce the input token count and try again.' in content
     ):
       return lf.ContextLimitError(f'{status_code}: {content}')
     return super()._error(status_code, content)
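The 400 handler now also recognizes the 'Reduce the input token count' wording, so both phrasings of a context overflow map to `lf.ContextLimitError`. A minimal sketch of handling it (the oversized prompt is contrived, and the top-level re-export of `ContextLimitError` is assumed from its `lf.` usage in the hunk above):

```python
import langfun as lf

lm = lf.llms.GoogleGenAI('gemini-3-pro-preview', api_key='MY_API_KEY')
try:
  lm('word ' * 2_000_000)  # Deliberately exceed the context window.
except lf.ContextLimitError as e:
  print('Context limit exceeded:', e)
```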
langfun/core/llms/gemini_test.py CHANGED
@@ -177,6 +177,58 @@ class GeminiTest(unittest.TestCase):
         ),
     )

+    # Add test for thinkingConfig with thinking_level.
+    actual = model._generation_config(
+        lf.UserMessage('hi'),
+        lf.LMSamplingOptions(
+            thinking_level='high',
+        ),
+    )
+    self.assertEqual(
+        actual,
+        dict(
+            candidateCount=1,
+            temperature=None,
+            topP=None,
+            topK=40,
+            maxOutputTokens=None,
+            stopSequences=None,
+            responseLogprobs=False,
+            logprobs=None,
+            seed=None,
+            thinkingConfig={'thinkingLevel': 'high'},
+        ),
+    )
+
+    # Add test for thinkingConfig with both max_thinking_tokens and
+    # thinking_level.
+    actual = model._generation_config(
+        lf.UserMessage('hi'),
+        lf.LMSamplingOptions(
+            max_thinking_tokens=100,
+            thinking_level='low',
+        ),
+    )
+    self.assertEqual(
+        actual,
+        dict(
+            candidateCount=1,
+            temperature=None,
+            topP=None,
+            topK=40,
+            maxOutputTokens=None,
+            stopSequences=None,
+            responseLogprobs=False,
+            logprobs=None,
+            seed=None,
+            thinkingConfig={
+                'includeThoughts': True,
+                'thinkingBudget': 100,
+                'thinkingLevel': 'low',
+            },
+        ),
+    )
+
     with self.assertRaisesRegex(
         ValueError, '`json_schema` must be a dict, got'
     ):
@@ -185,6 +237,32 @@ class GeminiTest(unittest.TestCase):
         lf.LMSamplingOptions(),
     )

+  def test_media_resolution_for_gemini3(self):
+    model = gemini.Gemini('gemini-3-pro-preview', api_endpoint='')
+    config = model._generation_config(
+        lf.UserMessage('hi'),
+        lf.LMSamplingOptions(),
+    )
+    self.assertEqual(config.get('mediaResolution'), 'MEDIA_RESOLUTION_HIGH')
+
+    model = gemini.Gemini('gemini-1.5-pro', api_endpoint='')
+    config = model._generation_config(
+        lf.UserMessage('hi'),
+        lf.LMSamplingOptions(),
+    )
+    self.assertIsNone(config.get('mediaResolution'))
+
+  def test_request_tool_config(self):
+    model = gemini.Gemini('gemini-1.5-pro', api_endpoint='')
+    request = model.request(
+        lf.UserMessage('hi'),
+        lf.LMSamplingOptions(),
+    )
+    self.assertEqual(
+        request.get('toolConfig'),
+        {'functionCallingConfig': {'mode': 'NONE'}},
+    )
+
   def test_call_model(self):
     with mock.patch('requests.Session.post') as mock_generate:
       mock_generate.side_effect = mock_requests_post
@@ -225,6 +303,38 @@ class GeminiTest(unittest.TestCase):
     ):
       lm('hello')

+  def test_call_model_with_max_tokens_error(self):
+    def mock_requests_post_error(*args, **kwargs):
+      del args, kwargs
+      response = requests.Response()
+      response.status_code = 200
+      response._content = pg.to_json_str({
+          'candidates': [
+              {
+                  'finishReason': 'MAX_TOKENS',
+                  'content': {
+                      'parts': [
+                          {'text': 'This is'}
+                      ]
+                  }
+              },
+          ],
+          'usageMetadata': {
+              'promptTokenCount': 3,
+              'candidatesTokenCount': 4,
+          }
+      }).encode()
+      return response
+
+    with mock.patch('requests.Session.post') as mock_generate:
+      mock_generate.side_effect = mock_requests_post_error
+      lm = gemini.Gemini('gemini-1.5-pro', api_endpoint='')
+      m = lm('hello')
+      self.assertEqual(m.metadata.finish_reason, 'MAX_TOKENS')
+      self.assertEqual(m.text, 'This is')
+
   def test_call_model_with_system_message(self):
     with mock.patch('requests.Session.post') as mock_generate:
       mock_generate.side_effect = mock_requests_post
langfun/core/llms/google_genai.py CHANGED
@@ -25,7 +25,35 @@ import pyglove as pg
 @lf.use_init_args(['model'])
 @pg.members([('api_endpoint', pg.typing.Str().freeze(''))])
 class GenAI(gemini.Gemini):
-  """Language models provided by Google GenAI."""
+  """Google GenAI models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call Gemini 1.5 Flash using API key from environment variable
+  # 'GOOGLE_API_KEY'.
+  lm = lf.llms.Gemini15Flash()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The Google API key can be specified in the following ways:
+
+  1. At model instantiation:
+
+     ```python
+     lm = lf.llms.Gemini15Flash(api_key='MY_API_KEY')
+     ```
+  2. Via environment variable `GOOGLE_API_KEY`.
+
+  **References:**
+
+  * https://ai.google.dev/docs
+  """

   model: pg.typing.Annotated[
       pg.typing.Enum(
@@ -87,9 +115,32 @@ class GenAI(gemini.Gemini):

 # pylint: disable=invalid-name

+
 #
 # Experimental models.
 #
+class Gemini3ProPreview(GenAI):
+  """Gemini 3 Pro Preview model."""
+
+  model = 'gemini-3-pro-preview'
+
+
+class Gemini3ProImagePreview(GenAI):
+  """Gemini 3 Pro Image Preview model for high-fidelity image generation.
+
+  This model supports:
+  - Text-to-image generation
+  - Image editing (multimodal input)
+  - Visual reasoning
+
+  Key Requirements:
+  - responseModalities must include 'IMAGE'
+  - Supported aspect ratios: 1:1, 16:9, 9:16, 4:3, 3:4
+  - Image sizes: 1K (default), 2K, 4K
+  """
+
+  model = 'gemini-3-pro-image-preview'
+  response_modalities = ['TEXT', 'IMAGE']


 class Gemini25FlashImagePreview(GenAI):
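A minimal sketch of the new image preview class, referenced by its module path; whether it is also re-exported under `lf.llms` depends on the `__init__.py` changes not shown in this excerpt, and `Message.chunk()` is assumed as the way to enumerate text and image parts of the response:

```python
from langfun.core.llms import google_genai

# response_modalities already defaults to ['TEXT', 'IMAGE'] on this class.
lm = google_genai.Gemini3ProImagePreview(api_key='MY_API_KEY')
r = lm('A watercolor fox in a snowy forest, 16:9.')
# Generated images arrive as modality chunks alongside any text.
for chunk in r.chunk():
  print(type(chunk))
```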
langfun/core/llms/groq.py CHANGED
@@ -259,10 +259,35 @@ _SUPPORTED_MODELS_BY_ID = {m.model_id: m for m in SUPPORTED_MODELS}


 @lf.use_init_args(['model'])
-class Groq(openai_compatible.OpenAICompatible):
-  """Groq LLMs through REST APIs (OpenAI compatible).
+class Groq(openai_compatible.OpenAIChatCompletionAPI):
+  """Groq models.

-  See https://platform.openai.com/docs/api-reference/chat
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call Llama 3.3 70B on Groq using API key from environment variable
+  # 'GROQ_API_KEY'.
+  lm = lf.llms.GroqLlama33_70B_Versatile()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The Groq API key can be specified in the following ways:
+
+  1. At model instantiation:
+
+     ```python
+     lm = lf.llms.GroqLlama33_70B_Versatile(api_key='MY_API_KEY')
+     ```
+  2. Via environment variable `GROQ_API_KEY`.
+
+  **References:**
+
+  * https://console.groq.com/docs
   """

   model: pg.typing.Annotated[
langfun/core/llms/llama_cpp.py CHANGED
@@ -20,11 +20,30 @@ import pyglove as pg

 @pg.use_init_args(['url', 'model'])
 @pg.members([('api_endpoint', pg.typing.Str().freeze(''))])
-class LlamaCppRemote(openai_compatible.OpenAICompatible):
-  """The remote LLaMA C++ model.
+class LlamaCppRemote(openai_compatible.OpenAIChatCompletionAPI):
+  """LLaMA C++ models served via a remote server.

-  The Remote LLaMA C++ models can be launched via
-  https://github.com/ggerganov/llama.cpp/tree/master/examples/server
+  This class provides an interface to interact with language models
+  hosted on a LLaMA C++ server, which is compatible with the OpenAI
+  Chat Completions API format.
+
+  **Quick Start:**
+
+  Assuming a LLaMA C++ server is running at `http://localhost:8080`,
+  you can interact with it as follows:
+
+  ```python
+  import langfun as lf
+
+  # If the model name is not specified, the server's default is used.
+  lm = lf.llms.LlamaCppRemote(url='http://localhost:8080')
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **References:**
+
+  * https://github.com/ggerganov/llama.cpp/tree/master/examples/server
   """
   url: Annotated[
       str,
langfun/core/llms/openai.py CHANGED
@@ -49,6 +49,75 @@ class OpenAIModelInfo(lf.ModelInfo):
 #

 SUPPORTED_MODELS = [
+    # GPT-5 models
+    OpenAIModelInfo(
+        model_id='gpt-5.1',
+        in_service=True,
+        model_type='instruction-tuned',
+        description='GPT 5.1 model (latest stable).',
+        url='https://platform.openai.com/docs/models/gpt-5.1',
+        input_modalities=OpenAIModelInfo.INPUT_IMAGE_TYPES,
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=400_000,
+            max_output_tokens=128_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.13,
+            cost_per_1m_input_tokens=1.25,
+            cost_per_1m_output_tokens=10.0,
+        ),
+        # Tier 5 rate limits.
+        rate_limits=lf.ModelInfo.RateLimits(
+            max_requests_per_minute=15_000,
+            max_tokens_per_minute=40_000_000,
+        ),
+    ),
+    OpenAIModelInfo(
+        model_id='gpt-5',
+        alias_for='gpt-5-2025-08-07',
+        in_service=True,
+        model_type='instruction-tuned',
+        description='GPT 5 model (latest stable).',
+        url='https://platform.openai.com/docs/models/gpt-5',
+        input_modalities=OpenAIModelInfo.INPUT_IMAGE_TYPES,
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=400_000,
+            max_output_tokens=128_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.125,
+            cost_per_1m_input_tokens=1.25,
+            cost_per_1m_output_tokens=10.0,
+        ),
+        # Tier 5 rate limits.
+        rate_limits=lf.ModelInfo.RateLimits(
+            max_requests_per_minute=15_000,
+            max_tokens_per_minute=40_000_000,
+        ),
+    ),
+    OpenAIModelInfo(
+        model_id='gpt-5-mini',
+        alias_for='gpt-5-mini-2025-08-07',
+        in_service=True,
+        model_type='instruction-tuned',
+        description='GPT 5 mini model (latest stable).',
+        url='https://platform.openai.com/docs/models/gpt-5-mini',
+        input_modalities=OpenAIModelInfo.INPUT_IMAGE_TYPES,
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=400_000,
+            max_output_tokens=128_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.025,
+            cost_per_1m_input_tokens=0.25,
+            cost_per_1m_output_tokens=2.0,
+        ),
+        # Tier 5 rate limits.
+        rate_limits=lf.ModelInfo.RateLimits(
+            max_requests_per_minute=180_000_000,
+            max_tokens_per_minute=30_000_000,
+        ),
+    ),
     # GPT-4.1 models
     OpenAIModelInfo(
         model_id='gpt-4.1',
@@ -984,8 +1053,36 @@ _SUPPORTED_MODELS_BY_MODEL_ID = {m.model_id: m for m in SUPPORTED_MODELS}


 @lf.use_init_args(['model'])
-class OpenAI(openai_compatible.OpenAICompatible):
-  """OpenAI model."""
+class OpenAI(openai_compatible.OpenAIResponsesAPI):
+  """OpenAI models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call GPT-4o using API key from environment variable 'OPENAI_API_KEY'.
+  lm = lf.llms.Gpt4o()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The OpenAI API key can be specified in the following ways:
+
+  1. At model instantiation:
+
+     ```python
+     lm = lf.llms.Gpt4o(api_key='MY_API_KEY')
+     ```
+  2. Via environment variable `OPENAI_API_KEY`.
+
+  **References:**
+
+  * https://platform.openai.com/docs/models
+  * https://platform.openai.com/docs/api-reference
+  """

   model: pg.typing.Annotated[
       pg.typing.Enum(
@@ -994,7 +1091,12 @@ class OpenAI(openai_compatible.OpenAICompatible):
       'The name of the model to use.',
   ]

-  api_endpoint: str = 'https://api.openai.com/v1/chat/completions'
+  # Disable message storage by default.
+  sampling_options = lf.LMSamplingOptions(
+      extras={'store': False}
+  )
+
+  api_endpoint: str = 'https://api.openai.com/v1/responses'

   api_key: Annotated[
       str | None,
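With the move to the Responses API endpoint, `store=False` is now sent on every request through the `sampling_options.extras` default above. A minimal sketch of opting back into storage for a single instance (a hypothetical override of the class-level default):

```python
import langfun as lf

# Hypothetical: re-enable OpenAI response storage for one model instance.
lm = lf.llms.OpenAI(
    'gpt-5.1',
    sampling_options=lf.LMSamplingOptions(extras={'store': True}),
)
```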
@@ -1069,6 +1171,21 @@ class OpenAI(openai_compatible.OpenAICompatible):
     return super()._request_args(options)


+class Gpt51(OpenAI):
+  """GPT-5.1."""
+  model = 'gpt-5.1'
+
+
+class Gpt5(OpenAI):
+  """GPT-5."""
+  model = 'gpt-5'
+
+
+class Gpt5Mini(OpenAI):
+  """GPT-5 mini."""
+  model = 'gpt-5-mini'
+
+
 class Gpt41(OpenAI):
   """GPT-4.1."""
   model = 'gpt-4.1'
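The new aliases follow the existing convention (`Gpt41` just below them), so usage mirrors the quick-start docstring above. A minimal sketch, assuming the classes are re-exported under `lf.llms` like their siblings:

```python
import langfun as lf

# Reads OPENAI_API_KEY from the environment, like the other OpenAI classes.
lm = lf.llms.Gpt5()
r = lm('Who are you?')
print(r)
```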