langfun 0.1.2.dev202511160804__py3-none-any.whl → 0.1.2.dev202511270805__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- langfun/core/__init__.py +1 -0
- langfun/core/agentic/__init__.py +4 -1
- langfun/core/agentic/action.py +340 -17
- langfun/core/agentic/action_test.py +124 -21
- langfun/core/eval/base_test.py +5 -5
- langfun/core/eval/v2/checkpointing.py +25 -1
- langfun/core/eval/v2/checkpointing_test.py +8 -1
- langfun/core/eval/v2/eval_test_helper.py +7 -2
- langfun/core/eval/v2/evaluation.py +4 -1
- langfun/core/eval/v2/example.py +5 -1
- langfun/core/eval/v2/example_test.py +13 -5
- langfun/core/eval/v2/experiment.py +23 -0
- langfun/core/eval/v2/experiment_test.py +19 -0
- langfun/core/eval/v2/progress_tracking.py +12 -3
- langfun/core/eval/v2/progress_tracking_test.py +3 -1
- langfun/core/eval/v2/reporting_test.py +4 -0
- langfun/core/eval/v2/runners/__init__.py +4 -0
- langfun/core/eval/v2/runners/base.py +40 -21
- langfun/core/eval/v2/runners/beam.py +341 -0
- langfun/core/eval/v2/runners/beam_test.py +131 -0
- langfun/core/eval/v2/runners/ckpt_monitor.py +294 -0
- langfun/core/eval/v2/runners/ckpt_monitor_test.py +162 -0
- langfun/core/eval/v2/runners/debug_test.py +1 -4
- langfun/core/eval/v2/runners/parallel_test.py +1 -4
- langfun/core/eval/v2/runners/sequential_test.py +1 -4
- langfun/core/langfunc_test.py +3 -3
- langfun/core/language_model.py +38 -5
- langfun/core/language_model_test.py +45 -0
- langfun/core/llms/__init__.py +2 -0
- langfun/core/llms/gemini.py +41 -8
- langfun/core/llms/gemini_test.py +84 -0
- langfun/core/llms/google_genai.py +5 -0
- langfun/core/llms/vertexai.py +7 -0
- langfun/core/modalities/mime.py +2 -0
- langfun/core/modalities/mime_test.py +11 -0
- langfun/core/structured/schema/__init__.py +1 -0
- {langfun-0.1.2.dev202511160804.dist-info → langfun-0.1.2.dev202511270805.dist-info}/METADATA +1 -1
- {langfun-0.1.2.dev202511160804.dist-info → langfun-0.1.2.dev202511270805.dist-info}/RECORD +41 -37
- {langfun-0.1.2.dev202511160804.dist-info → langfun-0.1.2.dev202511270805.dist-info}/WHEEL +0 -0
- {langfun-0.1.2.dev202511160804.dist-info → langfun-0.1.2.dev202511270805.dist-info}/licenses/LICENSE +0 -0
- {langfun-0.1.2.dev202511160804.dist-info → langfun-0.1.2.dev202511270805.dist-info}/top_level.txt +0 -0
langfun/core/eval/v2/runners/ckpt_monitor.py
ADDED
@@ -0,0 +1,294 @@
+# Copyright 2025 The Langfun Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Checkpoint aggregator for Langfun evaluations."""
+
+import concurrent.futures
+import dataclasses
+import os
+import threading
+import time
+from typing import Annotated, Iterator
+
+from langfun.core.eval.v2 import evaluation as evaluation_lib
+from langfun.core.eval.v2 import example as example_lib
+from langfun.core.eval.v2 import reporting
+from langfun.core.eval.v2.runners import base
+
+import pyglove as pg
+
+
+class CheckpointMonitor(base.RunnerBase):
+  """Runner for monitoring checkpoint files generated by other runners.
+
+  Currently the checkpoint monitor only supports aggregating per-example
+  checkpoint files.
+  """
+
+  NAME = 'checkpoint_monitor'
+
+  plugins = [
+      reporting.HtmlReporter(),
+  ]
+
+  checkpoint_pattern: Annotated[
+      str, 'The glob pattern of the checkpoint files to monitor.'
+  ] = 'checkpoint_*.bagz'
+
+  monitor_inprogress_files: Annotated[
+      bool,
+      'If True, monitor in-progress files to aggregate.'
+  ] = False
+
+  poll_interval: Annotated[
+      int,
+      'The interval in seconds to poll for new checkpoint files.'
+  ] = 5
+
+  max_aggregation_threads: Annotated[
+      int,
+      'The maximum number of threads to aggregate checkpoints.'
+  ] = 128
+
+  @dataclasses.dataclass
+  class _AggregationEntry:
+    evaluation: evaluation_lib.Evaluation
+    output_dir: str
+    inprogress_file_pattern: str | None
+    ckpt_file_pattern: str
+    example_ids_inprogress: set[int]
+    example_ids_to_be_aggregated: set[int]
+    example_ids_being_aggregated: set[int]
+    completion_lock: threading.Lock
+    is_completed: bool = False
+
+  def _on_bound(self):
+    super()._on_bound()
+    self._monitor_thread = None
+    self._aggregation_entries = []
+    self._aggregator_pool = None
+    self._error = None
+
+  def start(self):
+    # Reset the experiment state before getting started.
+    self.current_run.experiment.reset()
+
+    # Signal the start of the run.
+    self.on_run_start()
+
+    # Start the non-leaf nodes.
+    for node in self.current_run.experiment.nonleaf_nodes:
+      self.on_experiment_start(node)
+
+    for evaluation in self.current_run.experiment.leaf_nodes:
+      # This is not precise, but we at least notify example start.
+      if not self.current_run.filter or self.current_run.filter(evaluation):
+        self.on_experiment_start(evaluation)
+
+        # Signal the start of the examples if we are not monitoring
+        # in-progress files.
+        if not self.monitor_inprogress_files:
+          for example_id in self.current_run.examples_to_evaluate(evaluation):
+            self._mark_example_started(evaluation, example_id)
+
+        # Create the aggregation entries for polling.
+        output_dir = self.current_run.output_dir(evaluation)
+        self._aggregation_entries.append(
+            self._AggregationEntry(
+                evaluation=evaluation,
+                output_dir=output_dir,
+                ckpt_file_pattern=os.path.join(
+                    output_dir, self.checkpoint_pattern
+                ),
+                inprogress_file_pattern=os.path.join(
+                    output_dir, '*.inprogress'
+                ) if self.monitor_inprogress_files else None,
+                example_ids_to_be_aggregated=(
+                    self.current_run.examples_to_evaluate(evaluation)
+                ),
+                example_ids_inprogress=set(),
+                example_ids_being_aggregated=set(),
+                completion_lock=threading.Lock(),
+                is_completed=False,
+            )
+        )
+      else:
+        self.on_experiment_skipped(evaluation)
+
+    self._aggregator_pool = concurrent.futures.ThreadPoolExecutor(
+        max_workers=self.max_aggregation_threads
+    )
+    self._monitor_thread = threading.Thread(target=self._monitor_loop)
+    self._monitor_thread.start()
+
+  def join(self):
+    if self._monitor_thread:
+      self._monitor_thread.join()
+    if self._error is not None:
+      raise self._error
+
+  def run(self):
+    self.start()
+    self.join()
+
+  def _monitor_loop(self):
+    while not self._error and any(
+        not e.is_completed for e in self._aggregation_entries
+    ):
+      for entry in self._aggregation_entries:
+        if not entry.example_ids_to_be_aggregated:
+          continue
+
+        # Signal example processing.
+        if self.monitor_inprogress_files:
+          inprogress_files = pg.io.glob(entry.inprogress_file_pattern)
+          for inprogress_file in inprogress_files:
+            example_id = int(
+                os.path.basename(inprogress_file).split('.')[0]
+            )
+            if example_id not in entry.example_ids_inprogress:
+              self._mark_example_started(entry.evaluation, example_id)
+              entry.example_ids_inprogress.add(example_id)
+
+        for filepath in pg.io.glob(entry.ckpt_file_pattern):
+          example_id = int(
+              os.path.basename(filepath).split('.')[0].split('_')[-1]
+          )
+          if example_id in entry.example_ids_to_be_aggregated:
+            # Remove example ID from the set to avoid duplicate processing.
+            entry.example_ids_to_be_aggregated.remove(example_id)
+            entry.example_ids_being_aggregated.add(example_id)
+
+            # It could be that the example has been processed before, but the
+            # inprogress file was removed. In this case, we should signal the
+            # example has started before completing it.
+            if example_id not in entry.example_ids_inprogress:
+              self._mark_example_started(entry.evaluation, example_id)
+              entry.example_ids_inprogress.add(example_id)
+
+            self._aggregator_pool.submit(
+                self._aggregate, entry, filepath, example_id
+            )
+            pg.logging.info(
+                '[%s] Aggregating example %d from %s...',
+                entry.evaluation.id,
+                example_id,
+                filepath,
+            )
+      time.sleep(self.poll_interval)
+
+    if self._error is None:
+      self.on_run_complete()
+    else:
+      self.on_run_abort(self._error)
+
+  def _aggregate(
+      self,
+      entry: _AggregationEntry,
+      ckpt_filepath: str,
+      example_id: int
+  ):
+    """Aggregate an example from a checkpoint file."""
+    try:
+      loaded_examples = entry.evaluation.state.load(
+          ckpt_filepath,
+          example_input_by_id=entry.evaluation.example_input_by_id,
+          # Example metadata may be expensive to load, and is not used by
+          # metric aggregation. Thus we do not load example metadata.
+          load_example_metadata=False
+      )
+      assert len(loaded_examples) >= 1, loaded_examples
+      # Occasionally the per-example checkpoint file may contain the same
+      # example processed multiple times. We only need to aggregate the last
+      # example.
+      example = loaded_examples[-1]
+    except BaseException as e:  # pylint: disable=broad-except
+      error_info = pg.ErrorInfo.from_exception(e)
+      pg.logging.error(
+          '[%s] Failed to aggregate example %d: %s',
+          entry.evaluation.id,
+          example_id,
+          error_info
+      )
+      example = example_lib.Example(
+          id=example_id,
+          input=entry.evaluation.example_input_by_id(example_id),
+          error=error_info,
+      )
+
+    # This will skip processing but still allow metrics to be collected.
+    # `process` will never be called for evaluation, thus we do not
+    # need to set up/tear down the evaluation.
+    example = entry.evaluation.evaluate(
+        example, reevaluate_upon_previous_errors=False
+    )
+    example.newly_processed = True
+    pg.logging.info(
+        '[%s] Successfully aggregated example %d from %s.',
+        entry.evaluation.id,
+        example_id,
+        ckpt_filepath,
+    )
+
+    try:
+      self.on_example_complete(entry.evaluation, example)
+    except BaseException as e:  # pylint: disable=broad-except
+      # Plugin failures should be raised to the user.
+      self._error = e
+
+    entry.example_ids_being_aggregated.remove(example_id)
+
+    # Remove the in-progress file to indicate that the example has been
+    # processed.
+    try:
+      pg.io.rm(os.path.join(entry.output_dir, f'{example_id}.inprogress'))
+    except FileNotFoundError:
+      pass
+
+    if (not self._error
+        and not entry.example_ids_to_be_aggregated
+        and not entry.example_ids_being_aggregated):
+      with entry.completion_lock:
+        if not entry.is_completed:
+          entry.is_completed = True
+          try:
+            self.on_experiment_complete(entry.evaluation)
+          except BaseException as e:  # pylint: disable=broad-except
+            # Plugin failures should be raised to the user.
+            self._error = e
+
+  def _mark_example_started(
+      self,
+      evaluation: evaluation_lib.Evaluation,
+      example_id: int
+  ) -> None:
+    """Mark an example as started."""
+    example = example_lib.Example(
+        id=example_id, input=evaluation.example_input_by_id(example_id),
+    )
+    example.start_time = time.time()
+    self.on_example_start(evaluation, example)
+
+    # We update the evaluation state with the in-progress status so the
+    # evaluation HTML can show remotely in-progress examples.
+    evaluation.state.update(example, in_progress=True)
+
+  def _run(self, evaluations: list[evaluation_lib.Evaluation]):
+    raise NotImplementedError('Not needed in checkpoint monitor.')
+
+  def _evaluate_items(
+      self,
+      evaluation: evaluation_lib.Evaluation,
+      items: Iterator[example_lib.Example]
+  ) -> None:
+    raise NotImplementedError('Not needed in checkpoint monitor.')
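The file above implements a polling aggregator: start() registers one _AggregationEntry per unfiltered leaf evaluation, _monitor_loop() globs for new checkpoint and in-progress files every poll_interval seconds, and _aggregate() replays each checkpointed example through evaluation.evaluate() on a thread pool so metrics accumulate without reprocessing. A minimal usage sketch follows; `my_experiment` and the root directory are hypothetical placeholders, while the CheckpointMonitor arguments are taken from the code above and the test below:

    from langfun.core.eval.v2.runners import ckpt_monitor

    # A prior run (possibly on other machines) has written per-example files
    # such as checkpoint_<example_id>.jsonl, plus optional
    # <example_id>.inprogress markers, under each evaluation's output dir.
    # `my_experiment` is a placeholder for any lf.eval.v2 Experiment.
    run = my_experiment.run('/tmp/eval_root', runner='sequential')

    # The monitor polls those directories every `poll_interval` seconds,
    # aggregates each discovered checkpoint on a thread pool, and re-raises
    # any plugin error from join().
    ckpt_monitor.CheckpointMonitor(
        run,
        checkpoint_pattern='checkpoint_*.jsonl',
        monitor_inprogress_files=True,
    ).run()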
langfun/core/eval/v2/runners/ckpt_monitor_test.py
ADDED
@@ -0,0 +1,162 @@
+# Copyright 2024 The Langfun Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import tempfile
+import unittest
+
+from langfun.core.eval.v2 import checkpointing
+from langfun.core.eval.v2 import eval_test_helper
+from langfun.core.eval.v2 import example as example_lib
+from langfun.core.eval.v2 import experiment as experiment_lib
+from langfun.core.eval.v2.runners import ckpt_monitor
+from langfun.core.eval.v2.runners import sequential  # pylint: disable=unused-import
+import pyglove as pg
+
+
+class CheckpointMonitorTest(unittest.TestCase):
+
+  def setUp(self):
+    super().setUp()
+    self.test_dir = tempfile.mkdtemp()
+
+  def test_aggregate(self):
+    exp = eval_test_helper.test_experiment()
+    root_dir = os.path.join(self.test_dir, 'test_aggregate')
+    run = exp.run(
+        root_dir,
+        runner='sequential',
+        progress_tracker=None,
+        plugins=[
+            checkpointing.PerExampleCheckpointer(
+                checkpoint_filename='checkpoint.jsonl'
+            )
+        ],
+        use_cache='no',
+    )
+    # Try to corrupt one of the checkpoint files.
+    pg.io.writefile(
+        run.output_path_for(exp.leaf_nodes[0], 'checkpoint_1.jsonl'),
+        'bad ckpt'
+    )
+    plugin = eval_test_helper.TestPlugin()
+    monitor = ckpt_monitor.CheckpointMonitor(
+        run,
+        plugins=[plugin],
+        checkpoint_pattern='checkpoint_*.jsonl',
+        monitor_inprogress_files=True,
+    )
+    monitor.run()
+
+    # Assert that the in-progress files are created and not removed.
+    for entry in monitor._aggregation_entries:
+      self.assertEqual(len(entry.example_ids_inprogress), 10)
+
+    # 6 leaf nodes + 1 suite + 1 hyper.
+    self.assertEqual(len(plugin.started_experiments), 6 + 2)
+    self.assertEqual(len(plugin.completed_experiments), 6 + 2)
+    self.assertEqual(len(plugin.started_example_ids), 10 * 6)
+    self.assertEqual(len(plugin.completed_example_ids), 10 * 6)
+    for e in exp.leaf_nodes:
+      self.assertEqual(e.progress.num_completed, 10)
+
+  def test_aggregate_with_filter(self):
+    exp = eval_test_helper.test_experiment()
+    root_dir = os.path.join(self.test_dir, 'test_aggregate_with_filter')
+
+    node_to_skip = exp.leaf_nodes[2]
+    # Run experiment to generate checkpoint files for all examples.
+    run = exp.run(
+        root_dir,
+        runner='sequential',
+        filter=lambda e: e.id != node_to_skip.id,
+        progress_tracker=None,
+        plugins=[
+            checkpointing.PerExampleCheckpointer(
+                checkpoint_filename='checkpoint.jsonl'
+            )
+        ],
+        use_cache='no',
+    )
+    plugin = eval_test_helper.TestPlugin()
+    monitor = ckpt_monitor.CheckpointMonitor(
+        run,
+        plugins=[plugin],
+        checkpoint_pattern='checkpoint_*.jsonl',
+    )
+    monitor.run()
+
+    # Assert that on_experiment_skipped was called for the filtered node.
+    self.assertEqual(len(plugin.skipped_experiments), 1)
+    self.assertEqual(plugin.skipped_experiments[0].id, node_to_skip.id)
+
+    # Assert that the skipped node was not started.
+    started_ids = [e.id for e in plugin.started_experiments]
+    self.assertNotIn(node_to_skip.id, started_ids)
+
+  def test_plugin_raise(self):
+
+    class TestPlugin(eval_test_helper.TestPlugin):
+      simulate_raise_on_example_complete: bool = False
+      simulate_raise_on_experiment_complete: bool = False
+
+      def on_example_complete(
+          self,
+          runner: experiment_lib.Runner,
+          experiment: experiment_lib.Experiment,
+          example: example_lib.Example
+      ):
+        if self.simulate_raise_on_example_complete:
+          raise ValueError('example complete error')
+
+      def on_experiment_complete(
+          self,
+          runner: experiment_lib.Runner,
+          experiment: experiment_lib.Experiment
+      ):
+        if self.simulate_raise_on_experiment_complete:
+          raise ValueError('experiment complete error')
+
+    exp = eval_test_helper.test_evaluation()
+    root_dir = os.path.join(self.test_dir, 'test_plugin_raise')
+
+    # Run experiment to generate checkpoint files for all examples.
+    run = exp.run(
+        root_dir,
+        runner='sequential',
+        progress_tracker=None,
+        plugins=[
+            checkpointing.PerExampleCheckpointer(
+                checkpoint_filename='checkpoint.jsonl'
+            )
+        ],
+        use_cache='no',
+    )
+
+    with self.assertRaisesRegex(ValueError, 'example complete error'):
+      ckpt_monitor.CheckpointMonitor(
+          run,
+          plugins=[TestPlugin(simulate_raise_on_example_complete=True)],
+          checkpoint_pattern='checkpoint_*.jsonl',
+      ).run()
+
+    with self.assertRaisesRegex(ValueError, 'experiment complete error'):
+      ckpt_monitor.CheckpointMonitor(
+          run,
+          plugins=[TestPlugin(simulate_raise_on_experiment_complete=True)],
+          checkpoint_pattern='checkpoint_*.jsonl',
+      ).run()
+
+
+if __name__ == '__main__':
+  unittest.main()
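Both files rely on a simple filename convention: a checkpoint's example ID is the last underscore-separated token of its base name, and in-progress markers are named <example_id>.inprogress. The snippet below isolates the parsing logic from _monitor_loop above; the helper function names are illustrative, not part of the package:

    import os

    def example_id_from_checkpoint(filepath: str) -> int:
      # e.g. '/out/checkpoint_17.jsonl' -> 17
      return int(os.path.basename(filepath).split('.')[0].split('_')[-1])

    def example_id_from_inprogress(filepath: str) -> int:
      # e.g. '/out/17.inprogress' -> 17
      return int(os.path.basename(filepath).split('.')[0])

    assert example_id_from_checkpoint('/out/checkpoint_17.jsonl') == 17
    assert example_id_from_inprogress('/out/17.inprogress') == 17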
langfun/core/eval/v2/runners/debug_test.py
CHANGED
@@ -23,7 +23,7 @@ from langfun.core.eval.v2.runners import debug  # pylint: disable=unused-import
 import pyglove as pg
 
 
-class RunnerTest(unittest.TestCase):
+class DebugRunnerTest(unittest.TestCase):
 
   def assert_same_list(self, actual: list[Any], expected: list[Any]):
     self.assertEqual(len(actual), len(expected))
@@ -32,9 +32,6 @@ class RunnerTest(unittest.TestCase):
       print(i, pg.diff(x, y))
      self.assertIs(x, y)
 
-
-class DebugRunnerTest(RunnerTest):
-
   def test_debug_runner(self):
     plugin = eval_test_helper.TestPlugin()
     exp = eval_test_helper.test_experiment()
langfun/core/eval/v2/runners/parallel_test.py
CHANGED
@@ -23,7 +23,7 @@ from langfun.core.eval.v2.runners import parallel  # pylint: disable=unused-import
 import pyglove as pg
 
 
-class RunnerTest(unittest.TestCase):
+class ParallelRunnerTest(unittest.TestCase):
 
   def assert_same_list(self, actual: list[Any], expected: list[Any]):
     self.assertEqual(len(actual), len(expected))
@@ -32,9 +32,6 @@ class RunnerTest(unittest.TestCase):
       print(i, pg.diff(x, y))
      self.assertIs(x, y)
 
-
-class ParallelRunnerTest(RunnerTest):
-
   def test_parallel_runner(self):
     plugin = eval_test_helper.TestPlugin()
     exp = eval_test_helper.test_experiment()
langfun/core/eval/v2/runners/sequential_test.py
CHANGED
@@ -23,7 +23,7 @@ from langfun.core.eval.v2.runners import sequential  # pylint: disable=unused-import
 import pyglove as pg
 
 
-class RunnerTest(unittest.TestCase):
+class SequentialRunnerTest(unittest.TestCase):
 
   def assert_same_list(self, actual: list[Any], expected: list[Any]):
     self.assertEqual(len(actual), len(expected))
@@ -32,9 +32,6 @@ class RunnerTest(unittest.TestCase):
       print(i, pg.diff(x, y))
      self.assertIs(x, y)
 
-
-class SequentialRunnerTest(RunnerTest):
-
   def test_basic(self):
     plugin = eval_test_helper.TestPlugin()
     exp = eval_test_helper.test_experiment()
langfun/core/langfunc_test.py
CHANGED
@@ -109,9 +109,9 @@ class LangFuncCallTest(unittest.TestCase):
         ' lm=ExcitedEchoer(sampling_options=LMSamplingOptions(temperature=None,'
         ' max_tokens=None, n=1, top_k=40, top_p=None, stop=None,'
         ' random_seed=None, logprobs=False, top_logprobs=None,'
-        ' max_thinking_tokens=None, reasoning_effort=None, extras={}),'
-        ' cache=None, max_concurrency=None, timeout=120.0, max_attempts=5,'
-        ' retry_interval=(5, 60), exponential_backoff=True,'
+        ' max_thinking_tokens=None, thinking_level=None, reasoning_effort=None,'
+        ' extras={}), cache=None, max_concurrency=None, timeout=120.0,'
+        ' max_attempts=5, retry_interval=(5, 60), exponential_backoff=True,'
         ' max_retry_interval=300, debug=False))',
     )
 
langfun/core/language_model.py
CHANGED
@@ -53,6 +53,10 @@ class RetryableLMError(LMError):
   """Base class for LLM errors that can be solved by retrying."""
 
 
+class EmptyGenerationError(RetryableLMError):
+  """Error for empty generation."""
+
+
 class RateLimitError(RetryableLMError):
   """Error for rate limit reached."""
 
@@ -575,6 +579,14 @@ class LMSamplingOptions(component.Component):
       int | None, 'Number of max thinking tokens.'
   ] = None
 
+  thinking_level: Annotated[
+      Literal['low', 'high'] | None,
+      (
+          'Thinking level for Gemini models. High is for complex tasks, '
+          'while low is for faster responses.'
+      ),
+  ] = None
+
   reasoning_effort: Annotated[
       Literal['low', 'medium', 'high'] | None,
       (
@@ -1076,10 +1088,32 @@ class LanguageModel(component.Component):
     prompts = [message_lib.UserMessage.from_value(p) for p in prompts]
 
     with component.context(override_attrs=True, **kwargs):
-      if self.cache is None:
-        results = self._sample(prompts)
-      else:
-        results = self._sample_with_cache_lookup(prompts, cache_seed)
+
+      def _sample_with_retry():
+        if self.cache is None:
+          results = self._sample(prompts)
+        else:
+          results = self._sample_with_cache_lookup(prompts, cache_seed)
+
+        for i, result in enumerate(results):
+          for sample in result.samples:
+            if not sample.response.text:
+              if self.cache is not None:
+                self.cache.delete(self, prompts[i], seed=cache_seed)
+              raise EmptyGenerationError(
+                  f'Empty generation encountered from model {self.model_id}.'
+              )
+        return results
+
+      retry_fn = concurrent.with_retry(
+          _sample_with_retry,
+          retry_on_errors=EmptyGenerationError,
+          max_attempts=self.max_attempts,
+          retry_interval=self.retry_interval,
+          exponential_backoff=self.exponential_backoff,
+          max_retry_interval=self.max_retry_interval,
+      )
+      results = retry_fn()
 
     for prompt, result in zip(prompts, results):
 
@@ -1088,7 +1122,6 @@ class LanguageModel(component.Component):
 
       for sample in result.samples:
         # Update metadata for response message.
-
        response = sample.response
        response.metadata.score = sample.score
        response.metadata.logprobs = sample.logprobs
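The sampling change above wraps the existing cache-aware sampling path in a retry loop: a sample with empty text now raises EmptyGenerationError (a RetryableLMError), evicts any cached entry so the retry is not served the same empty result, and reuses the model's existing max_attempts, retry_interval, and backoff settings. Below is a standalone sketch of the same retry-on-empty pattern in plain Python, not langfun's concurrent.with_retry helper; all names are illustrative:

    import time

    def sample_with_retry(sample_fn, is_empty, max_attempts=5, base_interval=1.0):
      """Retries sample_fn until its result is non-empty."""
      for attempt in range(max_attempts):
        result = sample_fn()
        if not is_empty(result):
          return result
        if attempt + 1 < max_attempts:
          # Exponential backoff between attempts, mirroring
          # exponential_backoff=True in the diff above.
          time.sleep(base_interval * (2 ** attempt))
      raise RuntimeError(f'Empty generation after {max_attempts} attempts.')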
langfun/core/language_model_test.py
CHANGED
@@ -591,6 +591,51 @@ class LanguageModelTest(unittest.TestCase):
     lm = MockModel(cache=cache, top_k=1)
     self.assertEqual(lm('a'), 'a')
 
+  def test_empty_generation_error(self):
+    class MockModelWithEmptyResponse(MockModel):
+      def _sample(self,
+                  prompts: list[message_lib.Message]
+                  ) -> list[lm_lib.LMSamplingResult]:
+        return [lm_lib.LMSamplingResult(
+            [lm_lib.LMSample(response='')],
+            usage=lm_lib.LMSamplingUsage(100, 0, 100, 1, 1.0)
+        )]
+    lm = MockModelWithEmptyResponse(max_attempts=1, retry_interval=0)
+    with self.assertRaisesRegex(
+        concurrent.RetryError, 'Empty generation encountered'
+    ):
+      lm('a')
+
+  def test_empty_generation_retry(self):
+    class MockModelWithEmptyThenValid(MockModel):
+      attempt_count: int = 0
+
+      def _sample(
+          self, prompts: list[message_lib.Message]
+      ) -> list[lm_lib.LMSamplingResult]:
+        self.rebind(attempt_count=self.attempt_count + 1)
+        if self.attempt_count == 1:
+          # First attempt returns empty
+          return [
+              lm_lib.LMSamplingResult(
+                  [lm_lib.LMSample(response='')],
+                  usage=lm_lib.LMSamplingUsage(100, 0, 100, 1, 1.0),
+              )
+          ]
+        else:
+          # Subsequent attempts return valid response
+          return [
+              lm_lib.LMSamplingResult(
+                  [lm_lib.LMSample(response='valid response')],
+                  usage=lm_lib.LMSamplingUsage(100, 100, 200, 1, 1.0),
+              )
+          ]
+
+    lm = MockModelWithEmptyThenValid(max_attempts=3, retry_interval=0)
+    result = lm('a')
+    self.assertEqual(result.text, 'valid response')
+    self.assertEqual(lm.attempt_count, 2)
+
   def test_estimate_max_concurrency(self):
     self.assertIsNone(lm_lib.LanguageModel.estimate_max_concurrency(None, None))
     self.assertEqual(
langfun/core/llms/__init__.py
CHANGED
@@ -42,6 +42,7 @@ from langfun.core.llms.azure_openai import AzureOpenAI
 
 # Gemini models.
 from langfun.core.llms.google_genai import GenAI
+from langfun.core.llms.google_genai import Gemini3ProPreview
 from langfun.core.llms.google_genai import Gemini25Pro
 from langfun.core.llms.google_genai import Gemini25Flash
 from langfun.core.llms.google_genai import Gemini25ProPreview_20250605
@@ -90,6 +91,7 @@ from langfun.core.llms.vertexai import VertexAIGemini25ProPreview_20250605
 from langfun.core.llms.vertexai import VertexAIGemini25Pro
 from langfun.core.llms.vertexai import VertexAIGemini25Flash
 from langfun.core.llms.vertexai import VertexAIGemini25FlashImagePreview
+from langfun.core.llms.vertexai import VertexAIGemini3ProPreview
 
 # For backward compatibility.
 GeminiPro1_5 = Gemini15Pro
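With these exports, the preview model is reachable directly from langfun.core.llms, and the new thinking_level option added to LMSamplingOptions above can be set on it. A hedged usage sketch; model availability and naming depend on your GenAI or Vertex AI credentials and access:

    import langfun as lf

    # Hypothetical usage of the newly exported class; requires access to the
    # preview model via the GenAI API (or VertexAIGemini3ProPreview on Vertex).
    lm = lf.llms.Gemini3ProPreview(
        sampling_options=lf.LMSamplingOptions(thinking_level='low')
    )
    print(lm('In one sentence, what does a checkpoint monitor do?'))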
|