langfun 0.1.2.dev202510230805__py3-none-any.whl → 0.1.2.dev202511270805__py3-none-any.whl

Files changed (155)
  1. langfun/core/__init__.py +2 -0
  2. langfun/core/agentic/__init__.py +4 -1
  3. langfun/core/agentic/action.py +447 -29
  4. langfun/core/agentic/action_eval.py +9 -2
  5. langfun/core/agentic/action_test.py +149 -21
  6. langfun/core/async_support.py +32 -3
  7. langfun/core/coding/python/correction.py +19 -9
  8. langfun/core/coding/python/execution.py +14 -12
  9. langfun/core/coding/python/generation.py +21 -16
  10. langfun/core/coding/python/sandboxing.py +23 -3
  11. langfun/core/component.py +42 -3
  12. langfun/core/concurrent.py +70 -6
  13. langfun/core/concurrent_test.py +1 -0
  14. langfun/core/console.py +1 -1
  15. langfun/core/data/conversion/anthropic.py +12 -3
  16. langfun/core/data/conversion/anthropic_test.py +8 -6
  17. langfun/core/data/conversion/gemini.py +9 -2
  18. langfun/core/data/conversion/gemini_test.py +12 -9
  19. langfun/core/data/conversion/openai.py +145 -31
  20. langfun/core/data/conversion/openai_test.py +161 -17
  21. langfun/core/eval/base.py +47 -43
  22. langfun/core/eval/base_test.py +5 -5
  23. langfun/core/eval/matching.py +5 -2
  24. langfun/core/eval/patching.py +3 -3
  25. langfun/core/eval/scoring.py +4 -3
  26. langfun/core/eval/v2/__init__.py +1 -0
  27. langfun/core/eval/v2/checkpointing.py +64 -6
  28. langfun/core/eval/v2/checkpointing_test.py +9 -2
  29. langfun/core/eval/v2/eval_test_helper.py +103 -2
  30. langfun/core/eval/v2/evaluation.py +91 -16
  31. langfun/core/eval/v2/evaluation_test.py +9 -3
  32. langfun/core/eval/v2/example.py +50 -40
  33. langfun/core/eval/v2/example_test.py +16 -8
  34. langfun/core/eval/v2/experiment.py +74 -8
  35. langfun/core/eval/v2/experiment_test.py +19 -0
  36. langfun/core/eval/v2/metric_values.py +31 -3
  37. langfun/core/eval/v2/metric_values_test.py +32 -0
  38. langfun/core/eval/v2/metrics.py +157 -44
  39. langfun/core/eval/v2/metrics_test.py +39 -18
  40. langfun/core/eval/v2/progress.py +30 -1
  41. langfun/core/eval/v2/progress_test.py +27 -0
  42. langfun/core/eval/v2/progress_tracking.py +12 -3
  43. langfun/core/eval/v2/progress_tracking_test.py +6 -1
  44. langfun/core/eval/v2/reporting.py +90 -71
  45. langfun/core/eval/v2/reporting_test.py +24 -6
  46. langfun/core/eval/v2/runners/__init__.py +30 -0
  47. langfun/core/eval/v2/{runners.py → runners/base.py} +59 -142
  48. langfun/core/eval/v2/runners/beam.py +341 -0
  49. langfun/core/eval/v2/runners/beam_test.py +131 -0
  50. langfun/core/eval/v2/runners/ckpt_monitor.py +294 -0
  51. langfun/core/eval/v2/runners/ckpt_monitor_test.py +162 -0
  52. langfun/core/eval/v2/runners/debug.py +40 -0
  53. langfun/core/eval/v2/runners/debug_test.py +76 -0
  54. langfun/core/eval/v2/runners/parallel.py +100 -0
  55. langfun/core/eval/v2/runners/parallel_test.py +95 -0
  56. langfun/core/eval/v2/runners/sequential.py +47 -0
  57. langfun/core/eval/v2/runners/sequential_test.py +172 -0
  58. langfun/core/langfunc.py +45 -130
  59. langfun/core/langfunc_test.py +7 -5
  60. langfun/core/language_model.py +141 -21
  61. langfun/core/language_model_test.py +54 -3
  62. langfun/core/llms/__init__.py +9 -1
  63. langfun/core/llms/anthropic.py +157 -2
  64. langfun/core/llms/azure_openai.py +29 -17
  65. langfun/core/llms/cache/base.py +25 -3
  66. langfun/core/llms/cache/in_memory.py +48 -7
  67. langfun/core/llms/cache/in_memory_test.py +14 -4
  68. langfun/core/llms/compositional.py +25 -1
  69. langfun/core/llms/deepseek.py +30 -2
  70. langfun/core/llms/fake.py +32 -1
  71. langfun/core/llms/gemini.py +55 -17
  72. langfun/core/llms/gemini_test.py +84 -0
  73. langfun/core/llms/google_genai.py +34 -1
  74. langfun/core/llms/groq.py +28 -3
  75. langfun/core/llms/llama_cpp.py +23 -4
  76. langfun/core/llms/openai.py +36 -3
  77. langfun/core/llms/openai_compatible.py +148 -27
  78. langfun/core/llms/openai_compatible_test.py +207 -20
  79. langfun/core/llms/openai_test.py +0 -2
  80. langfun/core/llms/rest.py +12 -1
  81. langfun/core/llms/vertexai.py +58 -8
  82. langfun/core/logging.py +1 -1
  83. langfun/core/mcp/client.py +77 -22
  84. langfun/core/mcp/client_test.py +8 -35
  85. langfun/core/mcp/session.py +94 -29
  86. langfun/core/mcp/session_test.py +54 -0
  87. langfun/core/mcp/tool.py +151 -22
  88. langfun/core/mcp/tool_test.py +197 -0
  89. langfun/core/memory.py +1 -0
  90. langfun/core/message.py +160 -55
  91. langfun/core/message_test.py +65 -81
  92. langfun/core/modalities/__init__.py +8 -0
  93. langfun/core/modalities/audio.py +21 -1
  94. langfun/core/modalities/image.py +19 -1
  95. langfun/core/modalities/mime.py +64 -3
  96. langfun/core/modalities/mime_test.py +11 -0
  97. langfun/core/modalities/pdf.py +19 -1
  98. langfun/core/modalities/video.py +21 -1
  99. langfun/core/modality.py +167 -29
  100. langfun/core/modality_test.py +42 -12
  101. langfun/core/natural_language.py +1 -1
  102. langfun/core/sampling.py +4 -4
  103. langfun/core/sampling_test.py +20 -4
  104. langfun/core/structured/__init__.py +2 -24
  105. langfun/core/structured/completion.py +34 -44
  106. langfun/core/structured/completion_test.py +23 -43
  107. langfun/core/structured/description.py +54 -50
  108. langfun/core/structured/function_generation.py +29 -12
  109. langfun/core/structured/mapping.py +81 -37
  110. langfun/core/structured/parsing.py +95 -79
  111. langfun/core/structured/parsing_test.py +0 -3
  112. langfun/core/structured/querying.py +215 -142
  113. langfun/core/structured/querying_test.py +65 -29
  114. langfun/core/structured/schema/__init__.py +49 -0
  115. langfun/core/structured/schema/base.py +664 -0
  116. langfun/core/structured/schema/base_test.py +531 -0
  117. langfun/core/structured/schema/json.py +174 -0
  118. langfun/core/structured/schema/json_test.py +121 -0
  119. langfun/core/structured/schema/python.py +316 -0
  120. langfun/core/structured/schema/python_test.py +410 -0
  121. langfun/core/structured/schema_generation.py +33 -14
  122. langfun/core/structured/scoring.py +47 -36
  123. langfun/core/structured/tokenization.py +26 -11
  124. langfun/core/subscription.py +2 -2
  125. langfun/core/template.py +174 -49
  126. langfun/core/template_test.py +123 -17
  127. langfun/env/__init__.py +8 -2
  128. langfun/env/base_environment.py +320 -128
  129. langfun/env/base_environment_test.py +473 -0
  130. langfun/env/base_feature.py +92 -15
  131. langfun/env/base_feature_test.py +228 -0
  132. langfun/env/base_sandbox.py +84 -361
  133. langfun/env/base_sandbox_test.py +1235 -0
  134. langfun/env/event_handlers/__init__.py +1 -1
  135. langfun/env/event_handlers/chain.py +233 -0
  136. langfun/env/event_handlers/chain_test.py +253 -0
  137. langfun/env/event_handlers/event_logger.py +95 -98
  138. langfun/env/event_handlers/event_logger_test.py +21 -21
  139. langfun/env/event_handlers/metric_writer.py +225 -140
  140. langfun/env/event_handlers/metric_writer_test.py +23 -6
  141. langfun/env/interface.py +854 -40
  142. langfun/env/interface_test.py +112 -2
  143. langfun/env/load_balancers_test.py +23 -2
  144. langfun/env/test_utils.py +126 -84
  145. {langfun-0.1.2.dev202510230805.dist-info → langfun-0.1.2.dev202511270805.dist-info}/METADATA +1 -1
  146. langfun-0.1.2.dev202511270805.dist-info/RECORD +215 -0
  147. langfun/core/eval/v2/runners_test.py +0 -343
  148. langfun/core/structured/schema.py +0 -987
  149. langfun/core/structured/schema_test.py +0 -982
  150. langfun/env/base_test.py +0 -1481
  151. langfun/env/event_handlers/base.py +0 -350
  152. langfun-0.1.2.dev202510230805.dist-info/RECORD +0 -195
  153. {langfun-0.1.2.dev202510230805.dist-info → langfun-0.1.2.dev202511270805.dist-info}/WHEEL +0 -0
  154. {langfun-0.1.2.dev202510230805.dist-info → langfun-0.1.2.dev202511270805.dist-info}/licenses/LICENSE +0 -0
  155. {langfun-0.1.2.dev202510230805.dist-info → langfun-0.1.2.dev202511270805.dist-info}/top_level.txt +0 -0
langfun/core/eval/v2/reporting.py

@@ -32,8 +32,97 @@ _SUMMARY_FILE = 'summary.html'
 _EVALULATION_DETAIL_FILE = 'index.html'
 
 
+class ExampleHtmlGenerator(experiment_lib.Plugin):
+  """Plugin for generating HTML views for each evaluation example."""
+
+  def on_example_complete(
+      self, runner: Runner, experiment: Experiment, example: Example
+  ):
+    self._save_example_html(runner, experiment, example)
+
+  def _save_example_html(
+      self, runner: Runner, experiment: Experiment, example: Example
+  ) -> None:
+    """Saves the example in HTML format."""
+    current_run = runner.current_run
+    def _generate():
+      try:
+        with pg.timeit() as t:
+          html = example.to_html(
+              collapse_level=None,
+              enable_summary_tooltip=False,
+              extra_flags=dict(
+                  # For properly rendering the next link.
+                  num_examples=getattr(experiment, 'num_examples', None)
+              ),
+          )
+          html.save(
+              runner.current_run.output_path_for(
+                  experiment, f'{example.id}.html'
+              )
+          )
+        experiment.info(
+            f'\'{example.id}.html\' generated in {t.elapse:.2f} seconds. '
+        )
+      except BaseException as e:  # pylint: disable=broad-except
+        experiment.error(
+            f'Failed to generate \'{example.id}.html\'. '
+            f'Error: {e}, Stacktrace: \n{traceback.format_exc()}.',
+        )
+        raise e
+
+    def _copy():
+      src_file = current_run.input_path_for(experiment, f'{example.id}.html')
+      dest_file = current_run.output_path_for(experiment, f'{example.id}.html')
+
+      if src_file == dest_file:
+        return
+
+      if not pg.io.path_exists(src_file):
+        experiment.warning(
+            f'Skip copying \'{example.id}.html\' as '
+            f'{src_file!r} does not exist.'
+        )
+        return
+
+      try:
+        with pg.timeit() as t, pg.io.open(src_file, 'r') as src:
+          content = src.read()
+          with pg.io.open(dest_file, 'w') as dest:
+            dest.write(content)
+        experiment.info(
+            f'\'{example.id}.html\' copied in {t.elapse:.2f} seconds.'
+        )
+      except BaseException as e:  # pylint: disable=broad-except
+        experiment.error(
+            f'Failed to copy {src_file!r} to {dest_file!r}. Error: {e}.'
+        )
+        raise e
+
+    generate_example_html = current_run.generate_example_html
+    if (generate_example_html == 'all'
+        or (generate_example_html == 'new' and example.newly_processed)
+        or (isinstance(generate_example_html, list)
+            and example.id in generate_example_html)):
+      op = _generate
+    else:
+      op = _copy
+    runner.background_run(op)
+
+
 class HtmlReporter(experiment_lib.Plugin):
-  """Plugin for periodically generating HTML reports for the experiment."""
+  """Plugin for periodically generating HTML reports for the experiment.
+
+  The `HtmlReporter` plugin generates several HTML files during an experiment
+  run:
+  - A `summary.html` at the root of the run directory, summarizing all
+    evaluations in the experiment.
+  - An `index.html` for each leaf evaluation, detailing the evaluation
+    definition, metrics, and logs.
+
+  These reports are updated periodically in the background during the run,
+  allowing users to monitor progress in near real-time.
+  """
 
   summary_interval: Annotated[
       int,
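
The generate-vs-copy dispatch at the end of `_save_example_html` keys off the run's `generate_example_html` option. A standalone sketch of that rule, assuming the option is `'all'`, `'new'`, or an explicit list of example IDs as the code above suggests (`should_generate` is a hypothetical helper, not langfun API):

from typing import Literal, Sequence, Union

GenerateExampleHtml = Union[Literal['all', 'new'], Sequence[int]]

def should_generate(
    option: GenerateExampleHtml, example_id: int, newly_processed: bool
) -> bool:
  """True: regenerate the example's HTML; False: copy it from the prior run."""
  if option == 'all':
    return True
  if option == 'new':
    return newly_processed
  return example_id in option  # explicit list of example IDs

assert should_generate('all', 3, newly_processed=False)
assert not should_generate('new', 3, newly_processed=False)
assert should_generate([1, 3], 3, newly_processed=False)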
@@ -127,7 +216,6 @@ class HtmlReporter(experiment_lib.Plugin):
   def on_example_complete(
       self, runner: Runner, experiment: Experiment, example: Example
   ):
-    self._save_example_html(runner, experiment, example)
     self._maybe_update_experiment_html(runner, experiment)
     self._maybe_update_summary(runner)
 
@@ -197,72 +285,3 @@ class HtmlReporter(experiment_lib.Plugin):
       runner.background_run(_save)
     else:
       _save()
-
-  def _save_example_html(
-      self, runner: Runner, experiment: Experiment, example: Example
-  ) -> None:
-    """Saves the example in HTML format."""
-    current_run = runner.current_run
-    def _generate():
-      try:
-        with pg.timeit() as t:
-          html = example.to_html(
-              collapse_level=None,
-              enable_summary_tooltip=False,
-              extra_flags=dict(
-                  # For properly rendering the next link.
-                  num_examples=getattr(experiment, 'num_examples', None)
-              ),
-          )
-          html.save(
-              runner.current_run.output_path_for(
-                  experiment, f'{example.id}.html'
-              )
-          )
-        experiment.info(
-            f'\'{example.id}.html\' generated in {t.elapse:.2f} seconds. '
-        )
-      except BaseException as e:  # pylint: disable=broad-except
-        experiment.error(
-            f'Failed to generate \'{example.id}.html\'. '
-            f'Error: {e}, Stacktrace: \n{traceback.format_exc()}.',
-        )
-        raise e
-
-    def _copy():
-      src_file = current_run.input_path_for(experiment, f'{example.id}.html')
-      dest_file = current_run.output_path_for(experiment, f'{example.id}.html')
-
-      if src_file == dest_file:
-        return
-
-      if not pg.io.path_exists(src_file):
-        experiment.warning(
-            f'Skip copying \'{example.id}.html\' as '
-            f'{src_file!r} does not exist.'
-        )
-        return
-
-      try:
-        with pg.timeit() as t, pg.io.open(src_file, 'r') as src:
-          content = src.read()
-          with pg.io.open(dest_file, 'w') as dest:
-            dest.write(content)
-        experiment.info(
-            f'\'{example.id}.html\' copied in {t.elapse:.2f} seconds.'
-        )
-      except BaseException as e:  # pylint: disable=broad-except
-        experiment.error(
-            f'Failed to copy {src_file!r} to {dest_file!r}. Error: {e}.'
-        )
-        raise e
-
-    generate_example_html = current_run.generate_example_html
-    if (generate_example_html == 'all'
-        or (generate_example_html == 'new' and example.newly_processed)
-        or (isinstance(generate_example_html, list)
-            and example.id in generate_example_html)):
-      op = _generate
-    else:
-      op = _copy
-    runner.background_run(op)
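
Taken together, the three reporting.py hunks move per-example HTML generation out of `HtmlReporter` and into the dedicated `ExampleHtmlGenerator` plugin, making per-example pages opt-in. A sketch of the resulting wiring, with the `experiment.run(...)` shape taken from the test diff below (illustrative, not a complete API reference):

from langfun.core.eval.v2 import checkpointing, reporting

plugins = [
    checkpointing.BulkCheckpointer('checkpoint.jsonl'),  # example checkpoints
    reporting.HtmlReporter(),          # summary.html + per-evaluation index.html
    reporting.ExampleHtmlGenerator(),  # <example.id>.html, one per example
]
# run = experiment.run(root_dir, 'new', plugins=plugins)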
langfun/core/eval/v2/reporting_test.py

@@ -29,7 +29,16 @@ class ReportingTest(unittest.TestCase):
     experiment = eval_test_helper.test_experiment()
     checkpointer = checkpointing.BulkCheckpointer('checkpoint.jsonl')
     reporter = reporting.HtmlReporter()
-    run = experiment.run(root_dir, 'new', plugins=[checkpointer, reporter])
+    self.assertFalse(reporter.is_per_example())
+
+    example_html_generator = reporting.ExampleHtmlGenerator()
+    self.assertTrue(example_html_generator.is_per_example())
+
+    run = experiment.run(
+        root_dir,
+        'new',
+        plugins=[checkpointer, reporter, example_html_generator]
+    )
     self.assertTrue(
         pg.io.path_exists(os.path.join(run.output_root, 'summary.html'))
     )
@@ -52,8 +61,10 @@
     root_dir = os.path.join(tempfile.mkdtemp(), 'test_reporting2')
     experiment = eval_test_helper.test_experiment()
     run = experiment.run(
-        root_dir, 'new', plugins=[checkpointer, reporter],
-        warm_start_from=run.output_root
+        root_dir,
+        'new',
+        plugins=[checkpointer, reporter, example_html_generator],
+        warm_start_from=run.output_root,
     )
     self.assertTrue(
         pg.io.path_exists(os.path.join(run.output_root, 'summary.html'))
@@ -105,7 +116,12 @@
                   .test_experiment_with_example_html_generation_error())
     checkpointer = checkpointing.BulkCheckpointer('checkpoint.jsonl')
     reporter = reporting.HtmlReporter()
-    run = experiment.run(root_dir, 'new', plugins=[checkpointer, reporter])
+    example_html_generator = reporting.ExampleHtmlGenerator()
+    run = experiment.run(
+        root_dir,
+        'new',
+        plugins=[checkpointer, reporter, example_html_generator]
+    )
     self.assertTrue(
         pg.io.path_exists(os.path.join(run.output_root, 'summary.html'))
     )
@@ -132,8 +148,10 @@
     experiment = (eval_test_helper
                   .test_experiment_with_example_html_generation_error())
     run = experiment.run(
-        root_dir, 'new', plugins=[checkpointer, reporter],
-        warm_start_from=run.output_root
+        root_dir,
+        'new',
+        plugins=[checkpointer, reporter, example_html_generator],
+        warm_start_from=run.output_root,
     )
     self.assertTrue(
         pg.io.path_exists(os.path.join(run.output_root, 'summary.html'))
langfun/core/eval/v2/runners/__init__.py (new file)

@@ -0,0 +1,30 @@
+# Copyright 2024 The Langfun Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Langfun evaluation runners."""
+
+# pylint: disable=g-importing-member
+from langfun.core.eval.v2.runners.base import RunnerBase
+from langfun.core.eval.v2.runners.beam import BeamRunner
+from langfun.core.eval.v2.runners.debug import DebugRunner
+from langfun.core.eval.v2.runners.parallel import ParallelRunner
+from langfun.core.eval.v2.runners.sequential import SequentialRunner
+# pylint: enable=g-importing-member
+
+__all__ = [
+    'RunnerBase',
+    'BeamRunner',
+    'DebugRunner',
+    'ParallelRunner',
+    'SequentialRunner',
+]
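
With this `__init__.py`, the former single-module `runners.py` becomes a `runners/` package whose public classes are re-exported at the package root, so imports of the following form keep resolving (module paths as declared above; `BeamRunner` is new in this release):

from langfun.core.eval.v2.runners import ParallelRunner, SequentialRunner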
langfun/core/eval/v2/{runners.py → runners/base.py}

@@ -11,15 +11,14 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Evaluation experiment runners."""
+"""Base experiment runner."""
+
 import abc
-import collections
 import concurrent.futures
 import random
 import threading
-import time
 import traceback
-from typing import Any, Annotated, Callable, Iterator
+from typing import Any, Annotated, Callable, Iterator, Literal
 
 from langfun import core as lf
 from langfun.core.eval.v2 import checkpointing
@@ -42,31 +41,55 @@ _RUN_MANIFEST = 'run.json'
 
 
 class RunnerBase(Runner):
-  """A simple runner that runs evaluations and their examples sequentially."""
+  """Base class for runners with plugin support and IO pooling.
+
+  `RunnerBase` provides the basic runner functionalities such as plugin
+  integration for checkpointing, reporting and progress tracking.
+  It also manages a thread pool for background IO operations.
+  Subclasses should implement `_run` and `_evaluate_items` for different
+  execution strategies.
+  """
 
-  tqdm: Annotated[
-      bool,
+  progress_tracker: Annotated[
+      Literal['tqdm', 'html', 'auto', None],
       (
-          'If True, force using tqdm for progress update. Otherwise, determine '
-          'it automatically based on the running environment (console vs. '
-          'notebook)'
+          'If `tqdm`, force using tqdm for progress update. '
+          'If `html`, force using html for progress update. '
+          'If `auto`, determine it automatically based on the running '
+          'environment (console vs. notebook)'
+          'If `none`, disable progress update.'
       )
-  ] = False
+  ] = 'auto'
 
   plugins = [
       checkpointing.BulkCheckpointer(),
       reporting.HtmlReporter(),
   ]
 
+  max_background_threads: Annotated[
+      int,
+      'Max number of background threads for IO operations.'
+  ] = 128
+
   def _on_bound(self):
     super()._on_bound()
 
     # Install the tqdm plugin if needed.
-    with pg.notify_on_change(False):
-      self.plugins.append(progress_tracking.progress_tracker(self.tqdm))
+    if self.progress_tracker is not None:
+      with pg.notify_on_change(False):
+        self.plugins.append(
+            progress_tracking.progress_tracker(self.progress_tracker)
+        )
+
+    if self.max_background_threads > 0:
+      self._io_pool_lock = threading.Lock()
+      self._io_pool = concurrent.futures.ThreadPoolExecutor(
+          max_workers=self.max_background_threads
+      )
+    else:
+      self._io_pool_lock = None
+      self._io_pool = None
 
-    self._io_pool_lock = threading.Lock()
-    self._io_pool = concurrent.futures.ThreadPoolExecutor(max_workers=16)
     # TODO(daiyip): render background errors.
     self._background_last_error = None
 
@@ -78,9 +101,12 @@ class RunnerBase(Runner):
       except Exception as e:  # pylint: disable=broad-except
         self._background_last_error = e
 
-    with self._io_pool_lock:
-      if self._io_pool is not None:
-        self._io_pool.submit(_background_run, *args, **kwargs)
+    if self.max_background_threads > 0:
+      with self._io_pool_lock:
+        if self._io_pool is not None:
+          self._io_pool.submit(_background_run, *args, **kwargs)
+    else:
+      _background_run(*args, **kwargs)
 
   def _all_plugins(self, experiment: Experiment) -> Iterator[Plugin]:
     """Returns all plugins for the experiment."""
@@ -139,6 +165,7 @@ class RunnerBase(Runner):
       plugin.on_experiment_start(self, experiment)
 
     if experiment.is_leaf:
+      pg.io.mkdirs(self.current_run.output_dir(experiment))
       experiment.info(
           f'Starting evaluation {experiment.id!r} with '
           f'{num_examples_to_evaluate} examples to evaluate.'
@@ -220,7 +247,7 @@ class RunnerBase(Runner):
       else:
         # A evaluation could be considered as done if it has processed all the
        # examples specified by `example_ids`.
-        assert progress.is_completed
+        assert progress.is_completed, progress
       parent_progress.increment_processed()
 
       if parent_progress.is_completed:
@@ -235,6 +262,8 @@
       example: Example
   ) -> None:
     """Called when an evaluation example is started."""
+    assert isinstance(experiment, Evaluation), experiment
+    experiment.state.update(example, in_progress=True)
     for plugin in self._all_plugins(experiment):
       plugin.on_example_start(self, experiment, example)
     experiment.info(f'Starting to evaluate example {example.id}.')
@@ -245,6 +274,8 @@
       example: Example
   ) -> None:
     """Called when an evaluation example is complete."""
+    assert isinstance(experiment, Evaluation), experiment
+    experiment.state.update(example, in_progress=False)
     if example.newly_processed:
       if example.error is None:
         experiment.progress.increment_processed()
@@ -256,7 +287,7 @@ class RunnerBase(Runner):
         experiment.progress.increment_failed()
         experiment.error(
             (
-                f'Failed to evaluate example {example.id} in'
+                f'Failed to evaluate example {example.id} in '
                 f'{example.elapse:.2f} seconds.'
             ),
             error=example.error
@@ -316,7 +347,7 @@ class RunnerBase(Runner):
         self._run(targets)
 
       self.on_run_complete()
-    except Exception as e:  # pylint: disable=broad-except
+    except BaseException as e:  # pylint: disable=broad-except
       self.on_run_abort(e)
       raise e
    finally:
@@ -324,9 +355,10 @@
         self.background_run(cache.save)
 
       # Wait for the background tasks to finish.
-      with self._io_pool_lock:
-        self._io_pool, io_pool = None, self._io_pool
-      io_pool.shutdown(wait=True)
+      if self.max_background_threads > 0:
+        with self._io_pool_lock:
+          self._io_pool, io_pool = None, self._io_pool
+        io_pool.shutdown(wait=True)
 
   @abc.abstractmethod
   def _run(self, evaluations: list[Evaluation]) -> None:
@@ -335,6 +367,7 @@
   def run_evaluation(self, evaluation: Evaluation) -> None:
     """Runs the evaluation."""
     try:
+      evaluation.setup()
       self.on_experiment_start(evaluation)
 
       per_evaluation_settings = {}
@@ -367,6 +400,8 @@
     except BaseException as e:  # pylint: disable=broad-except
       self.on_experiment_abort(evaluation, e)
       raise e
+    finally:
+      evaluation.teardown()
 
   @abc.abstractmethod
   def _evaluate_items(
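
Two small but load-bearing lifecycle additions in the last two hunks: `evaluation.setup()` now runs before any plugin callback, and `evaluation.teardown()` is guaranteed by a `finally` clause even when the run aborts. The contract, sketched with a stand-in class (not langfun's `Evaluation`):

class FakeEvaluation:
  """Stand-in exposing the two lifecycle hooks added by this diff."""

  def setup(self) -> None:
    print('setup')     # acquire resources before any plugin callback fires

  def teardown(self) -> None:
    print('teardown')  # must run even when evaluation raised


def run_evaluation(evaluation: FakeEvaluation) -> None:
  try:
    evaluation.setup()
    raise RuntimeError('abort mid-run')  # simulate a failing run
  finally:
    evaluation.teardown()  # still executes before the error propagates


try:
  run_evaluation(FakeEvaluation())
except RuntimeError:
  pass  # prints 'setup' then 'teardown'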
@@ -394,121 +429,3 @@
     return in_memory.InMemory(
         self.current_run.output_path_for(experiment, 'cache.json')
     )
-
-
-class SequentialRunner(RunnerBase):
-  """Sequential runner.
-
-  Sequential runner runs all evaluations and their examples in sequence,
-  as well as the background tasks, it allows the developer to catch all
-  exceptions thrown from the background tasks, making it easier to debug.
-  """
-
-  NAME = 'sequential'
-
-  def background_run(
-      self, func: Callable[..., Any], *args: Any, **kwargs: Any
-  ) -> None:
-    """Runs the function with the IO pool."""
-    func(*args, **kwargs)
-
-  def _run(self, evaluations: list[Evaluation]) -> None:
-    """Runs the experiment in sequence."""
-    for e in evaluations:
-      self.run_evaluation(e)
-
-  def _evaluate_items(
-      self, evaluation: Evaluation, items: Iterator[Example]
-  ) -> None:
-    """Runs the evaluation items in sequence."""
-    for item in items:
-      self.evaluate_item(evaluation, item)
-
-
-class DebugRunner(SequentialRunner):
-  """Debug runner."""
-
-  NAME = 'debug'
-
-  # Do not use the checkpointer for debug runner.
-  plugins = []
-
-  def _on_bound(self):
-    super()._on_bound()
-    if self.current_run.example_ids is None:
-      self.current_run.rebind(example_ids=[1], skip_notification=True)
-    self.current_run.rebind(raise_if_has_error=True, skip_notification=True)
-
-  def _save_run_manifest(self) -> None:
-    """Do nothing to avoid overriden existing runs."""
-
-
-class ParallelRunner(RunnerBase):
-  """Parallel runner."""
-
-  NAME = 'parallel'
-
-  timeout: Annotated[
-      int | None,
-      'Timeout for each evaluation example.'
-  ] = None
-
-  concurrent_startup_delay: Annotated[
-      tuple[int, int] | None,
-      (
-          'A range of seconds to delay the initial evaluation of each thread '
-          'in the thread pool, helping to prevent a burst in LLM QPS at '
-          'startup. If set to None, no delay will be applied.'
-      )
-  ] = None
-
-  def _run(self, evaluations: list[Evaluation]) -> None:
-    """Runs the evaluations in parallel."""
-    def _run_group(evaluation_group: list[Evaluation]):
-      for e in evaluation_group:
-        self.run_evaluation(e)
-
-    # Run evaluations in parallel groupped by resource key.
-    groups: dict[str, list[Evaluation]] = collections.defaultdict(list)
-    for e in evaluations:
-      resource_ids = e.resource_ids()
-      if not resource_ids:
-        group_id = e.id
-      else:
-        # TODO(daiyip): support group that requires multiple resources.
-        group_id = resource_ids.pop()
-      groups[group_id].append(e)
-
-    for _, _, _ in lf.concurrent_map(
-        _run_group,
-        groups.values(),
-        max_workers=max(64, len(groups)),
-        timeout=self.timeout,
-        silence_on_errors=None,
-    ):
-      pass
-
-  def _evaluate_items(
-      self, evaluation: Evaluation, items: Iterator[Example]
-  ) -> None:
-    """Override run items to run in parallel."""
-    if self.concurrent_startup_delay is not None:
-      thread_delayed = {}
-      def _evaluate_item(item: Example):
-        thread_id = threading.current_thread().ident
-        if thread_id not in thread_delayed:
-          thread_delayed[thread_id] = True
-          time.sleep(random.randint(*self.concurrent_startup_delay))
-        return self.evaluate_item(evaluation, item)
-    else:
-      def _evaluate_item(item: Example):
-        return self.evaluate_item(evaluation, item)
-
-    for _, _, _ in lf.concurrent_map(
-        _evaluate_item,
-        items,
-        max_workers=evaluation.max_workers,
-        timeout=self.timeout,
-        silence_on_errors=None,
-    ):
-      pass
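
Note that the three runner classes removed above are relocated, not deleted: per the file list, `SequentialRunner`, `DebugRunner`, and `ParallelRunner` reappear in `runners/sequential.py`, `runners/debug.py`, and `runners/parallel.py` respectively, alongside the brand-new `runners/beam.py` and `runners/ckpt_monitor.py`.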