langfun 0.1.2.dev202510230805__py3-none-any.whl → 0.1.2.dev202511160804__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langfun/core/__init__.py +1 -0
- langfun/core/agentic/action.py +107 -12
- langfun/core/agentic/action_eval.py +9 -2
- langfun/core/agentic/action_test.py +25 -0
- langfun/core/async_support.py +32 -3
- langfun/core/coding/python/correction.py +19 -9
- langfun/core/coding/python/execution.py +14 -12
- langfun/core/coding/python/generation.py +21 -16
- langfun/core/coding/python/sandboxing.py +23 -3
- langfun/core/component.py +42 -3
- langfun/core/concurrent.py +70 -6
- langfun/core/concurrent_test.py +1 -0
- langfun/core/console.py +1 -1
- langfun/core/data/conversion/anthropic.py +12 -3
- langfun/core/data/conversion/anthropic_test.py +8 -6
- langfun/core/data/conversion/gemini.py +9 -2
- langfun/core/data/conversion/gemini_test.py +12 -9
- langfun/core/data/conversion/openai.py +145 -31
- langfun/core/data/conversion/openai_test.py +161 -17
- langfun/core/eval/base.py +47 -43
- langfun/core/eval/base_test.py +4 -4
- langfun/core/eval/matching.py +5 -2
- langfun/core/eval/patching.py +3 -3
- langfun/core/eval/scoring.py +4 -3
- langfun/core/eval/v2/__init__.py +1 -0
- langfun/core/eval/v2/checkpointing.py +39 -5
- langfun/core/eval/v2/checkpointing_test.py +1 -1
- langfun/core/eval/v2/eval_test_helper.py +96 -0
- langfun/core/eval/v2/evaluation.py +87 -15
- langfun/core/eval/v2/evaluation_test.py +9 -3
- langfun/core/eval/v2/example.py +45 -39
- langfun/core/eval/v2/example_test.py +3 -3
- langfun/core/eval/v2/experiment.py +51 -8
- langfun/core/eval/v2/metric_values.py +31 -3
- langfun/core/eval/v2/metric_values_test.py +32 -0
- langfun/core/eval/v2/metrics.py +157 -44
- langfun/core/eval/v2/metrics_test.py +39 -18
- langfun/core/eval/v2/progress.py +30 -1
- langfun/core/eval/v2/progress_test.py +27 -0
- langfun/core/eval/v2/progress_tracking_test.py +3 -0
- langfun/core/eval/v2/reporting.py +90 -71
- langfun/core/eval/v2/reporting_test.py +20 -6
- langfun/core/eval/v2/runners/__init__.py +26 -0
- langfun/core/eval/v2/{runners.py → runners/base.py} +22 -124
- langfun/core/eval/v2/runners/debug.py +40 -0
- langfun/core/eval/v2/runners/debug_test.py +79 -0
- langfun/core/eval/v2/runners/parallel.py +100 -0
- langfun/core/eval/v2/runners/parallel_test.py +98 -0
- langfun/core/eval/v2/runners/sequential.py +47 -0
- langfun/core/eval/v2/runners/sequential_test.py +175 -0
- langfun/core/langfunc.py +45 -130
- langfun/core/langfunc_test.py +6 -4
- langfun/core/language_model.py +103 -16
- langfun/core/language_model_test.py +9 -3
- langfun/core/llms/__init__.py +7 -1
- langfun/core/llms/anthropic.py +157 -2
- langfun/core/llms/azure_openai.py +29 -17
- langfun/core/llms/cache/base.py +25 -3
- langfun/core/llms/cache/in_memory.py +48 -7
- langfun/core/llms/cache/in_memory_test.py +14 -4
- langfun/core/llms/compositional.py +25 -1
- langfun/core/llms/deepseek.py +30 -2
- langfun/core/llms/fake.py +32 -1
- langfun/core/llms/gemini.py +14 -9
- langfun/core/llms/google_genai.py +29 -1
- langfun/core/llms/groq.py +28 -3
- langfun/core/llms/llama_cpp.py +23 -4
- langfun/core/llms/openai.py +36 -3
- langfun/core/llms/openai_compatible.py +148 -27
- langfun/core/llms/openai_compatible_test.py +207 -20
- langfun/core/llms/openai_test.py +0 -2
- langfun/core/llms/rest.py +12 -1
- langfun/core/llms/vertexai.py +51 -8
- langfun/core/logging.py +1 -1
- langfun/core/mcp/client.py +77 -22
- langfun/core/mcp/client_test.py +8 -35
- langfun/core/mcp/session.py +94 -29
- langfun/core/mcp/session_test.py +54 -0
- langfun/core/mcp/tool.py +151 -22
- langfun/core/mcp/tool_test.py +197 -0
- langfun/core/memory.py +1 -0
- langfun/core/message.py +160 -55
- langfun/core/message_test.py +65 -81
- langfun/core/modalities/__init__.py +8 -0
- langfun/core/modalities/audio.py +21 -1
- langfun/core/modalities/image.py +19 -1
- langfun/core/modalities/mime.py +62 -3
- langfun/core/modalities/pdf.py +19 -1
- langfun/core/modalities/video.py +21 -1
- langfun/core/modality.py +167 -29
- langfun/core/modality_test.py +42 -12
- langfun/core/natural_language.py +1 -1
- langfun/core/sampling.py +4 -4
- langfun/core/sampling_test.py +20 -4
- langfun/core/structured/__init__.py +2 -24
- langfun/core/structured/completion.py +34 -44
- langfun/core/structured/completion_test.py +23 -43
- langfun/core/structured/description.py +54 -50
- langfun/core/structured/function_generation.py +29 -12
- langfun/core/structured/mapping.py +81 -37
- langfun/core/structured/parsing.py +95 -79
- langfun/core/structured/parsing_test.py +0 -3
- langfun/core/structured/querying.py +215 -142
- langfun/core/structured/querying_test.py +65 -29
- langfun/core/structured/schema/__init__.py +48 -0
- langfun/core/structured/schema/base.py +664 -0
- langfun/core/structured/schema/base_test.py +531 -0
- langfun/core/structured/schema/json.py +174 -0
- langfun/core/structured/schema/json_test.py +121 -0
- langfun/core/structured/schema/python.py +316 -0
- langfun/core/structured/schema/python_test.py +410 -0
- langfun/core/structured/schema_generation.py +33 -14
- langfun/core/structured/scoring.py +47 -36
- langfun/core/structured/tokenization.py +26 -11
- langfun/core/subscription.py +2 -2
- langfun/core/template.py +174 -49
- langfun/core/template_test.py +123 -17
- langfun/env/__init__.py +8 -2
- langfun/env/base_environment.py +320 -128
- langfun/env/base_environment_test.py +473 -0
- langfun/env/base_feature.py +92 -15
- langfun/env/base_feature_test.py +228 -0
- langfun/env/base_sandbox.py +84 -361
- langfun/env/base_sandbox_test.py +1235 -0
- langfun/env/event_handlers/__init__.py +1 -1
- langfun/env/event_handlers/chain.py +233 -0
- langfun/env/event_handlers/chain_test.py +253 -0
- langfun/env/event_handlers/event_logger.py +95 -98
- langfun/env/event_handlers/event_logger_test.py +21 -21
- langfun/env/event_handlers/metric_writer.py +225 -140
- langfun/env/event_handlers/metric_writer_test.py +23 -6
- langfun/env/interface.py +854 -40
- langfun/env/interface_test.py +112 -2
- langfun/env/load_balancers_test.py +23 -2
- langfun/env/test_utils.py +126 -84
- {langfun-0.1.2.dev202510230805.dist-info → langfun-0.1.2.dev202511160804.dist-info}/METADATA +1 -1
- langfun-0.1.2.dev202511160804.dist-info/RECORD +211 -0
- langfun/core/eval/v2/runners_test.py +0 -343
- langfun/core/structured/schema.py +0 -987
- langfun/core/structured/schema_test.py +0 -982
- langfun/env/base_test.py +0 -1481
- langfun/env/event_handlers/base.py +0 -350
- langfun-0.1.2.dev202510230805.dist-info/RECORD +0 -195
- {langfun-0.1.2.dev202510230805.dist-info → langfun-0.1.2.dev202511160804.dist-info}/WHEEL +0 -0
- {langfun-0.1.2.dev202510230805.dist-info → langfun-0.1.2.dev202511160804.dist-info}/licenses/LICENSE +0 -0
- {langfun-0.1.2.dev202510230805.dist-info → langfun-0.1.2.dev202511160804.dist-info}/top_level.txt +0 -0
langfun/core/eval/v2/reporting.py

@@ -32,8 +32,97 @@ _SUMMARY_FILE = 'summary.html'
 _EVALULATION_DETAIL_FILE = 'index.html'
 
 
+class ExampleHtmlGenerator(experiment_lib.Plugin):
+  """Plugin for generating HTML views for each evaluation example."""
+
+  def on_example_complete(
+      self, runner: Runner, experiment: Experiment, example: Example
+  ):
+    self._save_example_html(runner, experiment, example)
+
+  def _save_example_html(
+      self, runner: Runner, experiment: Experiment, example: Example
+  ) -> None:
+    """Saves the example in HTML format."""
+    current_run = runner.current_run
+    def _generate():
+      try:
+        with pg.timeit() as t:
+          html = example.to_html(
+              collapse_level=None,
+              enable_summary_tooltip=False,
+              extra_flags=dict(
+                  # For properly rendering the next link.
+                  num_examples=getattr(experiment, 'num_examples', None)
+              ),
+          )
+          html.save(
+              runner.current_run.output_path_for(
+                  experiment, f'{example.id}.html'
+              )
+          )
+        experiment.info(
+            f'\'{example.id}.html\' generated in {t.elapse:.2f} seconds. '
+        )
+      except BaseException as e:  # pylint: disable=broad-except
+        experiment.error(
+            f'Failed to generate \'{example.id}.html\'. '
+            f'Error: {e}, Stacktrace: \n{traceback.format_exc()}.',
+        )
+        raise e
+
+    def _copy():
+      src_file = current_run.input_path_for(experiment, f'{example.id}.html')
+      dest_file = current_run.output_path_for(experiment, f'{example.id}.html')
+
+      if src_file == dest_file:
+        return
+
+      if not pg.io.path_exists(src_file):
+        experiment.warning(
+            f'Skip copying \'{example.id}.html\' as '
+            f'{src_file!r} does not exist.'
+        )
+        return
+
+      try:
+        with pg.timeit() as t, pg.io.open(src_file, 'r') as src:
+          content = src.read()
+          with pg.io.open(dest_file, 'w') as dest:
+            dest.write(content)
+        experiment.info(
+            f'\'{example.id}.html\' copied in {t.elapse:.2f} seconds.'
+        )
+      except BaseException as e:  # pylint: disable=broad-except
+        experiment.error(
+            f'Failed to copy {src_file!r} to {dest_file!r}. Error: {e}.'
+        )
+        raise e
+
+    generate_example_html = current_run.generate_example_html
+    if (generate_example_html == 'all'
+        or (generate_example_html == 'new' and example.newly_processed)
+        or (isinstance(generate_example_html, list)
+            and example.id in generate_example_html)):
+      op = _generate
+    else:
+      op = _copy
+    runner.background_run(op)
+
+
 class HtmlReporter(experiment_lib.Plugin):
-  """Plugin for periodically generating HTML reports for the experiment."""
+  """Plugin for periodically generating HTML reports for the experiment.
+
+  The `HtmlReporter` plugin generates several HTML files during an experiment
+  run:
+  - A `summary.html` at the root of the run directory, summarizing all
+    evaluations in the experiment.
+  - An `index.html` for each leaf evaluation, detailing the evaluation
+    definition, metrics, and logs.
+
+  These reports are updated periodically in the background during the run,
+  allowing users to monitor progress in near real-time.
+  """
 
   summary_interval: Annotated[
       int,
@@ -127,7 +216,6 @@ class HtmlReporter(experiment_lib.Plugin):
   def on_example_complete(
       self, runner: Runner, experiment: Experiment, example: Example
   ):
-    self._save_example_html(runner, experiment, example)
     self._maybe_update_experiment_html(runner, experiment)
     self._maybe_update_summary(runner)
 
@@ -197,72 +285,3 @@ class HtmlReporter(experiment_lib.Plugin):
       runner.background_run(_save)
     else:
       _save()
-
-  def _save_example_html(
-      self, runner: Runner, experiment: Experiment, example: Example
-  ) -> None:
-    """Saves the example in HTML format."""
-    current_run = runner.current_run
-    def _generate():
-      try:
-        with pg.timeit() as t:
-          html = example.to_html(
-              collapse_level=None,
-              enable_summary_tooltip=False,
-              extra_flags=dict(
-                  # For properly rendering the next link.
-                  num_examples=getattr(experiment, 'num_examples', None)
-              ),
-          )
-          html.save(
-              runner.current_run.output_path_for(
-                  experiment, f'{example.id}.html'
-              )
-          )
-        experiment.info(
-            f'\'{example.id}.html\' generated in {t.elapse:.2f} seconds. '
-        )
-      except BaseException as e:  # pylint: disable=broad-except
-        experiment.error(
-            f'Failed to generate \'{example.id}.html\'. '
-            f'Error: {e}, Stacktrace: \n{traceback.format_exc()}.',
-        )
-        raise e
-
-    def _copy():
-      src_file = current_run.input_path_for(experiment, f'{example.id}.html')
-      dest_file = current_run.output_path_for(experiment, f'{example.id}.html')
-
-      if src_file == dest_file:
-        return
-
-      if not pg.io.path_exists(src_file):
-        experiment.warning(
-            f'Skip copying \'{example.id}.html\' as '
-            f'{src_file!r} does not exist.'
-        )
-        return
-
-      try:
-        with pg.timeit() as t, pg.io.open(src_file, 'r') as src:
-          content = src.read()
-          with pg.io.open(dest_file, 'w') as dest:
-            dest.write(content)
-        experiment.info(
-            f'\'{example.id}.html\' copied in {t.elapse:.2f} seconds.'
-        )
-      except BaseException as e:  # pylint: disable=broad-except
-        experiment.error(
-            f'Failed to copy {src_file!r} to {dest_file!r}. Error: {e}.'
-        )
-        raise e
-
-    generate_example_html = current_run.generate_example_html
-    if (generate_example_html == 'all'
-        or (generate_example_html == 'new' and example.newly_processed)
-        or (isinstance(generate_example_html, list)
-            and example.id in generate_example_html)):
-      op = _generate
-    else:
-      op = _copy
-    runner.background_run(op)
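Per-example HTML generation now lives in its own plugin rather than inside HtmlReporter. The following sketch mirrors the updated tests below: `HtmlReporter` keeps producing `summary.html` and per-evaluation `index.html`, while `ExampleHtmlGenerator` must be added explicitly to get the per-example `<id>.html` pages. `eval_test_helper.test_experiment()` is the test fixture used in this diff; substitute a real experiment in practice.

import os
import tempfile

from langfun.core.eval.v2 import checkpointing
from langfun.core.eval.v2 import eval_test_helper  # test fixture
from langfun.core.eval.v2 import reporting

experiment = eval_test_helper.test_experiment()
run = experiment.run(
    os.path.join(tempfile.mkdtemp(), 'my_run'),
    'new',
    plugins=[
        checkpointing.BulkCheckpointer('checkpoint.jsonl'),
        reporting.HtmlReporter(),            # summary.html + per-evaluation index.html
        reporting.ExampleHtmlGenerator(),    # <id>.html for each example
    ],
)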
langfun/core/eval/v2/reporting_test.py

@@ -29,7 +29,12 @@ class ReportingTest(unittest.TestCase):
     experiment = eval_test_helper.test_experiment()
     checkpointer = checkpointing.BulkCheckpointer('checkpoint.jsonl')
     reporter = reporting.HtmlReporter()
-    run = experiment.run(root_dir, 'new', plugins=[checkpointer, reporter])
+    example_html_generator = reporting.ExampleHtmlGenerator()
+    run = experiment.run(
+        root_dir,
+        'new',
+        plugins=[checkpointer, reporter, example_html_generator]
+    )
     self.assertTrue(
         pg.io.path_exists(os.path.join(run.output_root, 'summary.html'))
     )
@@ -52,8 +57,10 @@ class ReportingTest(unittest.TestCase):
     root_dir = os.path.join(tempfile.mkdtemp(), 'test_reporting2')
     experiment = eval_test_helper.test_experiment()
     run = experiment.run(
-        root_dir,
-        'new', plugins=[checkpointer, reporter], warm_start_from=run.output_root,
+        root_dir,
+        'new',
+        plugins=[checkpointer, reporter, example_html_generator],
+        warm_start_from=run.output_root,
     )
     self.assertTrue(
         pg.io.path_exists(os.path.join(run.output_root, 'summary.html'))
@@ -105,7 +112,12 @@ class ReportingTest(unittest.TestCase):
         .test_experiment_with_example_html_generation_error())
     checkpointer = checkpointing.BulkCheckpointer('checkpoint.jsonl')
     reporter = reporting.HtmlReporter()
-    run = experiment.run(root_dir, 'new', plugins=[checkpointer, reporter])
+    example_html_generator = reporting.ExampleHtmlGenerator()
+    run = experiment.run(
+        root_dir,
+        'new',
+        plugins=[checkpointer, reporter, example_html_generator]
+    )
     self.assertTrue(
         pg.io.path_exists(os.path.join(run.output_root, 'summary.html'))
     )
@@ -132,8 +144,10 @@ class ReportingTest(unittest.TestCase):
     experiment = (eval_test_helper
                   .test_experiment_with_example_html_generation_error())
     run = experiment.run(
-        root_dir,
-        'new', plugins=[checkpointer, reporter], warm_start_from=run.output_root,
+        root_dir,
+        'new',
+        plugins=[checkpointer, reporter, example_html_generator],
+        warm_start_from=run.output_root,
     )
     self.assertTrue(
         pg.io.path_exists(os.path.join(run.output_root, 'summary.html'))
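The `_generate`/`_copy` branching in reporting.py above keys off `current_run.generate_example_html` ('all', 'new', or an explicit list of example ids). Continuing the sketch from earlier, a warm-started second run along the lines of these tests; the copy-vs-regenerate behavior described in the comment is an assumption read off that branching, not a documented guarantee:

run2 = experiment.run(
    os.path.join(tempfile.mkdtemp(), 'my_run2'),
    'new',
    plugins=[
        checkpointing.BulkCheckpointer('checkpoint.jsonl'),
        reporting.HtmlReporter(),
        reporting.ExampleHtmlGenerator(),
    ],
    # Reuse artifacts from the previous run. With generate_example_html='new',
    # examples that are not newly processed are copied from the prior run's
    # output by ExampleHtmlGenerator._copy instead of being re-rendered.
    warm_start_from=run.output_root,
)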
langfun/core/eval/v2/runners/__init__.py (new file)

@@ -0,0 +1,26 @@
+# Copyright 2024 The Langfun Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Langfun evaluation runners."""
+
+from langfun.core.eval.v2.runners.base import RunnerBase
+from langfun.core.eval.v2.runners.debug import DebugRunner
+from langfun.core.eval.v2.runners.parallel import ParallelRunner
+from langfun.core.eval.v2.runners.sequential import SequentialRunner
+
+__all__ = [
+    'RunnerBase',
+    'DebugRunner',
+    'ParallelRunner',
+    'SequentialRunner',
+]
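Because `runners/__init__.py` re-exports the runner classes, code that imported them from the old single-module path should keep resolving; only deep imports need the new submodule paths. A quick sanity check, assuming the re-exports shown above:

from langfun.core.eval.v2 import runners
from langfun.core.eval.v2.runners.parallel import ParallelRunner

# The package-level name and the submodule name refer to the same class.
assert runners.ParallelRunner is ParallelRunner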
langfun/core/eval/v2/{runners.py → runners/base.py}

@@ -11,13 +11,12 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""
+"""Base experiment runner."""
+
 import abc
-import collections
 import concurrent.futures
 import random
 import threading
-import time
 import traceback
 from typing import Any, Annotated, Callable, Iterator
 
@@ -42,7 +41,14 @@ _RUN_MANIFEST = 'run.json'
 
 
 class RunnerBase(Runner):
-  """
+  """Base class for runners with plugin support and IO pooling.
+
+  `RunnerBase` provides the basic runner functionalities such as plugin
+  integration for checkpointing, reporting and progress tracking.
+  It also manages a thread pool for background IO operations.
+  Subclasses should implement `_run` and `_evaluate_items` for different
+  execution strategies.
+  """
 
   tqdm: Annotated[
       bool,
@@ -58,6 +64,11 @@ class RunnerBase(Runner):
       reporting.HtmlReporter(),
   ]
 
+  max_background_threads: Annotated[
+      int,
+      'Max number of background threads for IO operations.'
+  ] = 128
+
   def _on_bound(self):
     super()._on_bound()
 
@@ -66,7 +77,9 @@ class RunnerBase(Runner):
     self.plugins.append(progress_tracking.progress_tracker(self.tqdm))
 
     self._io_pool_lock = threading.Lock()
-    self._io_pool = concurrent.futures.ThreadPoolExecutor(
+    self._io_pool = concurrent.futures.ThreadPoolExecutor(
+        max_workers=self.max_background_threads
+    )
     # TODO(daiyip): render background errors.
     self._background_last_error = None
 
@@ -220,7 +233,7 @@ class RunnerBase(Runner):
     else:
       # A evaluation could be considered as done if it has processed all the
      # examples specified by `example_ids`.
-      assert progress.is_completed
+      assert progress.is_completed, progress
       parent_progress.increment_processed()
 
     if parent_progress.is_completed:
@@ -335,6 +348,7 @@ class RunnerBase(Runner):
   def run_evaluation(self, evaluation: Evaluation) -> None:
     """Runs the evaluation."""
     try:
+      evaluation.setup()
       self.on_experiment_start(evaluation)
 
       per_evaluation_settings = {}
@@ -367,6 +381,8 @@ class RunnerBase(Runner):
     except BaseException as e:  # pylint: disable=broad-except
       self.on_experiment_abort(evaluation, e)
       raise e
+    finally:
+      evaluation.teardown()
 
   @abc.abstractmethod
   def _evaluate_items(
@@ -394,121 +410,3 @@ class RunnerBase(Runner):
     return in_memory.InMemory(
         self.current_run.output_path_for(experiment, 'cache.json')
     )
-
-
-class SequentialRunner(RunnerBase):
-  """Sequential runner.
-
-  Sequential runner runs all evaluations and their examples in sequence,
-  as well as the background tasks, it allows the developer to catch all
-  exceptions thrown from the background tasks, making it easier to debug.
-  """
-
-  NAME = 'sequential'
-
-  def background_run(
-      self, func: Callable[..., Any], *args: Any, **kwargs: Any
-  ) -> None:
-    """Runs the function with the IO pool."""
-    func(*args, **kwargs)
-
-  def _run(self, evaluations: list[Evaluation]) -> None:
-    """Runs the experiment in sequence."""
-    for e in evaluations:
-      self.run_evaluation(e)
-
-  def _evaluate_items(
-      self, evaluation: Evaluation, items: Iterator[Example]
-  ) -> None:
-    """Runs the evaluation items in sequence."""
-    for item in items:
-      self.evaluate_item(evaluation, item)
-
-
-class DebugRunner(SequentialRunner):
-  """Debug runner."""
-
-  NAME = 'debug'
-
-  # Do not use the checkpointer for debug runner.
-  plugins = []
-
-  def _on_bound(self):
-    super()._on_bound()
-    if self.current_run.example_ids is None:
-      self.current_run.rebind(example_ids=[1], skip_notification=True)
-    self.current_run.rebind(raise_if_has_error=True, skip_notification=True)
-
-  def _save_run_manifest(self) -> None:
-    """Do nothing to avoid overriden existing runs."""
-
-
-class ParallelRunner(RunnerBase):
-  """Parallel runner."""
-
-  NAME = 'parallel'
-
-  timeout: Annotated[
-      int | None,
-      'Timeout for each evaluation example.'
-  ] = None
-
-  concurrent_startup_delay: Annotated[
-      tuple[int, int] | None,
-      (
-          'A range of seconds to delay the initial evaluation of each thread '
-          'in the thread pool, helping to prevent a burst in LLM QPS at '
-          'startup. If set to None, no delay will be applied.'
-      )
-  ] = None
-
-  def _run(self, evaluations: list[Evaluation]) -> None:
-    """Runs the evaluations in parallel."""
-    def _run_group(evaluation_group: list[Evaluation]):
-      for e in evaluation_group:
-        self.run_evaluation(e)
-
-    # Run evaluations in parallel groupped by resource key.
-    groups: dict[str, list[Evaluation]] = collections.defaultdict(list)
-    for e in evaluations:
-      resource_ids = e.resource_ids()
-      if not resource_ids:
-        group_id = e.id
-      else:
-        # TODO(daiyip): support group that requires multiple resources.
-        group_id = resource_ids.pop()
-      groups[group_id].append(e)
-
-    for _, _, _ in lf.concurrent_map(
-        _run_group,
-        groups.values(),
-        max_workers=max(64, len(groups)),
-        timeout=self.timeout,
-        silence_on_errors=None,
-    ):
-      pass
-
-  def _evaluate_items(
-      self, evaluation: Evaluation, items: Iterator[Example]
-  ) -> None:
-    """Override run items to run in parallel."""
-    if self.concurrent_startup_delay is not None:
-      thread_delayed = {}
-      def _evaluate_item(item: Example):
-        thread_id = threading.current_thread().ident
-        if thread_id not in thread_delayed:
-          thread_delayed[thread_id] = True
-          time.sleep(random.randint(*self.concurrent_startup_delay))
-        return self.evaluate_item(evaluation, item)
-    else:
-      def _evaluate_item(item: Example):
-        return self.evaluate_item(evaluation, item)
-
-    for _, _, _ in lf.concurrent_map(
-        _evaluate_item,
-        items,
-        max_workers=evaluation.max_workers,
-        timeout=self.timeout,
-        silence_on_errors=None,
-    ):
-      pass
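The new `RunnerBase` docstring spells out the extension contract: subclasses supply `_run` (how evaluations are scheduled) and `_evaluate_items` (how examples within one evaluation are executed), while `run_evaluation` now wraps each evaluation with `setup()`/`teardown()` and the plugin callbacks. A hypothetical subclass sketch; `ReversedRunner` and its `NAME` are illustrative, not part of langfun:

from typing import Iterator

from langfun.core.eval.v2.runners import base


class ReversedRunner(base.RunnerBase):
  """Sequential runner that visits examples in reverse order (illustrative)."""

  NAME = 'reversed'  # Registry key, following the 'sequential'/'parallel' pattern.

  def _run(self, evaluations: list[base.Evaluation]) -> None:
    for e in evaluations:
      # run_evaluation() handles evaluation.setup()/teardown() and plugin events.
      self.run_evaluation(e)

  def _evaluate_items(
      self, evaluation: base.Evaluation, items: Iterator[base.Example]
  ) -> None:
    for item in reversed(list(items)):
      self.evaluate_item(evaluation, item)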
langfun/core/eval/v2/runners/debug.py (new file)

@@ -0,0 +1,40 @@
+# Copyright 2024 The Langfun Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Debug runner."""
+
+from langfun.core.eval.v2.runners import sequential
+
+
+class DebugRunner(sequential.SequentialRunner):
+  """A runner for debugging evaluations.
+
+  The debug runner is a sequential runner that only runs the first example
+  of each evaluation, with `raise_if_has_error` enabled. This is useful for
+  quickly identifying issues in evaluation logic during development.
+  Checkpointers are disabled for this runner.
+  """
+
+  NAME = 'debug'
+
+  # Do not use the checkpointer for debug runner.
+  plugins = []
+
+  def _on_bound(self):
+    super()._on_bound()
+    if self.current_run.example_ids is None:
+      self.current_run.rebind(example_ids=[1], skip_notification=True)
+    self.current_run.rebind(raise_if_has_error=True, skip_notification=True)
+
+  def _save_run_manifest(self) -> None:
+    """Do nothing to avoid overriden existing runs."""
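Selecting the debug runner by its `NAME`, as the new debug_test.py below does: only the first example of each evaluation runs, errors raise immediately, and no `run.json` manifest is written. `eval_test_helper.test_experiment()` is again the test fixture; use a real experiment in practice.

import os
import tempfile

from langfun.core.eval.v2 import eval_test_helper  # test fixture

exp = eval_test_helper.test_experiment()
root_dir = os.path.join(tempfile.mkdtemp(), 'debug_run')
run = exp.run(root_dir, runner='debug')  # first example only; errors propagate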
langfun/core/eval/v2/runners/debug_test.py (new file)

@@ -0,0 +1,79 @@
+# Copyright 2024 The Langfun Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for debug runner."""
+import os
+import tempfile
+from typing import Any
+import unittest
+
+from langfun.core.eval.v2 import eval_test_helper
+from langfun.core.eval.v2.runners import debug  # pylint: disable=unused-import
+
+import pyglove as pg
+
+
+class RunnerTest(unittest.TestCase):
+
+  def assert_same_list(self, actual: list[Any], expected: list[Any]):
+    self.assertEqual(len(actual), len(expected))
+    for i, (x, y) in enumerate(zip(actual, expected)):
+      if x is not y:
+        print(i, pg.diff(x, y))
+      self.assertIs(x, y)
+
+
+class DebugRunnerTest(RunnerTest):
+
+  def test_debug_runner(self):
+    plugin = eval_test_helper.TestPlugin()
+    exp = eval_test_helper.test_experiment()
+    root_dir = os.path.join(tempfile.mkdtemp(), 'test_debug_runner')
+    run = exp.run(root_dir, runner='debug', plugins=[plugin])
+
+    self.assertIsNotNone(plugin.start_time)
+    self.assertIsNotNone(plugin.complete_time)
+    self.assertGreater(plugin.complete_time, plugin.start_time)
+
+    self.assertEqual(
+        len(plugin.started_experiments), len(exp.nodes)
+    )
+    self.assertEqual(
+        len(plugin.completed_experiments), len(exp.nodes)
+    )
+    self.assertEqual(
+        len(plugin.started_example_ids), 6 * 1
+    )
+    self.assertEqual(
+        len(plugin.completed_example_ids), 6 * 1
+    )
+    self.assert_same_list(plugin.skipped_experiments, [])
+    self.assertFalse(
+        pg.io.path_exists(os.path.join(run.output_root, 'run.json'))
+    )
+
+    for node in exp.nodes:
+      self.assertTrue(node.progress.is_started)
+      self.assertTrue(node.progress.is_completed)
+      if node.is_leaf:
+        self.assertEqual(node.progress.num_skipped, 0)
+        self.assertEqual(node.progress.num_completed, 1)
+        self.assertEqual(node.progress.num_failed, 0)
+      else:
+        self.assertEqual(node.progress.num_skipped, 0)
+        self.assertEqual(node.progress.num_failed, 0)
+        self.assertEqual(node.progress.num_processed, node.progress.num_total)
+
+
+if __name__ == '__main__':
+  unittest.main()
langfun/core/eval/v2/runners/parallel.py (new file)

@@ -0,0 +1,100 @@
+# Copyright 2025 The Langfun Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Parallel runner."""
+
+import collections
+import random
+import threading
+import time
+
+from typing import Annotated, Iterator
+import langfun.core as lf
+from langfun.core.eval.v2.runners import base
+
+
+class ParallelRunner(base.RunnerBase):
+  """A runner that executes evaluations and examples in parallel.
+
+  The parallel runner groups evaluations by their required resources
+  (e.g., specific LLMs) and runs evaluations that do not share resources in
+  parallel. Within each evaluation, examples are also processed in parallel
+  using threads, up to `Evaluation.max_workers`.
+  """
+
+  NAME = 'parallel'
+
+  timeout: Annotated[
+      int | None,
+      'Timeout for each evaluation example.'
+  ] = None
+
+  concurrent_startup_delay: Annotated[
+      tuple[int, int] | None,
+      (
+          'A range of seconds to delay the initial evaluation of each thread '
+          'in the thread pool, helping to prevent a burst in LLM QPS at '
+          'startup. If set to None, no delay will be applied.'
+      )
+  ] = None
+
+  def _run(self, evaluations: list[base.Evaluation]) -> None:
+    """Runs the evaluations in parallel."""
+    def _run_group(evaluation_group: list[base.Evaluation]):
+      for e in evaluation_group:
+        self.run_evaluation(e)
+
+    # Run evaluations in parallel groupped by resource key.
+    groups: dict[str, list[base.Evaluation]] = collections.defaultdict(list)
+    for e in evaluations:
+      resource_ids = e.resource_ids()
+      if not resource_ids:
+        group_id = e.id
+      else:
+        # TODO(daiyip): support group that requires multiple resources.
+        group_id = resource_ids.pop()
+      groups[group_id].append(e)
+
+    for _, _, _ in lf.concurrent_map(
+        _run_group,
+        groups.values(),
+        max_workers=max(64, len(groups)),
+        timeout=self.timeout,
+        silence_on_errors=None,
+    ):
+      pass
+
+  def _evaluate_items(
+      self, evaluation: base.Evaluation, items: Iterator[base.Example]
+  ) -> None:
+    """Override run items to run in parallel."""
+    if self.concurrent_startup_delay is not None:
+      thread_delayed = {}
+      def _evaluate_item(item: base.Example):
+        thread_id = threading.current_thread().ident
+        if thread_id not in thread_delayed:
+          thread_delayed[thread_id] = True
+          time.sleep(random.randint(*self.concurrent_startup_delay))
+        return self.evaluate_item(evaluation, item)
+    else:
+      def _evaluate_item(item: base.Example):
+        return self.evaluate_item(evaluation, item)
+
+    for _, _, _ in lf.concurrent_map(
+        _evaluate_item,
+        items,
+        max_workers=evaluation.max_workers,
+        timeout=self.timeout,
+        silence_on_errors=None,
+    ):
+      pass
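A standalone sketch of the grouping policy in `ParallelRunner._run`: evaluations sharing a resource id (for example, the same LLM) land in one group and run sequentially within it, while distinct groups run in parallel, so concurrency never doubles up on a shared resource. `Eval` below is a toy stand-in, not the langfun `Evaluation` class:

import collections


class Eval:
  """Toy stand-in exposing just the two members the grouping logic reads."""

  def __init__(self, eid: str, resources: set[str]):
    self.id = eid
    self._resources = resources

  def resource_ids(self) -> set[str]:
    return set(self._resources)


def group_by_resource(evaluations: list[Eval]) -> dict[str, list[Eval]]:
  groups = collections.defaultdict(list)
  for e in evaluations:
    resource_ids = e.resource_ids()
    # Evaluations without resources get their own group (keyed by id);
    # otherwise one resource id is picked as the group key.
    group_id = e.id if not resource_ids else resource_ids.pop()
    groups[group_id].append(e)
  return groups


groups = group_by_resource([
    Eval('a', {'gpt-4o'}), Eval('b', {'gpt-4o'}), Eval('c', set()),
])
assert len(groups) == 2  # {'gpt-4o': [a, b], 'c': [c]}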