PyPI - langfun - Versions diffs - 0.1.2.dev202509120804__py3-none-any.whl → 0.1.2.dev202512150805__py3-none-any.whl - Mend

langfun 0.1.2.dev202509120804__py3-none-any.whl → 0.1.2.dev202512150805__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (162)

langfun/__init__.py +1 -1
langfun/core/__init__.py +7 -1
langfun/core/agentic/__init__.py +8 -1
langfun/core/agentic/action.py +740 -112
langfun/core/agentic/action_eval.py +9 -2
langfun/core/agentic/action_test.py +189 -24
langfun/core/async_support.py +104 -5
langfun/core/async_support_test.py +23 -0
langfun/core/coding/python/correction.py +19 -9
langfun/core/coding/python/execution.py +14 -12
langfun/core/coding/python/generation.py +21 -16
langfun/core/coding/python/sandboxing.py +23 -3
langfun/core/component.py +42 -3
langfun/core/concurrent.py +70 -6
langfun/core/concurrent_test.py +9 -2
langfun/core/console.py +1 -1
langfun/core/data/conversion/anthropic.py +12 -3
langfun/core/data/conversion/anthropic_test.py +8 -6
langfun/core/data/conversion/gemini.py +11 -2
langfun/core/data/conversion/gemini_test.py +48 -9
langfun/core/data/conversion/openai.py +145 -31
langfun/core/data/conversion/openai_test.py +161 -17
langfun/core/eval/base.py +48 -44
langfun/core/eval/base_test.py +5 -5
langfun/core/eval/matching.py +5 -2
langfun/core/eval/patching.py +3 -3
langfun/core/eval/scoring.py +4 -3
langfun/core/eval/v2/__init__.py +3 -0
langfun/core/eval/v2/checkpointing.py +148 -46
langfun/core/eval/v2/checkpointing_test.py +9 -2
langfun/core/eval/v2/config_saver.py +37 -0
langfun/core/eval/v2/config_saver_test.py +36 -0
langfun/core/eval/v2/eval_test_helper.py +104 -3
langfun/core/eval/v2/evaluation.py +102 -19
langfun/core/eval/v2/evaluation_test.py +9 -3
langfun/core/eval/v2/example.py +50 -40
langfun/core/eval/v2/example_test.py +16 -8
langfun/core/eval/v2/experiment.py +95 -20
langfun/core/eval/v2/experiment_test.py +19 -0
langfun/core/eval/v2/metric_values.py +31 -3
langfun/core/eval/v2/metric_values_test.py +32 -0
langfun/core/eval/v2/metrics.py +157 -44
langfun/core/eval/v2/metrics_test.py +39 -18
langfun/core/eval/v2/progress.py +31 -1
langfun/core/eval/v2/progress_test.py +27 -0
langfun/core/eval/v2/progress_tracking.py +13 -5
langfun/core/eval/v2/progress_tracking_test.py +9 -1
langfun/core/eval/v2/reporting.py +88 -71
langfun/core/eval/v2/reporting_test.py +24 -6
langfun/core/eval/v2/runners/__init__.py +30 -0
langfun/core/eval/v2/{runners.py → runners/base.py} +73 -180
langfun/core/eval/v2/runners/beam.py +354 -0
langfun/core/eval/v2/runners/beam_test.py +153 -0
langfun/core/eval/v2/runners/ckpt_monitor.py +350 -0
langfun/core/eval/v2/runners/ckpt_monitor_test.py +213 -0
langfun/core/eval/v2/runners/debug.py +40 -0
langfun/core/eval/v2/runners/debug_test.py +76 -0
langfun/core/eval/v2/runners/parallel.py +243 -0
langfun/core/eval/v2/runners/parallel_test.py +182 -0
langfun/core/eval/v2/runners/sequential.py +47 -0
langfun/core/eval/v2/runners/sequential_test.py +169 -0
langfun/core/langfunc.py +45 -130
langfun/core/langfunc_test.py +7 -5
langfun/core/language_model.py +189 -36
langfun/core/language_model_test.py +54 -3
langfun/core/llms/__init__.py +14 -1
langfun/core/llms/anthropic.py +157 -2
langfun/core/llms/azure_openai.py +29 -17
langfun/core/llms/cache/base.py +25 -3
langfun/core/llms/cache/in_memory.py +48 -7
langfun/core/llms/cache/in_memory_test.py +14 -4
langfun/core/llms/compositional.py +25 -1
langfun/core/llms/deepseek.py +30 -2
langfun/core/llms/fake.py +32 -1
langfun/core/llms/gemini.py +90 -12
langfun/core/llms/gemini_test.py +110 -0
langfun/core/llms/google_genai.py +52 -1
langfun/core/llms/groq.py +28 -3
langfun/core/llms/llama_cpp.py +23 -4
langfun/core/llms/openai.py +120 -3
langfun/core/llms/openai_compatible.py +148 -27
langfun/core/llms/openai_compatible_test.py +207 -20
langfun/core/llms/openai_test.py +0 -2
langfun/core/llms/rest.py +16 -1
langfun/core/llms/vertexai.py +78 -8
langfun/core/logging.py +1 -1
langfun/core/mcp/__init__.py +10 -0
langfun/core/mcp/client.py +177 -0
langfun/core/mcp/client_test.py +71 -0
langfun/core/mcp/session.py +241 -0
langfun/core/mcp/session_test.py +54 -0
langfun/core/mcp/testing/simple_mcp_client.py +33 -0
langfun/core/mcp/testing/simple_mcp_server.py +33 -0
langfun/core/mcp/tool.py +254 -0
langfun/core/mcp/tool_test.py +197 -0
langfun/core/memory.py +1 -0
langfun/core/message.py +160 -55
langfun/core/message_test.py +65 -81
langfun/core/modalities/__init__.py +8 -0
langfun/core/modalities/audio.py +21 -1
langfun/core/modalities/image.py +73 -3
langfun/core/modalities/image_test.py +116 -0
langfun/core/modalities/mime.py +78 -4
langfun/core/modalities/mime_test.py +59 -0
langfun/core/modalities/pdf.py +19 -1
langfun/core/modalities/video.py +21 -1
langfun/core/modality.py +167 -29
langfun/core/modality_test.py +42 -12
langfun/core/natural_language.py +1 -1
langfun/core/sampling.py +4 -4
langfun/core/sampling_test.py +20 -4
langfun/core/structured/__init__.py +2 -24
langfun/core/structured/completion.py +34 -44
langfun/core/structured/completion_test.py +23 -43
langfun/core/structured/description.py +54 -50
langfun/core/structured/function_generation.py +29 -12
langfun/core/structured/mapping.py +81 -37
langfun/core/structured/parsing.py +95 -79
langfun/core/structured/parsing_test.py +0 -3
langfun/core/structured/querying.py +230 -154
langfun/core/structured/querying_test.py +69 -33
langfun/core/structured/schema/__init__.py +49 -0
langfun/core/structured/schema/base.py +664 -0
langfun/core/structured/schema/base_test.py +531 -0
langfun/core/structured/schema/json.py +174 -0
langfun/core/structured/schema/json_test.py +121 -0
langfun/core/structured/schema/python.py +316 -0
langfun/core/structured/schema/python_test.py +410 -0
langfun/core/structured/schema_generation.py +33 -14
langfun/core/structured/scoring.py +47 -36
langfun/core/structured/tokenization.py +26 -11
langfun/core/subscription.py +2 -2
langfun/core/template.py +175 -50
langfun/core/template_test.py +123 -17
langfun/env/__init__.py +43 -0
langfun/env/base_environment.py +827 -0
langfun/env/base_environment_test.py +473 -0
langfun/env/base_feature.py +304 -0
langfun/env/base_feature_test.py +228 -0
langfun/env/base_sandbox.py +842 -0
langfun/env/base_sandbox_test.py +1235 -0
langfun/env/event_handlers/__init__.py +14 -0
langfun/env/event_handlers/chain.py +233 -0
langfun/env/event_handlers/chain_test.py +253 -0
langfun/env/event_handlers/event_logger.py +472 -0
langfun/env/event_handlers/event_logger_test.py +304 -0
langfun/env/event_handlers/metric_writer.py +726 -0
langfun/env/event_handlers/metric_writer_test.py +214 -0
langfun/env/interface.py +1640 -0
langfun/env/interface_test.py +153 -0
langfun/env/load_balancers.py +59 -0
langfun/env/load_balancers_test.py +141 -0
langfun/env/test_utils.py +507 -0
{langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/METADATA +7 -3
langfun-0.1.2.dev202512150805.dist-info/RECORD +217 -0
langfun/core/eval/v2/runners_test.py +0 -343
langfun/core/structured/schema.py +0 -987
langfun/core/structured/schema_test.py +0 -982
langfun-0.1.2.dev202509120804.dist-info/RECORD +0 -172
{langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/WHEEL +0 -0
{langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/licenses/LICENSE +0 -0
{langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/top_level.txt +0 -0

langfun/core/eval/v2/checkpointing.py CHANGED Viewed

@@ -13,6 +13,8 @@
 # limitations under the License.
 """Checkpointing evaluation runs."""
 import abc
+import datetime
+import os
 import re
 import threading
 import traceback
@@ -29,12 +31,32 @@ Runner = experiment_lib.Runner
 class Checkpointer(experiment_lib.Plugin):
-  """Base class for checkpointing evaluation examples."""
+  """Base class for checkpointing evaluation examples.
+  `Checkpointer` is a plugin that saves the state of processed examples
+  incrementally during an experiment run, allowing the experiment to be resumed
+  later. When an experiment starts, the checkpointer loads any previously saved
+  examples from an earlier run (or a warm-start run) into `experiment.state`,
+  so the runner can skip processing them again.
+  Subclasses should implement `_list_checkpoint_files` to identify
+  checkpoint files to load, and `_save_example` to save a newly processed
+  example.
+  """
   checkpoint_filename: Annotated[
       str,
       'Checkpoint file pattern.'
-  ] = 'checkpoint.bagz'
+  ] = 'checkpoint.jsonl'
+  enable_inprogress_file: Annotated[
+      bool,
+      'If True, write file "<example_id>.inprogress" when example gets started.'
+  ] = True
+  max_ckpt_loading_threads: Annotated[
+      int,
+      'Max number of workers for loading checkpoint files at startup.'
+  ] = 128
   def on_experiment_start(
       self,
@@ -75,6 +97,24 @@ class Checkpointer(experiment_lib.Plugin):
           f'scratch. Example IDs: {example_ids_to_evaluate}.'
       )
+  def on_example_start(
+      self,
+      runner: Runner,
+      experiment: Experiment,
+      example: Example,
+  ) -> None:
+    """Saves the example to the checkpoint file."""
+    if self.enable_inprogress_file:
+      def _save_inprogress_file(example: Example):
+        inprogress_file = runner.current_run.output_path_for(
+            experiment, f'{example.id}.inprogress'
+        )
+        pg.io.writefile(
+            inprogress_file,
+            datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+        )
+      runner.background_run(_save_inprogress_file, example)
   def on_example_complete(
       self,
       runner: Runner,
@@ -91,7 +131,7 @@ class Checkpointer(experiment_lib.Plugin):
       experiment: Experiment,
   ) -> None:
     """Creates the checkpoint file."""
-    ckpt_files = self._list_checkpoint_filenames(runner, experiment)
+    ckpt_files = self._list_checkpoint_files(runner, experiment)
     experiment.info(f'Found {len(ckpt_files)} checkpoint files to load.')
     # Load the checkpoint files in parallel.
@@ -101,18 +141,18 @@ class Checkpointer(experiment_lib.Plugin):
         experiment
     )
     context = dict(counter=0, counter_lock=threading.Lock())
-    copy_ckpt = current_run.input_root != current_run.output_root
     def _load_state(ckpt_file):
       error = None
       with pg.timeit() as t:
         try:
-          experiment.load_state(
-              current_run.input_path_for(experiment, ckpt_file),
+          loaded_examples = experiment.load_state(
+              ckpt_file,
               filter=lambda x: x.id in examples_to_load,
               load_example_metadata=lambda x: x.id in examples_to_load_metadata,
           )
         except BaseException as e:  # pylint: disable=broad-except
+          loaded_examples = []
           error = e
         finally:
           with context['counter_lock']:
@@ -130,34 +170,33 @@ class Checkpointer(experiment_lib.Plugin):
                 f'Skipping the file. ({progress_str})'
             )
-        if not copy_ckpt:
-          return
-        # Copy the checkpoint records to the output directory.
-        try:
-          with pg.io.open_sequence(
-              current_run.output_path_for(experiment, ckpt_file), 'w'
-          ) as o, pg.io.open_sequence(
-              current_run.input_path_for(experiment, ckpt_file), 'r'
-          ) as i:
-            for x in i:
-              o.add(x)
-        except BaseException as e:  # pylint: disable=broad-except
-          experiment.warning(
-              f'Failed to copy checkpoint {ckpt_file!r}: {e}.'
-          )
+        output_ckpt_file = current_run.output_path_for(
+            experiment, os.path.basename(ckpt_file)
+        )
+        if ckpt_file != output_ckpt_file and any(
+            e for e in loaded_examples if not e.has_error
+        ):
+          # Write the error-free warm-start examples to the output checkpoint
+          # file.
+          with SequenceWriter(output_ckpt_file) as writer:
+            for example in loaded_examples:
+              if not example.has_error:
+                writer.add(example)
     _ = list(
         lf.concurrent_map(
-            _load_state, ckpt_files, max_workers=16, silence_on_errors=None
+            _load_state,
+            ckpt_files,
+            max_workers=self.max_ckpt_loading_threads,
+            silence_on_errors=None
         )
     )
   @abc.abstractmethod
-  def _list_checkpoint_filenames(
+  def _list_checkpoint_files(
       self, runner: Runner, experiment: Experiment
   ) -> list[str]:
-    """Lists the checkpoint filenames to restore."""
+    """Lists the checkpoint file paths to restore."""
   @abc.abstractmethod
   def _save_example(
@@ -170,7 +209,12 @@ class Checkpointer(experiment_lib.Plugin):
 class PerExampleCheckpointer(Checkpointer):
-  """Checkpointer that saves each example to a separate file."""
+  """Checkpointer that saves each example to a separate file.
+  This checkpointer saves each processed example to its own checkpoint file,
+  named using the pattern `<checkpoint_filename_prefix>_<example_id>.<ext>`.
+  For example, `checkpoint_1.bagz`, `checkpoint_2.bagz`, etc.
+  """
   def _on_bound(self):
     super()._on_bound()
@@ -178,22 +222,41 @@ class PerExampleCheckpointer(Checkpointer):
     self._checkpoint_file_prefix = prefix
     self._checkpoint_file_ext = ext
-  def _list_checkpoint_filenames(
+  def _list_checkpoint_files(
       self, runner: Runner, experiment: Experiment
   ) -> list[str]:
-    experiment_dir = runner.current_run.input_dir(experiment)
-    filenames = []
+    def _list_checkpoints_from(ckpt_dir: str, examples_to_load: set[int]):
+      ckpt_files = []
+      if pg.io.path_exists(ckpt_dir):
+        regex = re.compile(
+            f'{self._checkpoint_file_prefix}_(\\d+){self._checkpoint_file_ext}'
+            .replace('.', '\\.')
+        )
+        for filename in pg.io.listdir(ckpt_dir):
+          match = regex.match(filename)
+          if match and int(match.group(1)) in examples_to_load:
+            examples_to_load.remove(int(match.group(1)))
+            ckpt_files.append(os.path.join(ckpt_dir, filename))
+      return ckpt_files
     examples_to_load = runner.current_run.examples_to_load(experiment)
-    if pg.io.path_exists(experiment_dir):
-      regex = re.compile(
-          f'{self._checkpoint_file_prefix}_(\\d+){self._checkpoint_file_ext}'
-          .replace('.', '\\.')
+    # Take output directory as the first priority to checkpoints processed in
+    # this run.
+    ckpt_files = _list_checkpoints_from(
+        runner.current_run.output_dir(experiment), examples_to_load
+    )
+    # If the input and output directories are different, also load from the
+    # input directory.
+    if (examples_to_load
+        and runner.current_run.input_root != runner.current_run.output_root):
+      ckpt_files.extend(
+          _list_checkpoints_from(
+              runner.current_run.input_dir(experiment), examples_to_load
+          )
       )
-      for filename in pg.io.listdir(experiment_dir):
-        match = regex.match(filename)
-        if match and int(match.group(1)) in examples_to_load:
-          filenames.append(filename)
-    return filenames
+    return ckpt_files
   def _save_example(
       self,
@@ -235,7 +298,13 @@ class PerExampleCheckpointer(Checkpointer):
 class BulkCheckpointer(Checkpointer):
-  """Checkpointer that saves all examples to a single file."""
+  """Checkpointer that saves all examples of an evaluation to a single file.
+  This checkpointer appends newly processed examples of an evaluation to a
+  single sequence file (e.g., `checkpoint.bagz`). This is often more efficient
+  than `PerExampleCheckpointer` when dealing with a large number of examples
+  or when file system overhead is a concern.
+  """
   def _on_bound(self):
     super()._on_bound()
@@ -287,13 +356,24 @@ class BulkCheckpointer(Checkpointer):
         if self._sequence_writer is not None:
           self._sequence_writer[experiment.id] = sequence_writer
-  def _list_checkpoint_filenames(
+  def _list_checkpoint_files(
       self, runner: Runner, experiment: Experiment
   ) -> list[str]:
-    if pg.io.path_exists(
-        runner.current_run.input_path_for(experiment, self.checkpoint_filename)
-    ):
-      return [self.checkpoint_filename]
+    # Always honor the output directory if it's present, as it contains both
+    # the warm-started examples and newly processed examples.
+    output_ckpt_file = runner.current_run.output_path_for(
+        experiment, self.checkpoint_filename
+    )
+    if pg.io.path_exists(output_ckpt_file):
+      return [output_ckpt_file]
+    if runner.current_run.input_root != runner.current_run.output_root:
+      input_ckpt_file = runner.current_run.input_path_for(
+          experiment, self.checkpoint_filename
+      )
+      if pg.io.path_exists(input_ckpt_file):
+        return [input_ckpt_file]
+    print('CCC', experiment.hash, [])
     return []
   def on_experiment_complete(
@@ -341,12 +421,26 @@ class BulkCheckpointer(Checkpointer):
 class SequenceWriter:
-  """Thread safe sequence writer."""
+  """A thread-safe writer for sequence files (e.g., Bagz) with atomic write.
+  `SequenceWriter` wraps a `pg.io.SequenceWriter` to provide thread-safe
+  `add` and `close` operations, ensuring that examples can be written
+  concurrently from multiple threads without corrupting the sequence file.
+  It writes to a temporary file and renames it to target path on `close` to
+  achieve atomic write. If the target path exists, new examples are appended
+  to existing content.
+  """
   def __init__(self, path: str):
     self._lock = threading.Lock()
     self._path = path
-    self._sequence_writer = pg.io.open_sequence(path, 'a')
+    basename = os.path.basename(path)
+    self._tmp_path = os.path.join(
+        os.path.dirname(path), f'tmp.{basename}'
+    )
+    if pg.io.path_exists(self._path):
+      pg.io.copy(self._path, self._tmp_path)
+    self._sequence_writer = pg.io.open_sequence(self._tmp_path, 'a')
   @property
   def path(self) -> str:
@@ -371,6 +465,14 @@ class SequenceWriter:
         return
       self._sequence_writer.close()
       self._sequence_writer = None
+      pg.io.rename(self._tmp_path, self._path)
+  def __enter__(self):
+    return self
+  def __exit__(self, *args, **kwargs):
+    del args, kwargs
+    self.close()
   def __del__(self):
     self.close()

langfun/core/eval/v2/checkpointing_test.py CHANGED Viewed

@@ -65,7 +65,7 @@ class ExampleCollector(experiment_lib.Plugin):
     return self._examples
   def on_example_complete(
-      self, runner: runners_lib.Runner,
+      self, runner: experiment_lib.Runner,
       experiment: experiment_lib.Experiment,
       example: example_lib.Example,
   ):
@@ -90,7 +90,10 @@ class PerExampleCheckpointerTest(CheckpointerTest):
     root_dir = os.path.join(tempfile.mkdtemp(), 'per_example_checkpointer')
     experiment = eval_test_helper.test_experiment()
     checkpoint_filename = 'checkpoint.jsonl'
-    checkpointer = checkpointing.PerExampleCheckpointer(checkpoint_filename)
+    checkpointer = checkpointing.PerExampleCheckpointer(
+        checkpoint_filename,
+        enable_inprogress_file=True
+    )
     collector = ExampleCollector()
     run = experiment.run(
         root_dir, 'new', runner='sequential', plugins=[checkpointer, collector]
@@ -102,6 +105,10 @@ class PerExampleCheckpointerTest(CheckpointerTest):
         example = collector.examples[i + 1]
         ckpt = run.output_path_for(leaf, f'checkpoint_{example.id}.jsonl')
         self.assertTrue(pg.io.path_exists(ckpt))
+        inprogress_file = run.output_path_for(
+            leaf, f'{example.id}.inprogress'
+        )
+        self.assertTrue(pg.io.path_exists(inprogress_file))
         with pg.io.open_sequence(ckpt) as f:
           examples_from_ckpt = list(iter(f))
           # `eval_test_helper.test_experiment` has two TestEvaluation with

langfun/core/eval/v2/config_saver.py ADDED Viewed

@@ -0,0 +1,37 @@
+# Copyright 2024 The Langfun Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Config saver plugins."""
+import os
+from langfun.core.eval.v2 import experiment as experiment_lib
+class RunConfigSaver(experiment_lib.Plugin):
+  """Saves the current run."""
+  def on_run_start(
+      self,
+      runner: experiment_lib.Runner,
+      root: experiment_lib.Experiment
+  ) -> None:
+    del root  # Unused.
+    self._save_run_config(runner)
+  def _save_run_config(self, runner: experiment_lib.Runner) -> None:
+    def _save():
+      runner.current_run.save(
+          os.path.join(runner.current_run.output_root, 'run.json'),
+          hide_default_values=True,
+      )
+    runner.background_run(_save)

langfun/core/eval/v2/config_saver_test.py ADDED Viewed

@@ -0,0 +1,36 @@
+# Copyright 2024 The Langfun Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Config saver test."""
+import os
+import tempfile
+import unittest
+from langfun.core.eval.v2 import config_saver
+from langfun.core.eval.v2 import eval_test_helper
+from langfun.core.eval.v2.runners import parallel  # pylint: disable=unused-import
+class RunConfigSaverTest(unittest.TestCase):
+  def test_save_run_config(self):
+    root_dir = os.path.join(tempfile.mkdtemp(), 'test_run_config_saver')
+    experiment = eval_test_helper.test_evaluation()
+    run = experiment.run(
+        root_dir, 'new', plugins=[config_saver.RunConfigSaver()]
+    )
+    self.assertTrue(os.path.exists(os.path.join(run.output_root, 'run.json')))
+if __name__ == '__main__':
+  unittest.main()

langfun/core/eval/v2/eval_test_helper.py CHANGED Viewed

@@ -13,6 +13,9 @@
 # limitations under the License.
 """Helper classes and functions for evaluation tests."""
+import threading
+import time
 from langfun.core import language_model
 from langfun.core import llms
 from langfun.core import message as message_lib
@@ -47,6 +50,8 @@ class TestLLM(llms.Fake):
   offset: int = 0
+  __test__ = False
   def _response_from(self, prompt: message_lib.Message) -> message_lib.Message:
     return message_lib.AIMessage(
         str(prompt.metadata.x + prompt.metadata.y + self.offset)
@@ -63,6 +68,8 @@ class TestEvaluation(Evaluation):
   metrics = [metrics_lib.Match()]
   lm: language_model.LanguageModel = TestLLM()
+  __test__ = False
   def process(self, example):
     v = example.input
     if v.x == 5:
@@ -75,7 +82,7 @@ class TestEvaluation(Evaluation):
 class BadJsonConvertible(pg.Object):
-  def to_json(self, *args, **kwargs):
+  def sym_jsonify(self, *args, **kwargs):
     raise ValueError('Cannot convert to JSON.')
@@ -84,6 +91,8 @@ class TestEvaluationWithExampleCheckpointingError(TestEvaluation):
   inputs = test_inputs()
   metrics = [metrics_lib.Match()]
+  __test__ = False
   def process(self, example):
     return 1, dict(
         x=BadJsonConvertible()
@@ -101,6 +110,8 @@ class TestEvaluationWithExampleHtmlGenerationError(Evaluation):
   inputs = test_inputs()
   metrics = [metrics_lib.Match()]
+  __test__ = False
   def process(self, example):
     return 1, dict(
         x=BadHtmlConvertible()
@@ -110,15 +121,22 @@ class TestEvaluationWithExampleHtmlGenerationError(Evaluation):
 class TestEvaluationWithIndexHtmlGenerationError(TestEvaluation):
   """Test evaluation class with bad index HTML generation."""
+  __test__ = False
   def _html_tree_view(self, *args, **kwargs):
     raise ValueError('Cannot render HTML.')
+def test_evaluation(offset: int | pg.hyper.OneOf = 0):
+  """Returns a test evaluation."""
+  return TestEvaluation(lm=TestLLM(offset=offset))
 def test_experiment():
   """Returns a test experiment."""
   return Suite([
-      TestEvaluation(lm=TestLLM(offset=0)),
-      TestEvaluation(lm=TestLLM(offset=pg.oneof(range(5)))),
+      test_evaluation(),
+      test_evaluation(pg.oneof(range(5))),
   ])
@@ -135,3 +153,86 @@ def test_experiment_with_example_html_generation_error():
 def test_experiment_with_index_html_generation_error():
   """Returns a test experiment with bad index HTML."""
   return TestEvaluationWithIndexHtmlGenerationError()
+class TestPlugin(experiment_lib.Plugin):
+  """Plugin for testing."""
+  started_experiments: list[experiment_lib.Experiment] = []
+  completed_experiments: list[experiment_lib.Experiment] = []
+  skipped_experiments: list[experiment_lib.Experiment] = []
+  started_example_ids: list[int] = []
+  completed_example_ids: list[int] = []
+  start_time: float | None = None
+  complete_time: float | None = None
+  __test__ = False
+  def _on_bound(self):
+    super()._on_bound()
+    self._lock = threading.Lock()
+  def on_run_start(
+      self,
+      runner: experiment_lib.Runner,
+      root: experiment_lib.Experiment
+  ) -> None:
+    del root
+    with pg.notify_on_change(False), pg.allow_writable_accessors(True):
+      self.start_time = time.time()
+  def on_run_complete(
+      self,
+      runner: experiment_lib.Runner,
+      root: experiment_lib.Experiment
+  ) -> None:
+    del root
+    with pg.notify_on_change(False), pg.allow_writable_accessors(True):
+      self.complete_time = time.time()
+  def on_experiment_start(
+      self,
+      runner: experiment_lib.Runner,
+      experiment: experiment_lib.Experiment
+  ) -> None:
+    del runner
+    with pg.notify_on_change(False), self._lock:
+      self.started_experiments.append(pg.Ref(experiment))
+  def on_experiment_skipped(
+      self,
+      runner: experiment_lib.Runner,
+      experiment: experiment_lib.Experiment
+  ) -> None:
+    del runner
+    with pg.notify_on_change(False), self._lock:
+      self.skipped_experiments.append(pg.Ref(experiment))
+  def on_experiment_complete(
+      self,
+      runner: experiment_lib.Runner,
+      experiment: experiment_lib.Experiment
+  ) -> None:
+    del runner
+    with pg.notify_on_change(False), self._lock:
+      self.completed_experiments.append(pg.Ref(experiment))
+  def on_example_start(
+      self,
+      runner: experiment_lib.Runner,
+      experiment: experiment_lib.Experiment,
+      example: Example
+  ) -> None:
+    del runner, experiment
+    with pg.notify_on_change(False), self._lock:
+      self.started_example_ids.append(example.id)
+  def on_example_complete(
+      self,
+      runner: experiment_lib.Runner,
+      experiment: experiment_lib.Experiment,
+      example: Example
+  ) -> None:
+    del runner, experiment
+    with pg.notify_on_change(False), self._lock:
+      self.completed_example_ids.append(example.id)

langfun 0.1.2.dev202509120804__py3-none-any.whl → 0.1.2.dev202512150805__py3-none-any.whl

langfun 0.1.2.dev202509120804__py3-none-any.whl → 0.1.2.dev202512150805__py3-none-any.whl