PyPI - langfun - Versions diffs - 0.1.2.dev202501010804__py3-none-any.whl → 0.1.2.dev202501030804__py3-none-any.whl - Mend

langfun 0.1.2.dev202501010804-py3-none-any.whl → 0.1.2.dev202501030804-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25)

langfun/core/__init__.py +0 -4
langfun/core/eval/matching.py +2 -2
langfun/core/eval/scoring.py +6 -2
langfun/core/eval/v2/checkpointing.py +106 -72
langfun/core/eval/v2/checkpointing_test.py +108 -3
langfun/core/eval/v2/eval_test_helper.py +56 -0
langfun/core/eval/v2/evaluation.py +25 -4
langfun/core/eval/v2/evaluation_test.py +11 -0
langfun/core/eval/v2/example.py +11 -1
langfun/core/eval/v2/example_test.py +16 -2
langfun/core/eval/v2/experiment.py +78 -19
langfun/core/eval/v2/experiment_test.py +104 -3
langfun/core/eval/v2/reporting.py +50 -18
langfun/core/eval/v2/reporting_test.py +119 -2
langfun/core/eval/v2/runners.py +7 -4
langfun/core/llms/__init__.py +2 -0
langfun/core/llms/anthropic.py +12 -0
langfun/core/llms/openai.py +23 -37
{langfun-0.1.2.dev202501010804.dist-info → langfun-0.1.2.dev202501030804.dist-info}/METADATA +1 -1
{langfun-0.1.2.dev202501010804.dist-info → langfun-0.1.2.dev202501030804.dist-info}/RECORD +23 -25
langfun/core/repr_utils.py +0 -204
langfun/core/repr_utils_test.py +0 -90
{langfun-0.1.2.dev202501010804.dist-info → langfun-0.1.2.dev202501030804.dist-info}/LICENSE +0 -0
{langfun-0.1.2.dev202501010804.dist-info → langfun-0.1.2.dev202501030804.dist-info}/WHEEL +0 -0
{langfun-0.1.2.dev202501010804.dist-info → langfun-0.1.2.dev202501030804.dist-info}/top_level.txt +0 -0

langfun/core/__init__.py CHANGED Viewed

@@ -123,10 +123,6 @@ from langfun.core.memory import Memory
 # Utility for console output.
 from langfun.core import console
-# Helpers for implementing _repr_xxx_ methods.
-from langfun.core import repr_utils
-Html = repr_utils.Html
 # Utility for event logging.
 from langfun.core import logging

langfun/core/eval/matching.py CHANGED Viewed

@@ -251,14 +251,14 @@ class Matching(base.Evaluation):
     for i, (_, example, output, message) in enumerate(self.matches):
       bgcolor = 'white' if i % 2 == 0 else '#DDDDDD'
       s.write(f'<tr style="background-color: {bgcolor}"><td>{i + 1}</td>')
-      input_str = lf.repr_utils.escape_quoted(
+      input_str = pg.Html.escape(
           pg.format(
               example, verbose=False, max_bytes_len=32,
               custom_format=_maybe_html
           )
       )
       s.write(f'<td style="color:green;white-space:pre-wrap">{input_str}</td>')
-      output_str = lf.repr_utils.escape_quoted(
+      output_str = pg.Html.escape(
           pg.format(
               output, verbose=False, max_bytes_len=32,
               custom_format=_maybe_html

langfun/core/eval/scoring.py CHANGED Viewed

@@ -194,9 +194,13 @@ class Scoring(base.Evaluation):
     for i, (example, output, score, message) in enumerate(self.scored):
       bgcolor = 'white' if i % 2 == 0 else '#DDDDDD'
       s.write(f'<tr style="background-color: {bgcolor}"><td>{i + 1}</td>')
-      input_str = pg.format(example, verbose=False, max_bytes_len=32)
+      input_str = pg.Html.escape(
+          pg.format(example, verbose=False, max_bytes_len=32)
+      )
       s.write(f'<td style="color:green;white-space:pre-wrap">{input_str}</td>')
-      output_str = pg.format(output, verbose=False, max_bytes_len=32)
+      output_str = pg.Html.escape(
+          pg.format(output, verbose=False, max_bytes_len=32)
+      )
       s.write(f'<td style="color:blue;white-space:pre-wrap">{output_str}</td>')
       s.write(f'<td style="color:magenta;white-space:pre-wrap">{score}</td>')
       s.write('<td>')

langfun/core/eval/v2/checkpointing.py CHANGED Viewed

@@ -13,8 +13,10 @@
 # limitations under the License.
 """Checkpointing evaluation runs."""
 import abc
+import re
 import threading
 import traceback
+from typing import Annotated
 import langfun.core as lf
 from langfun.core.eval.v2 import example as example_lib
@@ -29,6 +31,11 @@ Runner = experiment_lib.Runner
 class Checkpointer(experiment_lib.Plugin):
   """Base class for checkpointing evaluation examples."""
+  checkpoint_filename: Annotated[
+      str,
+      'Checkpoint file pattern.'
+  ] = 'checkpoint.bagz'
   def on_experiment_start(
       self,
       runner: Runner,
@@ -37,37 +44,35 @@ class Checkpointer(experiment_lib.Plugin):
     if not experiment.is_leaf:
       return
-    # For refresh runs, we don't want to load the previous state.
-    if not runner.current_run.refresh:
-      if runner.current_run.input_root != runner.current_run.output_root:
+    current_run = runner.current_run
+    if current_run.reprocess is not True:  # pylint: disable=g-bool-id-comparison
+      if current_run.input_root != current_run.output_root:
         experiment.info(
-            f'Warm starting from directory: {runner.current_run.input_root}.'
+            f'Warm starting from directory: {current_run.input_root}.'
         )
       self._load_experiment(runner, experiment)
+    example_ids_to_evaluate = current_run.examples_to_evaluate(experiment)
     if experiment.state.evaluated_examples:
       loaded_example_ids = list(
           sorted(experiment.state.evaluated_examples.keys())
       )
-      example_ids_to_evaluate = (
-          set(runner.current_run.example_ids) if runner.current_run.example_ids
-          else set(range(1, experiment.num_examples + 1))
-      )
       example_ids_to_evaluate -= set(loaded_example_ids)
+      example_ids_to_evaluate = list(sorted(example_ids_to_evaluate))
       experiment.info(
-          f'{len(experiment.state.evaluated_examples)} examples have been '
+          f'{len(experiment.state.evaluated_examples)} examples '
           'loaded from checkpoint files. Their outputs will be used '
-          f'for recomputing metrics. Example IDs: {loaded_example_ids}'
+          f'for recomputing metrics. Example IDs: {loaded_example_ids}.'
       )
       experiment.info(
           f'{len(example_ids_to_evaluate)} examples will be processed from '
-          f'scratch. Example IDs: {list(sorted(example_ids_to_evaluate))}'
+          f'scratch. Example IDs: {example_ids_to_evaluate}.'
       )
     else:
       experiment.info(
           'No examples are loaded from checkpoint files. '
-          f'Experiment {experiment.id} starts from scratch.'
+          f'{len(example_ids_to_evaluate)} examples will be processed from '
+          f'scratch. Example IDs: {example_ids_to_evaluate}.'
       )
   def on_example_complete(
@@ -81,60 +86,36 @@ class Checkpointer(experiment_lib.Plugin):
       experiment.warning(
           f'Example {example.id} has error. Skipping checkpointing.'
       )
-    else:
+    elif example.newly_processed:
       self._save_example(runner, experiment, example)
-  @abc.abstractmethod
-  def _load_experiment(self, runner: Runner, experiment: Experiment) -> None:
-    """Loads the experiment state from checkpoint files."""
-  @abc.abstractmethod
-  def _save_example(
-      self,
-      runner: Runner,
-      experiment: Experiment,
-      example: Example,
-  ) -> None:
-    """Saves an evaluated example."""
-class PerExampleCheckpointer(Checkpointer):
-  """Checkpointer that saves each example to a separate file."""
-  checkpoint_filename: str = 'checkpoint.bagz'
-  def _on_bound(self):
-    super()._on_bound()
-    prefix, ext = self._file_prefix_and_ext(self.checkpoint_filename)
-    self._checkpoint_file_prefix = prefix
-    self._checkpoint_file_ext = ext
   def _load_experiment(
       self,
       runner: Runner,
       experiment: Experiment,
   ) -> None:
     """Creates the checkpoint file."""
-    experiment_dir = runner.current_run.input_dir(experiment)
-    if pg.io.path_exists(experiment_dir):
-      ckpt_files = [
-          runner.current_run.input_path_for(experiment, filename)
-          for filename in pg.io.listdir(experiment_dir)
-          if filename.startswith(self._checkpoint_file_prefix)
-          and filename.endswith(self._checkpoint_file_ext)
-      ]
-    else:
-      ckpt_files = []
+    ckpt_files = self._list_checkpoint_filenames(runner, experiment)
     experiment.info(f'Found {len(ckpt_files)} checkpoint files to load.')
     # Load the checkpoint files in parallel.
+    current_run = runner.current_run
+    examples_to_load = current_run.examples_to_load(experiment)
+    examples_to_load_metadata = current_run.examples_to_load_metadata(
+        experiment
+    )
     context = dict(counter=0, counter_lock=threading.Lock())
+    copy_ckpt = current_run.input_root != current_run.output_root
     def _load_state(ckpt_file):
       error = None
       with pg.timeit() as t:
         try:
-          experiment.load_state(ckpt_file)
+          experiment.load_state(
+              current_run.input_path_for(experiment, ckpt_file),
+              filter=lambda x: x.id in examples_to_load,
+              load_example_metadata=lambda x: x.id in examples_to_load_metadata,
+          )
         except BaseException as e:  # pylint: disable=broad-except
           error = e
         finally:
@@ -144,21 +125,80 @@ class PerExampleCheckpointer(Checkpointer):
           progress_str = f'{context["counter"]}/{len(ckpt_files)}'
           if error is None:
             experiment.info(
-                f'Loaded checkpoint file {ckpt_file} in {t.elapse:.2f} '
+                f'Checkpoint file {ckpt_file!r} loaded in {t.elapse:.2f} '
                 f'seconds. ({progress_str})'
             )
           else:
             experiment.warning(
-                f'Failed to load checkpoint file {ckpt_file}: {error}. '
+                f'Failed to load checkpoint file {ckpt_file!r}: {error}. '
                 f'Skipping the file. ({progress_str})'
             )
+        if not copy_ckpt:
+          return
+        # Copy the checkpoint records to the output directory.
+        try:
+          with pg.io.open_sequence(
+              current_run.output_path_for(experiment, ckpt_file), 'w'
+          ) as o, pg.io.open_sequence(
+              current_run.input_path_for(experiment, ckpt_file), 'r'
+          ) as i:
+            for x in i:
+              o.add(x)
+        except BaseException as e:  # pylint: disable=broad-except
+          experiment.warning(
+              f'Failed to copy checkpoint {ckpt_file!r}: {e}.'
+          )
     _ = list(
         lf.concurrent_map(
             _load_state, ckpt_files, max_workers=16, silence_on_errors=None
         )
     )
+  @abc.abstractmethod
+  def _list_checkpoint_filenames(
+      self, runner: Runner, experiment: Experiment
+  ) -> list[str]:
+    """Lists the checkpoint filenames to restore."""
+  @abc.abstractmethod
+  def _save_example(
+      self,
+      runner: Runner,
+      experiment: Experiment,
+      example: Example,
+  ) -> None:
+    """Saves an evaluated example."""
+class PerExampleCheckpointer(Checkpointer):
+  """Checkpointer that saves each example to a separate file."""
+  def _on_bound(self):
+    super()._on_bound()
+    prefix, ext = self._file_prefix_and_ext(self.checkpoint_filename)
+    self._checkpoint_file_prefix = prefix
+    self._checkpoint_file_ext = ext
+  def _list_checkpoint_filenames(
+      self, runner: Runner, experiment: Experiment
+  ) -> list[str]:
+    experiment_dir = runner.current_run.input_dir(experiment)
+    filenames = []
+    examples_to_load = runner.current_run.examples_to_load(experiment)
+    if pg.io.path_exists(experiment_dir):
+      regex = re.compile(
+          f'{self._checkpoint_file_prefix}_(\\d+){self._checkpoint_file_ext}'
+          .replace('.', '\\.')
+      )
+      for filename in pg.io.listdir(experiment_dir):
+        match = regex.match(filename)
+        if match and int(match.group(1)) in examples_to_load:
+          filenames.append(filename)
+    return filenames
   def _save_example(
       self,
       runner: Runner,
@@ -180,11 +220,11 @@ class PerExampleCheckpointer(Checkpointer):
         writer.add(example)
         writer.close()
         experiment.info(
-            f'Example {example.id} saved to {writer.path}.',
+            f'Example {example.id} checkpointed to {writer.path}.',
         )
       except BaseException as e:  # pylint: disable=broad-except
         experiment.error(
-            f'Failed to save example {example.id} to {writer.path}. '
+            f'Failed to checkpoint example {example.id} to {writer.path}. '
             f'Error: {e}, Stacktrace: \n{traceback.format_exc()}.',
         )
         raise e
@@ -201,8 +241,6 @@ class PerExampleCheckpointer(Checkpointer):
 class BulkCheckpointer(Checkpointer):
   """Checkpointer that saves all examples to a single file."""
-  checkpoint_filename: str = 'checkpoint.bagz'
   def _on_bound(self):
     super()._on_bound()
     self._lock = threading.Lock()
@@ -253,18 +291,14 @@ class BulkCheckpointer(Checkpointer):
         if self._sequence_writer is not None:
           self._sequence_writer[experiment.id] = sequence_writer
-  def _load_experiment(
-      self,
-      runner: Runner,
-      experiment: Experiment,
-  ) -> None:
-    """Creates the checkpoint file."""
-    experiment.load_state(
-        runner.current_run.input_path_for(
-            experiment, self.checkpoint_filename
-        ),
-        raise_if_not_exist=False
-    )
+  def _list_checkpoint_filenames(
+      self, runner: Runner, experiment: Experiment
+  ) -> list[str]:
+    if pg.io.path_exists(
+        runner.current_run.input_path_for(experiment, self.checkpoint_filename)
+    ):
+      return [self.checkpoint_filename]
+    return []
   def on_experiment_complete(
       self,
@@ -299,11 +333,11 @@ class BulkCheckpointer(Checkpointer):
       try:
         writer.add(example)
         experiment.info(
-            f'Example {example.id} added to {writer.path}.',
+            f'Example {example.id} checkpointed to {writer.path}.',
         )
       except BaseException as e:  # pylint: disable=broad-except
         experiment.error(
-            f'Failed to save example {example.id} to {writer.path}. '
+            f'Failed to checkpoint example {example.id} to {writer.path}. '
             f'Error: {e}, Stacktrace: \n{traceback.format_exc()}.',
         )
         raise e
@@ -316,7 +350,7 @@ class SequenceWriter:
   def __init__(self, path: str):
     self._lock = threading.Lock()
     self._path = path
-    self._sequence_writer = pg.io.open_sequence(path, 'w')
+    self._sequence_writer = pg.io.open_sequence(path, 'a')
   @property
   def path(self) -> str:

langfun/core/eval/v2/checkpointing_test.py CHANGED Viewed

@@ -52,10 +52,20 @@ class SequenceWriterTest(unittest.TestCase):
       self.assertEqual(len(list(iter(f))), 1)
-class PerExampleCheckpointerTest(unittest.TestCase):
+class CheckpointerTest(unittest.TestCase):
+  def assert_found_in_log(self, experiment, message):
+    found_error_log = False
+    for log_entry in experiment._log_entries:
+      if log_entry.message.startswith(message):
+        found_error_log = True
+        break
+    self.assertTrue(found_error_log)
+class PerExampleCheckpointerTest(CheckpointerTest):
   def test_checkpointing(self):
-    pg.defaults.loggers.use_stdout()
     root_dir = os.path.join(tempfile.gettempdir(), 'per_example_checkpointer')
     experiment = eval_test_helper.test_experiment()
     checkpoint_filename = 'checkpoint.jsonl'
@@ -85,8 +95,90 @@ class PerExampleCheckpointerTest(unittest.TestCase):
     for leaf in experiment.leaf_nodes:
       self.assertEqual(leaf.progress.num_skipped, num_processed[leaf.id])
+    # Test warm start without reprocess.
+    root_dir = os.path.join(tempfile.gettempdir(), 'per_example_checkpointer2')
+    experiment = eval_test_helper.test_experiment()
+    _ = experiment.run(
+        root_dir, 'new', runner='sequential', plugins=[checkpointer],
+        warm_start_from=run.output_root
+    )
+    for leaf in experiment.leaf_nodes:
+      self.assertEqual(leaf.progress.num_skipped, num_processed[leaf.id])
+    # Test warm start with reprocess.
+    root_dir = os.path.join(tempfile.gettempdir(), 'per_example_checkpointer3')
+    experiment = eval_test_helper.test_experiment()
+    _ = experiment.run(
+        root_dir, 'new', runner='sequential', plugins=[checkpointer],
+        warm_start_from=run.output_root,
+        reprocess=True
+    )
+    for leaf in experiment.leaf_nodes:
+      self.assertEqual(leaf.progress.num_skipped, 0)
-class BulkCheckpointerTest(unittest.TestCase):
+    root_dir = os.path.join(tempfile.gettempdir(), 'per_example_checkpointer4')
+    experiment = eval_test_helper.test_experiment()
+    _ = experiment.run(
+        root_dir, 'new', runner='sequential', plugins=[checkpointer],
+        warm_start_from=run.output_root,
+        reprocess=[1, 2, 3]
+    )
+    for leaf in experiment.leaf_nodes:
+      self.assertEqual(leaf.progress.num_skipped, num_processed[leaf.id] - 3)
+  def test_loading_corrupted_checkpoint(self):
+    root_dir = os.path.join(
+        tempfile.gettempdir(),
+        'per_example_checkpointer_with_corrupted_checkpoint'
+    )
+    experiment = eval_test_helper.TestEvaluation()
+    checkpoint_filename = 'checkpoint.jsonl'
+    checkpointer = checkpointing.PerExampleCheckpointer(checkpoint_filename)
+    run = experiment.run(
+        root_dir, 'new', runner='sequential', plugins=[checkpointer]
+    )
+    num_processed = {}
+    for i in range(experiment.num_examples):
+      example = experiment.state.get(i + 1)
+      ckpt = run.output_path_for(experiment, f'checkpoint_{example.id}.jsonl')
+      if not example.has_error:
+        self.assertTrue(pg.io.path_exists(ckpt))
+        with pg.io.open_sequence(ckpt) as f:
+          self.assertEqual(len(list(iter(f))), 1)
+        # Simulate corrupting the first checkpoint.
+        if i == 0:
+          pg.io.writefile(ckpt, 'bad file')
+        num_processed[example.id] = i + 1
+    root_dir = os.path.join(
+        tempfile.gettempdir(),
+        'per_example_checkpointer_with_corrupted_checkpoint_warm_start'
+    )
+    experiment = eval_test_helper.TestEvaluation()
+    _ = experiment.run(
+        root_dir, 'new', runner='sequential', plugins=[checkpointer],
+        warm_start_from=run.output_root,
+    )
+    for leaf in experiment.leaf_nodes:
+      self.assertEqual(leaf.progress.num_skipped, len(num_processed) - 1)
+    self.assert_found_in_log(experiment, 'Failed to load checkpoint')
+  def test_checkpointing_error(self):
+    root_dir = os.path.join(
+        tempfile.gettempdir(),
+        'per_example_checkpointer_with_checkpointing_error'
+    )
+    experiment = (eval_test_helper
+                  .test_experiment_with_example_checkpointing_error())
+    checkpointer = checkpointing.PerExampleCheckpointer('checkpoint.jsonl')
+    _ = experiment.run(
+        root_dir, 'new', runner='parallel', plugins=[checkpointer]
+    )
+    self.assert_found_in_log(experiment, 'Failed to checkpoint')
+class BulkCheckpointerTest(CheckpointerTest):
   def test_checkpointing(self):
     root_dir = os.path.join(tempfile.gettempdir(), 'test_bulk_checkpointer')
@@ -118,6 +210,19 @@ class BulkCheckpointerTest(unittest.TestCase):
     for leaf in experiment.leaf_nodes:
       self.assertEqual(leaf.progress.num_skipped, num_processed[leaf.id])
+  def test_checkpointing_error(self):
+    root_dir = os.path.join(
+        tempfile.gettempdir(),
+        'bulk_checkpointer_with_checkpointing_error'
+    )
+    experiment = (eval_test_helper
+                  .test_experiment_with_example_checkpointing_error())
+    checkpointer = checkpointing.BulkCheckpointer('checkpoint.jsonl')
+    _ = experiment.run(
+        root_dir, 'new', runner='parallel', plugins=[checkpointer]
+    )
+    self.assert_found_in_log(experiment, 'Failed to checkpoint')
 if __name__ == '__main__':
   unittest.main()

langfun/core/eval/v2/eval_test_helper.py CHANGED Viewed

@@ -72,9 +72,65 @@ class TestEvaluation(Evaluation):
     )
+class BadJsonConvertible(pg.Object):
+  def to_json(self, *args, **kwargs):
+    raise ValueError('Cannot convert to JSON.')
+class TestEvaluationWithExampleCheckpointingError(TestEvaluation):
+  """Test evaluation class with bad example checkpointing."""
+  inputs = test_inputs()
+  metrics = [metrics_lib.Match()]
+  def process(self, v):
+    return 1, dict(
+        x=BadJsonConvertible()
+    )
+class BadHtmlConvertible(pg.Object, pg.views.HtmlTreeView.Extension):
+  def _html_tree_view(self, *args, **kwargs):
+    raise ValueError('Cannot render HTML.')
+class TestEvaluationWithExampleHtmlGenerationError(Evaluation):
+  """Test evaluation class with bad example HTML generation."""
+  inputs = test_inputs()
+  metrics = [metrics_lib.Match()]
+  def process(self, v):
+    return 1, dict(
+        x=BadHtmlConvertible()
+    )
+class TestEvaluationWithIndexHtmlGenerationError(TestEvaluation):
+  """Test evaluation class with bad index HTML generation."""
+  def _html_tree_view(self, *args, **kwargs):
+    raise ValueError('Cannot render HTML.')
 def test_experiment():
   """Returns a test experiment."""
   return Suite([
       TestEvaluation(lm=TestLLM(offset=0)),
       TestEvaluation(lm=TestLLM(offset=pg.oneof(range(5)))),
   ])
+def test_experiment_with_example_checkpointing_error():
+  """Returns a test experiment with example checkpointing error."""
+  return TestEvaluationWithExampleCheckpointingError()
+def test_experiment_with_example_html_generation_error():
+  """Returns a test experiment with bad example HTML."""
+  return TestEvaluationWithExampleHtmlGenerationError()
+def test_experiment_with_index_html_generation_error():
+  """Returns a test experiment with bad index HTML."""
+  return TestEvaluationWithIndexHtmlGenerationError()

langfun/core/eval/v2/evaluation.py CHANGED Viewed

@@ -264,11 +264,21 @@ class Evaluation(experiment_lib.Experiment):
     return self._state
   def load_state(
-      self, state_file: str, raise_if_not_exist: bool = False
+      self,
+      state_file: str,
+      *,
+      load_example_metadata: bool = True,
+      filter: Callable[[example_lib.Example], bool] | None = None,  # pylint: disable=redefined-builtin
+      raise_if_not_exist: bool = False
   ) -> None:
     """Loads saved state from a sequence IO file."""
     if pg.io.path_exists(state_file):
-      self._state.load(state_file, self.example_input_by_id)
+      self._state.load(
+          state_file,
+          example_input_by_id=self.example_input_by_id,
+          load_example_metadata=load_example_metadata,
+          filter=filter,
+      )
     elif raise_if_not_exist:
       raise ValueError(f'State file {state_file} does not exist.')
@@ -680,14 +690,25 @@ class EvaluationState:
     self._evaluated_examples: dict[int, example_lib.Example] = {}
   def load(
-      self, state_file: str, example_input_by_id: Callable[[int], Any]) -> None:
+      self,
+      state_file: str,
+      *,
+      example_input_by_id: Callable[[int], Any] | None = None,
+      load_example_metadata: bool | Callable[
+          [example_lib.Example], bool] = True,
+      filter: Callable[[example_lib.Example], bool] | None = None,  # pylint: disable=redefined-builtin
+  ) -> None:
     """Loads the state from the example sequence file."""
     with pg.io.sequence.open_sequence(state_file) as f:
       for record in f:
         example = pg.from_json_str(
-            record, example_input_by_id=example_input_by_id
+            record,
+            example_input_by_id=example_input_by_id,
+            load_example_metadata=load_example_metadata
         )
         assert isinstance(example, example_lib.Example), example
+        if filter is not None and not filter(example):
+          continue
         self._evaluated_examples[example.id] = example
   @property

langfun/core/eval/v2/evaluation_test.py CHANGED Viewed

@@ -138,6 +138,17 @@ class EvaluationTest(unittest.TestCase):
     self.assertEqual(example.usage_summary.uncached.total.total_tokens, 0)
     self.assertEqual(example.usage_summary.uncached.total.num_requests, 0)
+    # Test load_state with filter.
+    exp.reset()
+    self.assertEqual(len(exp._state.evaluated_examples), 0)
+    exp.load_state(state_file, filter=lambda x: x.id == 3)
+    self.assertEqual(len(exp._state.evaluated_examples), 1)
+    exp.reset()
+    self.assertEqual(len(exp._state.evaluated_examples), 0)
+    exp.load_state(state_file, filter=lambda x: x.id == 1)
+    self.assertEqual(len(exp._state.evaluated_examples), 0)
   def test_html_view(self):
     exp = eval_test_helper.TestEvaluation()
     exp.debug('debug message')

langfun/core/eval/v2/example.py CHANGED Viewed

@@ -101,6 +101,7 @@ class Example(pg.JSONConvertible, pg.views.HtmlTreeView.Extension):
       json_value: dict[str, Any],
       *,
       example_input_by_id: Callable[[int], Any] | None = None,
+      load_example_metadata: bool | Callable[['Example'], bool] = False,
       **kwargs
   ) -> 'Example':
     """Creates an example from the JSON representation."""
@@ -128,12 +129,21 @@ class Example(pg.JSONConvertible, pg.views.HtmlTreeView.Extension):
       pg.traverse(example, _visit)
       return list(referred_types)
+    # We delay loading the metadata until the other parts of the example are
+    # loaded. So we could apply the filter to decide whether to load the
+    # metadata.
+    metadata_dict = json_value.pop('metadata', None)
     with pg.JSONConvertible.load_types_for_deserialization(
         *example_class_defs(example_input)
     ):
-      return cls(
+      example = cls(
           **{k: pg.from_json(v, **kwargs) for k, v in json_value.items()}
       )
+      if callable(load_example_metadata):
+        load_example_metadata = load_example_metadata(example)
+      if load_example_metadata:
+        example.metadata = pg.from_json(metadata_dict, **kwargs)
+      return example
   #
   # HTML rendering.

langfun/core/eval/v2/example_test.py CHANGED Viewed

@@ -70,17 +70,31 @@ class ExampleTest(unittest.TestCase):
     self.assertEqual(
         pg.from_json_str(
             json_str,
-            example_input_by_id=lambda i: inputs[i - 1]
+            example_input_by_id=lambda i: inputs[i - 1],
+            load_example_metadata=True,
         ),
         ex
     )
+    self.assertEqual(
+        pg.from_json_str(
+            json_str,
+            example_input_by_id=lambda i: inputs[i - 1],
+            load_example_metadata=False,
+        ),
+        Example(
+            id=1,
+            input=inputs[0],
+            output=inputs[0].a(1),
+            metadata={}
+        )
+    )
     pg.JSONConvertible._TYPE_REGISTRY._type_to_cls_map.pop(
         inputs[0].a.__type_name__
     )
     pg.JSONConvertible._TYPE_REGISTRY._type_to_cls_map.pop(
         inputs[0].b.__type_name__
     )
-    v = pg.from_json_str(json_str, auto_dict=True)
+    v = pg.from_json_str(json_str, auto_dict=True, load_example_metadata=True)
     v.output.pop('type_name')
     v.metadata.b.pop('type_name')
     self.assertEqual(

langfun 0.1.2.dev202501010804__py3-none-any.whl → 0.1.2.dev202501030804__py3-none-any.whl

langfun 0.1.2.dev202501010804-py3-none-any.whl → 0.1.2.dev202501030804-py3-none-any.whl