PyPI - langfun - Versions diffs - 0.1.2.dev202501020804__py3-none-any.whl → 0.1.2.dev202501040804__py3-none-any.whl - Mend

langfun 0.1.2.dev202501020804__py3-none-any.whl → 0.1.2.dev202501040804__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

langfun/core/__init__.py +0 -4
langfun/core/eval/matching.py +2 -2
langfun/core/eval/scoring.py +6 -2
langfun/core/eval/v2/checkpointing.py +106 -72
langfun/core/eval/v2/checkpointing_test.py +108 -3
langfun/core/eval/v2/eval_test_helper.py +56 -0
langfun/core/eval/v2/evaluation.py +25 -4
langfun/core/eval/v2/evaluation_test.py +11 -0
langfun/core/eval/v2/example.py +11 -1
langfun/core/eval/v2/example_test.py +16 -2
langfun/core/eval/v2/experiment.py +83 -19
langfun/core/eval/v2/experiment_test.py +121 -3
langfun/core/eval/v2/reporting.py +60 -18
langfun/core/eval/v2/reporting_test.py +119 -2
langfun/core/eval/v2/runners.py +7 -4
langfun/core/llms/__init__.py +4 -0
langfun/core/llms/anthropic.py +12 -0
langfun/core/llms/google_genai.py +11 -0
langfun/core/llms/openai.py +23 -37
langfun/core/llms/vertexai.py +13 -1
{langfun-0.1.2.dev202501020804.dist-info → langfun-0.1.2.dev202501040804.dist-info}/METADATA +1 -1
{langfun-0.1.2.dev202501020804.dist-info → langfun-0.1.2.dev202501040804.dist-info}/RECORD +25 -27
langfun/core/repr_utils.py +0 -204
langfun/core/repr_utils_test.py +0 -90
{langfun-0.1.2.dev202501020804.dist-info → langfun-0.1.2.dev202501040804.dist-info}/LICENSE +0 -0
{langfun-0.1.2.dev202501020804.dist-info → langfun-0.1.2.dev202501040804.dist-info}/WHEEL +0 -0
{langfun-0.1.2.dev202501020804.dist-info → langfun-0.1.2.dev202501040804.dist-info}/top_level.txt +0 -0

langfun/core/eval/v2/experiment.py CHANGED Viewed

@@ -105,8 +105,8 @@ class Experiment(lf.Component, pg.views.HtmlTreeView.Extension):
     # metrics as needed.
     experiment.run(root_dir, '20241031_1')
-    # Refresh the previous run located in 'run_20241031_1'.
-    experiment.run(root_dir, '20241031_1', refresh=True)
+    # Reprocess the previous run located in 'run_20241031_1'.
+    experiment.run(root_dir, '20241031_1', reprocess=True)
     ```
   # Experiment Registration and Lookup
@@ -380,7 +380,8 @@ class Experiment(lf.Component, pg.views.HtmlTreeView.Extension):
       filter: Callable[['Experiment'], bool] | None = None,   # pylint: disable=redefined-builtin
       example_ids: list[int] | None = None,
       raise_if_has_error: bool = False,
-      refresh: bool = False,
+      reprocess: bool | list[int] = False,
+      generate_example_html: Literal['new', 'all', 'no'] | list[int] = 'new',
       process_timeout: int | None = None,
       use_cache: Literal['global', 'per_dataset', 'no'] = 'per_dataset',
       note: str | None = None,
@@ -391,22 +392,25 @@ class Experiment(lf.Component, pg.views.HtmlTreeView.Extension):
     """Runs the experiment.
     Examples:
-      # Start a new run.
-      experiment.run('new')
+      # Start a new run under root_dir.
+      experiment.run(root_dir, 'new')
       # Continue the latest experiment run.
-      experiment.run('latest')
+      experiment.run(root_dir, 'latest')
       # Continue the latest experiment run or start a new run if it does not
       # exist.
-      experiment.run()
+      experiment.run(root_dir)
-      # Start a new run and warm start from a previous run under sub-dir
-      # 'run_20241031_1'.
-      experiment.run('new', warm_start_from='20241031_1')
+      # Start a new run and warm start from another run's directory
+      # '/path/to/another/run_20241031_1/'.
+      experiment.run(
+          root_dir, 'new',
+          warm_start_from='/path/to/another/run_20241031_1/'
+      )
-      # Refresh previous run under sub-dir 'run_20241031_1'.
-      experiment.run('20241031_1', refresh=True)
+      # Reprocess previous run under sub-dir 'run_20241031_1'.
+      experiment.run(root_dir, '20241031_1', reprocess=True)
     Args:
       root_dir: The root of the output directory of the experiment.
@@ -426,8 +430,18 @@ class Experiment(lf.Component, pg.views.HtmlTreeView.Extension):
       example_ids: The example IDs to run. If None, it will run all examples.
       raise_if_has_error: If True, it will raise an error if any example fails.
         Otherwise, it will continue and report the error in the output.
-      refresh: Whether to refresh the experiment. If True, it will delete the
-        data under the current experiment run directory and start a new run.
+      reprocess: A boolean or a list of example IDs. If boolean, it indicates
+        that whether all the examples to be evaluated will be reprocessed,
+        meaning that existing checkpoints will be ignored. If a list of
+        example IDs, it indicates that only the specified examples will be
+        reprocessed.
+      generate_example_html: Among 'new', 'all', 'no' or a list of example IDs.
+        If 'new', generate HTML files for all newly processed examples, and
+          keep/copy existing HTML files for unchanged examples.
+        If 'all', generate HTML files for all examples.
+        If 'no', do not generate HTML files for any examples.
+        If a list of example IDs, generate HTML files for the specified
+        examples.
       process_timeout: The timeout in seconds for each process. If None, it
         will use the default timeout for the runner.
       use_cache: Whether to use LLM cache for the experiment.
@@ -454,7 +468,8 @@ class Experiment(lf.Component, pg.views.HtmlTreeView.Extension):
             filter=filter,
             example_ids=example_ids,
             raise_if_has_error=raise_if_has_error,
-            refresh=refresh,
+            reprocess=reprocess,
+            generate_example_html=generate_example_html,
             use_cache=use_cache,
             process_timeout=process_timeout,
             note=note,
@@ -815,14 +830,27 @@ class Run(pg.Object, pg.views.html.HtmlTreeView.Extension):
       'The user tags for the current run.'
   ] = []
-  refresh: Annotated[
-      bool,
+  reprocess: Annotated[
+      bool | list[int],
       (
-          'If True, it will delete the data under the current '
-          'run directory and start a new run.'
+          'If True, it will reprocess all examples under the current '
+          'run directory. If a list of integers, examples of the given IDS '
+          'will be reprocessed.'
       )
   ] = False
+  generate_example_html: Annotated[
+      Literal['new', 'all', 'no'] | list[int],
+      (
+          'If "new", generate HTML files for all newly processed examples, '
+          'and keep/copy existing HTML files for unchanged examples. '
+          'If "all", generate HTML files for all examples. '
+          'If "no", do not generate HTML files for any examples. '
+          'If a list of example IDs, generate HTML files for the specified '
+          'examples.'
+      )
+  ] = 'new'
   filter: Annotated[
       Callable[[Experiment], bool] | None,
       'A filter to decide whether a leaf experiment should be run or not.'
@@ -873,6 +901,42 @@ class Run(pg.Object, pg.views.html.HtmlTreeView.Extension):
     """Returns the output path for the experiment."""
     return os.path.join(self.output_dir(experiment), relative_path)
+  def examples_to_evaluate(self, experiment: Experiment) -> set[int]:
+    """Returns the example IDs to evaluate."""
+    if not experiment.is_leaf:
+      return set()
+    return set(
+        self.example_ids if self.example_ids else
+        range(1, experiment.num_examples + 1)
+    )
+  def examples_to_reprocess(self, experiment: Experiment) -> set[int]:
+    """Returns the example IDs to reprocess per request."""
+    if not self.reprocess:
+      return set()
+    reprocess_ids = self.examples_to_evaluate(experiment)
+    if isinstance(self.reprocess, list):
+      reprocess_ids &= set(self.reprocess)
+    return reprocess_ids
+  def examples_to_load(self, experiment: Experiment) -> set[int]:
+    """Returns the example IDs to load from checkpoint files.."""
+    load_ids = self.examples_to_evaluate(experiment)
+    if isinstance(self.generate_example_html, list):
+      load_ids |= set(self.generate_example_html)
+    load_ids -= self.examples_to_reprocess(experiment)
+    return load_ids
+  def examples_to_load_metadata(self, experiment: Experiment) -> set[int]:
+    """Returns the example IDs to load the metadata."""
+    load_metadata_ids = set()
+    if isinstance(self.generate_example_html, list):
+      load_metadata_ids = set(self.generate_example_html)
+    elif self.generate_example_html == 'all':
+      load_metadata_ids = self.examples_to_evaluate(experiment)
+    load_metadata_ids -= self.examples_to_reprocess(experiment)
+    return load_metadata_ids
 class Runner(pg.Object):
   """Interface for experiment runner."""

langfun/core/eval/v2/experiment_test.py CHANGED Viewed

@@ -31,10 +31,10 @@ Runner = experiment_lib.Runner
 @pg.functor()
-def sample_inputs():
+def sample_inputs(num_examples: int = 1):
   return [
       pg.Dict(x=1)
-  ]
+  ] * num_examples
 class MyEvaluation(Evaluation):
@@ -208,7 +208,7 @@ class RunIdTest(unittest.TestCase):
 class RunTest(unittest.TestCase):
-  def test_basic(self):
+  def test_input_output_paths(self):
     run = Run(
         '/root',
         RunId.from_id('20241102_0'),
@@ -270,6 +270,124 @@ class RunTest(unittest.TestCase):
         )
     )
+  def test_examples_start_from_scratch(self):
+    run = Run(
+        '/root',
+        RunId.from_id('20241102_0'),
+        pg.Ref(Suite([
+            MyEvaluation(replica_id=0, inputs=sample_inputs(10)),
+        ])),
+    )
+    root = run.experiment
+    self.assertEqual(run.examples_to_evaluate(root), set())
+    self.assertEqual(run.examples_to_reprocess(root), set())
+    self.assertEqual(run.examples_to_load(root), set())
+    self.assertEqual(run.examples_to_load_metadata(root), set())
+    exp = root.leaf_nodes[0]
+    self.assertEqual(run.examples_to_evaluate(exp), set(range(1, 11)))
+    self.assertEqual(run.examples_to_reprocess(exp), set())
+    self.assertEqual(run.examples_to_load(exp), set(range(1, 11)))
+    self.assertEqual(run.examples_to_load_metadata(exp), set())
+  def test_examples_with_example_ids(self):
+    run = Run(
+        '/root',
+        RunId.from_id('20241102_0'),
+        pg.Ref(Suite([
+            MyEvaluation(replica_id=0, inputs=sample_inputs(10)),
+        ])),
+        example_ids=[1, 3, 5]
+    )
+    exp = run.experiment.leaf_nodes[0]
+    self.assertEqual(run.examples_to_evaluate(exp), set([1, 3, 5]))
+    self.assertEqual(run.examples_to_reprocess(exp), set())
+    self.assertEqual(run.examples_to_load(exp), set([1, 3, 5]))
+    self.assertEqual(run.examples_to_load_metadata(exp), set())
+  def test_examples_with_reprocess_all(self):
+    run = Run(
+        '/root',
+        RunId.from_id('20241102_0'),
+        pg.Ref(Suite([
+            MyEvaluation(replica_id=0, inputs=sample_inputs(10)),
+        ])),
+        example_ids=[1, 3, 5],
+        reprocess=True
+    )
+    exp = run.experiment.leaf_nodes[0]
+    self.assertEqual(run.examples_to_evaluate(exp), set([1, 3, 5]))
+    self.assertEqual(run.examples_to_reprocess(exp), set([1, 3, 5]))
+    self.assertEqual(run.examples_to_load(exp), set())
+    self.assertEqual(run.examples_to_load_metadata(exp), set())
+  def test_examples_with_reprocess_some(self):
+    run = Run(
+        '/root',
+        RunId.from_id('20241102_0'),
+        pg.Ref(Suite([
+            MyEvaluation(replica_id=0, inputs=sample_inputs(10)),
+        ])),
+        example_ids=[1, 3, 5],
+        reprocess=[1],
+    )
+    exp = run.experiment.leaf_nodes[0]
+    self.assertEqual(run.examples_to_evaluate(exp), set([1, 3, 5]))
+    self.assertEqual(run.examples_to_reprocess(exp), set([1]))
+    self.assertEqual(run.examples_to_load(exp), set([3, 5]))
+    self.assertEqual(run.examples_to_load_metadata(exp), set())
+  def test_examples_with_generate_example_html_all(self):
+    run = Run(
+        '/root',
+        RunId.from_id('20241102_0'),
+        pg.Ref(Suite([
+            MyEvaluation(replica_id=0, inputs=sample_inputs(10)),
+        ])),
+        example_ids=[1, 3, 5],
+        reprocess=[1],
+        generate_example_html='all',
+    )
+    exp = run.experiment.leaf_nodes[0]
+    self.assertEqual(run.examples_to_evaluate(exp), set([1, 3, 5]))
+    self.assertEqual(run.examples_to_reprocess(exp), set([1]))
+    self.assertEqual(run.examples_to_load(exp), set([3, 5]))
+    self.assertEqual(run.examples_to_load_metadata(exp), set([3, 5]))
+  def test_examples_with_generate_example_html_new(self):
+    run = Run(
+        '/root',
+        RunId.from_id('20241102_0'),
+        pg.Ref(Suite([
+            MyEvaluation(replica_id=0, inputs=sample_inputs(10)),
+        ])),
+        example_ids=[1, 3, 5],
+        reprocess=[1],
+        generate_example_html='new',
+    )
+    exp = run.experiment.leaf_nodes[0]
+    self.assertEqual(run.examples_to_evaluate(exp), set([1, 3, 5]))
+    self.assertEqual(run.examples_to_reprocess(exp), set([1]))
+    self.assertEqual(run.examples_to_load(exp), set([3, 5]))
+    self.assertEqual(run.examples_to_load_metadata(exp), set())
+  def test_examples_with_generate_example_html_some(self):
+    run = Run(
+        '/root',
+        RunId.from_id('20241102_0'),
+        pg.Ref(Suite([
+            MyEvaluation(replica_id=0, inputs=sample_inputs(10)),
+        ])),
+        example_ids=[1, 3, 5],
+        reprocess=[1],
+        generate_example_html=[1, 2, 3],
+    )
+    exp = run.experiment.leaf_nodes[0]
+    self.assertEqual(run.examples_to_evaluate(exp), set([1, 3, 5]))
+    self.assertEqual(run.examples_to_reprocess(exp), set([1]))
+    self.assertEqual(run.examples_to_load(exp), set([2, 3, 5]))
+    self.assertEqual(run.examples_to_load_metadata(exp), set([2, 3]))
 class RunnerTest(unittest.TestCase):

langfun/core/eval/v2/reporting.py CHANGED Viewed

@@ -172,11 +172,11 @@ class HtmlReporter(experiment_lib.Plugin):
           )
           html.save(index_html_path)
           experiment.info(
-              f'Generated HTML {index_html_path!r} in {t.elapse:.2f} seconds.',
+              f'Updated {index_html_path!r} in {t.elapse:.2f} seconds.',
           )
       except BaseException as e:  # pylint: disable=broad-except
         experiment.error(
-            f'Failed to save HTML {index_html_path!r}. '
+            f'Failed to generate {index_html_path!r}. '
             f'Error: {e}, Stacktrace: \n{traceback.format_exc()}.',
         )
         raise e
@@ -194,26 +194,68 @@ class HtmlReporter(experiment_lib.Plugin):
   def _save_example_html(
       self, runner: Runner, experiment: Experiment, example: Example
   ) -> None:
-    """Saves the example."""
-    def _save():
+    """Saves the example in HTML format."""
+    current_run = runner.current_run
+    def _generate():
       try:
-        html = example.to_html(
-            collapse_level=None,
-            enable_summary_tooltip=False,
-            extra_flags=dict(
-                # For properly rendering the next link.
-                num_examples=getattr(experiment, 'num_examples', None)
-            ),
-        )
-        html.save(
-            runner.current_run.output_path_for(
-                experiment, f'{example.id}.html'
-            )
+        with pg.timeit() as t:
+          html = example.to_html(
+              collapse_level=None,
+              enable_summary_tooltip=False,
+              extra_flags=dict(
+                  # For properly rendering the next link.
+                  num_examples=getattr(experiment, 'num_examples', None)
+              ),
+          )
+          html.save(
+              runner.current_run.output_path_for(
+                  experiment, f'{example.id}.html'
+              )
+          )
+        experiment.info(
+            f'\'{example.id}.html\' generated in {t.elapse:.2f} seconds. '
         )
       except BaseException as e:  # pylint: disable=broad-except
         experiment.error(
-            f'Failed to save HTML {example.id}.html. '
+            f'Failed to generate \'{example.id}.html\'. '
             f'Error: {e}, Stacktrace: \n{traceback.format_exc()}.',
         )
         raise e
-    runner.background_run(_save)
+    def _copy():
+      src_file = current_run.input_path_for(experiment, f'{example.id}.html')
+      dest_file = current_run.output_path_for(experiment, f'{example.id}.html')
+      if src_file == dest_file:
+        return
+      if not pg.io.path_exists(src_file):
+        experiment.warning(
+            f'Skip copying \'{example.id}.html\' as '
+            f'{src_file!r} does not exist.'
+        )
+        return
+      try:
+        with pg.timeit() as t, pg.io.open(src_file, 'r') as src:
+          content = src.read()
+          with pg.io.open(dest_file, 'w') as dest:
+            dest.write(content)
+        experiment.info(
+            f'\'{example.id}.html\' copied in {t.elapse:.2f} seconds.'
+        )
+      except BaseException as e:  # pylint: disable=broad-except
+        experiment.error(
+            f'Failed to copy {src_file!r} to {dest_file!r}. Error: {e}.'
+        )
+        raise e
+    generate_example_html = current_run.generate_example_html
+    if (generate_example_html == 'all'
+        or (generate_example_html == 'new' and example.newly_processed)
+        or (isinstance(generate_example_html, list)
+            and example.id in generate_example_html)):
+      op = _generate
+    else:
+      op = _copy
+    runner.background_run(op)

langfun/core/eval/v2/reporting_test.py CHANGED Viewed

@@ -15,6 +15,7 @@ import os
 import tempfile
 import unittest
+from langfun.core.eval.v2 import checkpointing
 from langfun.core.eval.v2 import eval_test_helper
 from langfun.core.eval.v2 import reporting
 from langfun.core.eval.v2 import runners as runners_lib  # pylint: disable=unused-import
@@ -26,15 +27,131 @@ class ReportingTest(unittest.TestCase):
   def test_reporting(self):
     root_dir = os.path.join(tempfile.gettempdir(), 'test_reporting')
     experiment = eval_test_helper.test_experiment()
+    checkpointer = checkpointing.BulkCheckpointer('checkpoint.jsonl')
+    reporter = reporting.HtmlReporter()
+    run = experiment.run(root_dir, 'new', plugins=[checkpointer, reporter])
+    self.assertTrue(
+        pg.io.path_exists(run.output_path_for(experiment, 'summary.html'))
+    )
+    for leaf in experiment.leaf_nodes:
+      self.assertTrue(
+          pg.io.path_exists(run.output_path_for(leaf, 'index.html'))
+      )
+      for i in range(leaf.num_examples):
+        self.assertTrue(
+            pg.io.path_exists(run.output_path_for(leaf, f'{i + 1}.html'))
+        )
+      found_generation_log = False
+      for log_entry in leaf._log_entries:
+        if 'generated in' in log_entry.message:
+          found_generation_log = True
+          break
+      self.assertTrue(found_generation_log)
+    # Test warm start.
+    root_dir = os.path.join(tempfile.gettempdir(), 'test_reporting2')
+    experiment = eval_test_helper.test_experiment()
+    run = experiment.run(
+        root_dir, 'new', plugins=[checkpointer, reporter],
+        warm_start_from=run.output_root
+    )
+    self.assertTrue(
+        pg.io.path_exists(run.output_path_for(experiment, 'summary.html'))
+    )
+    for leaf in experiment.leaf_nodes:
+      self.assertTrue(
+          pg.io.path_exists(run.output_path_for(leaf, 'index.html'))
+      )
+      for i in range(leaf.num_examples):
+        self.assertTrue(
+            pg.io.path_exists(run.output_path_for(leaf, f'{i + 1}.html'))
+        )
+      found_copy_log = False
+      for log_entry in leaf._log_entries:
+        if 'copied in' in log_entry.message:
+          found_copy_log = True
+          break
+      self.assertTrue(found_copy_log)
+  def test_index_html_generation_error(self):
+    root_dir = os.path.join(
+        tempfile.gettempdir(),
+        'test_reporting_with_index_html_generation_error'
+    )
+    experiment = (eval_test_helper
+                  .test_experiment_with_index_html_generation_error())
     reporter = reporting.HtmlReporter()
     run = experiment.run(root_dir, 'new', plugins=[reporter])
-    pg.io.path_exists(run.output_path_for(experiment, 'summary.html'))
+    self.assertFalse(
+        pg.io.path_exists(run.output_path_for(experiment, 'summary.html'))
+    )
+    for leaf in experiment.leaf_nodes:
+      self.assertFalse(
+          pg.io.path_exists(run.output_path_for(leaf, 'index.html'))
+      )
+    found_error_log = False
+    for log_entry in experiment._log_entries:
+      if log_entry.message.startswith('Failed to generate'):
+        found_error_log = True
+        break
+    self.assertTrue(found_error_log)
+  def test_example_html_generation_error(self):
+    root_dir = os.path.join(
+        tempfile.gettempdir(),
+        'test_reporting_with_example_html_generation_error'
+    )
+    experiment = (eval_test_helper
+                  .test_experiment_with_example_html_generation_error())
+    checkpointer = checkpointing.BulkCheckpointer('checkpoint.jsonl')
+    reporter = reporting.HtmlReporter()
+    run = experiment.run(root_dir, 'new', plugins=[checkpointer, reporter])
+    self.assertTrue(
+        pg.io.path_exists(run.output_path_for(experiment, 'summary.html'))
+    )
+    for leaf in experiment.leaf_nodes:
+      self.assertTrue(
+          pg.io.path_exists(run.output_path_for(leaf, 'index.html'))
+      )
+      for i in range(leaf.num_examples):
+        self.assertFalse(
+            pg.io.path_exists(run.output_path_for(leaf, f'{i + 1}.html'))
+        )
+    found_error_log = False
+    for log_entry in experiment._log_entries:
+      if log_entry.message.startswith('Failed to generate'):
+        found_error_log = True
+        break
+    self.assertTrue(found_error_log)
+    # Test warm start.
+    root_dir = os.path.join(
+        tempfile.gettempdir(),
+        'test_reporting_with_example_html_generation_error2'
+    )
+    experiment = (eval_test_helper
+                  .test_experiment_with_example_html_generation_error())
+    run = experiment.run(
+        root_dir, 'new', plugins=[checkpointer, reporter],
+        warm_start_from=run.output_root
+    )
+    self.assertTrue(
+        pg.io.path_exists(run.output_path_for(experiment, 'summary.html'))
+    )
     for leaf in experiment.leaf_nodes:
       self.assertTrue(
           pg.io.path_exists(run.output_path_for(leaf, 'index.html'))
       )
       for i in range(leaf.num_examples):
-        pg.io.path_exists(run.output_path_for(leaf, f'{i + 1}.html'))
+        self.assertFalse(
+            pg.io.path_exists(run.output_path_for(leaf, f'{i + 1}.html'))
+        )
+    found_error_log = False
+    for log_entry in experiment._log_entries:
+      if log_entry.message.startswith('Skip copying'):
+        found_error_log = True
+        break
+    self.assertTrue(found_error_log)
 if __name__ == '__main__':

langfun/core/eval/v2/runners.py CHANGED Viewed

@@ -123,6 +123,7 @@ class RunnerBase(Runner):
   def on_experiment_start(self, experiment: Experiment) -> None:
     """Called when an evaluation is started."""
     # Start the progress of the evaluation.
+    num_examples_to_evaluate = 0
     if experiment.is_leaf:
       assert isinstance(experiment, Evaluation)
       num_examples_to_evaluate = (
@@ -130,10 +131,6 @@ class RunnerBase(Runner):
           if self.current_run.example_ids else experiment.num_examples
       )
       experiment.progress.start(total=num_examples_to_evaluate)
-      experiment.info(
-          'Starting evaluation %s with %d examples to evaluate.'
-          % (experiment.id, num_examples_to_evaluate)
-      )
     else:
       experiment.progress.start(total=len(experiment.leaf_nodes))
@@ -141,6 +138,12 @@ class RunnerBase(Runner):
     for plugin in self._all_plugins(experiment):
       plugin.on_experiment_start(self, experiment)
+    if experiment.is_leaf:
+      experiment.info(
+          f'Starting evaluation {experiment.id!r} with '
+          f'{num_examples_to_evaluate} examples to evaluate.'
+      )
   def on_experiment_skipped(self, experiment: Experiment) -> None:
     """Called when an evaluation is skipped."""
     # Skip event will only be triggered for leaf evaluations.

langfun/core/llms/__init__.py CHANGED Viewed

@@ -32,6 +32,7 @@ from langfun.core.llms.rest import REST
 # Gemini models.
 from langfun.core.llms.google_genai import GenAI
+from langfun.core.llms.google_genai import GeminiFlash2_0ThinkingExp
 from langfun.core.llms.google_genai import GeminiFlash2_0Exp
 from langfun.core.llms.google_genai import GeminiExp_20241114
 from langfun.core.llms.google_genai import GeminiExp_20241206
@@ -45,6 +46,7 @@ from langfun.core.llms.google_genai import Palm2_IT
 # OpenAI models.
 from langfun.core.llms.openai import OpenAI
+from langfun.core.llms.openai import GptO1
 from langfun.core.llms.openai import GptO1Preview
 from langfun.core.llms.openai import GptO1Preview_20240912
 from langfun.core.llms.openai import GptO1Mini
@@ -106,6 +108,7 @@ from langfun.core.llms.anthropic import VertexAIAnthropic
 from langfun.core.llms.anthropic import VertexAIClaude3_5_Sonnet_20241022
 from langfun.core.llms.anthropic import VertexAIClaude3_5_Sonnet_20240620
 from langfun.core.llms.anthropic import VertexAIClaude3_5_Haiku_20241022
+from langfun.core.llms.anthropic import VertexAIClaude3_Opus_20240229
 from langfun.core.llms.groq import Groq
 from langfun.core.llms.groq import GroqLlama3_2_3B
@@ -124,6 +127,7 @@ from langfun.core.llms.groq import GroqWhisper_Large_v3Turbo
 from langfun.core.llms.vertexai import VertexAI
 from langfun.core.llms.vertexai import VertexAIGemini2_0
 from langfun.core.llms.vertexai import VertexAIGeminiFlash2_0Exp
+from langfun.core.llms.vertexai import VertexAIGeminiFlash2_0ThinkingExp
 from langfun.core.llms.vertexai import VertexAIGemini1_5
 from langfun.core.llms.vertexai import VertexAIGeminiPro1_5
 from langfun.core.llms.vertexai import VertexAIGeminiPro1_5_001

langfun/core/llms/anthropic.py CHANGED Viewed

@@ -67,6 +67,13 @@ SUPPORTED_MODELS_AND_SETTINGS = {
         cost_per_1k_input_tokens=0.001,
         cost_per_1k_output_tokens=0.005,
     ),
+    'claude-3-opus@20240229': pg.Dict(
+        max_tokens=4096,
+        rpm=4000,
+        tpm=400000,
+        cost_per_1k_input_tokens=0.015,
+        cost_per_1k_output_tokens=0.075,
+    ),
     # Anthropic hosted models.
     'claude-3-5-sonnet-20241022': pg.Dict(
         max_tokens=8192,
@@ -461,6 +468,11 @@ class VertexAIAnthropic(Anthropic):
     return request
+class VertexAIClaude3_Opus_20240229(VertexAIAnthropic):  # pylint: disable=invalid-name
+  """Anthropic's Claude 3 Opus model on VertexAI."""
+  model = 'claude-3-opus@20240229'
 class VertexAIClaude3_5_Sonnet_20241022(VertexAIAnthropic):  # pylint: disable=invalid-name
   """Anthropic's Claude 3.5 Sonnet model on VertexAI."""
   model = 'claude-3-5-sonnet-v2@20241022'

langfun/core/llms/google_genai.py CHANGED Viewed

@@ -48,6 +48,7 @@ class GenAI(lf.LanguageModel):
   model: Annotated[
       Literal[
+          'gemini-2.0-flash-thinking-exp-1219',
           'gemini-2.0-flash-exp',
           'gemini-exp-1206',
           'gemini-exp-1114',
@@ -307,6 +308,16 @@ _GOOGLE_GENAI_MODEL_HUB = _ModelHub()
 #
 # Public Gemini models.
 #
+class GeminiFlash2_0ThinkingExp(GenAI):  # pylint: disable=invalid-name
+  """Gemini 2.0 Flash Thinking Experimental model."""
+  model = 'gemini-2.0-flash-thinking-exp-1219'
+  supported_modalities = (
+      vertexai.DOCUMENT_TYPES
+      + vertexai.IMAGE_TYPES
+      + vertexai.AUDIO_TYPES
+      + vertexai.VIDEO_TYPES
+  )
 class GeminiFlash2_0Exp(GenAI):  # pylint: disable=invalid-name

langfun 0.1.2.dev202501020804__py3-none-any.whl → 0.1.2.dev202501040804__py3-none-any.whl

langfun 0.1.2.dev202501020804__py3-none-any.whl → 0.1.2.dev202501040804__py3-none-any.whl