PyPI - langfun - Versions diffs - 0.1.2.dev202511160804__py3-none-any.whl → 0.1.2.dev202511270805__py3-none-any.whl - Mend

langfun 0.1.2.dev202511160804__py3-none-any.whl → 0.1.2.dev202511270805__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of langfun might be problematic. Click here for more details.

Files changed (41)

langfun/core/__init__.py +1 -0
langfun/core/agentic/__init__.py +4 -1
langfun/core/agentic/action.py +340 -17
langfun/core/agentic/action_test.py +124 -21
langfun/core/eval/base_test.py +5 -5
langfun/core/eval/v2/checkpointing.py +25 -1
langfun/core/eval/v2/checkpointing_test.py +8 -1
langfun/core/eval/v2/eval_test_helper.py +7 -2
langfun/core/eval/v2/evaluation.py +4 -1
langfun/core/eval/v2/example.py +5 -1
langfun/core/eval/v2/example_test.py +13 -5
langfun/core/eval/v2/experiment.py +23 -0
langfun/core/eval/v2/experiment_test.py +19 -0
langfun/core/eval/v2/progress_tracking.py +12 -3
langfun/core/eval/v2/progress_tracking_test.py +3 -1
langfun/core/eval/v2/reporting_test.py +4 -0
langfun/core/eval/v2/runners/__init__.py +4 -0
langfun/core/eval/v2/runners/base.py +40 -21
langfun/core/eval/v2/runners/beam.py +341 -0
langfun/core/eval/v2/runners/beam_test.py +131 -0
langfun/core/eval/v2/runners/ckpt_monitor.py +294 -0
langfun/core/eval/v2/runners/ckpt_monitor_test.py +162 -0
langfun/core/eval/v2/runners/debug_test.py +1 -4
langfun/core/eval/v2/runners/parallel_test.py +1 -4
langfun/core/eval/v2/runners/sequential_test.py +1 -4
langfun/core/langfunc_test.py +3 -3
langfun/core/language_model.py +38 -5
langfun/core/language_model_test.py +45 -0
langfun/core/llms/__init__.py +2 -0
langfun/core/llms/gemini.py +41 -8
langfun/core/llms/gemini_test.py +84 -0
langfun/core/llms/google_genai.py +5 -0
langfun/core/llms/vertexai.py +7 -0
langfun/core/modalities/mime.py +2 -0
langfun/core/modalities/mime_test.py +11 -0
langfun/core/structured/schema/__init__.py +1 -0
{langfun-0.1.2.dev202511160804.dist-info → langfun-0.1.2.dev202511270805.dist-info}/METADATA +1 -1
{langfun-0.1.2.dev202511160804.dist-info → langfun-0.1.2.dev202511270805.dist-info}/RECORD +41 -37
{langfun-0.1.2.dev202511160804.dist-info → langfun-0.1.2.dev202511270805.dist-info}/WHEEL +0 -0
{langfun-0.1.2.dev202511160804.dist-info → langfun-0.1.2.dev202511270805.dist-info}/licenses/LICENSE +0 -0
{langfun-0.1.2.dev202511160804.dist-info → langfun-0.1.2.dev202511270805.dist-info}/top_level.txt +0 -0

langfun/core/agentic/action_test.py CHANGED Viewed

@@ -52,6 +52,7 @@ class Foo(action_lib.Action):
     with session.track_phase('prepare'):
       session.info('Begin Foo', x=1)
       time.sleep(self.simulate_execution_time[0])
+      Bar()(session, lm=lm)
       session.query(
           'foo',
           schema=int if self.simulate_query_error else None,
@@ -65,6 +66,7 @@ class Foo(action_lib.Action):
     def _sub_task(i):
       session.add_metadata(**{f'subtask_{i}': i})
       time.sleep(self.simulate_execution_time[2])
+      Bar()(session, lm=lm)
       return lf_structured.query(f'subtask_{i}', lm=lm)
     self._state = []
@@ -88,6 +90,50 @@ class Foo(action_lib.Action):
     lf_structured.query('additional query', lm=lm)
+class ExecutionUnitPositionTest(unittest.TestCase):
+  def test_basics(self):
+    pos1 = action_lib.ExecutionUnit.Position(None, 0)
+    self.assertEqual(repr(pos1), 'Position(0)')
+    self.assertEqual(str(pos1), '')
+    self.assertIsNone(pos1.parent)
+    self.assertEqual(pos1.index, 0)
+    self.assertEqual(pos1.indices(), (0,))
+    self.assertEqual(pos1, (0,))
+    self.assertEqual(pos1, '')
+    self.assertEqual(pos1, action_lib.ExecutionUnit.Position(None, 0))
+    self.assertNotEqual(pos1, 1)
+    self.assertNotEqual(pos1, (1,))
+    self.assertNotEqual(pos1, action_lib.ExecutionUnit.Position(None, 1))
+    pos2 = action_lib.ExecutionUnit.Position(pos1, 0)
+    self.assertEqual(repr(pos2), 'Position(0, 0)')
+    self.assertEqual(str(pos2), '1')
+    self.assertEqual(pos2, '1')
+    self.assertEqual(pos2.parent, pos1)
+    self.assertEqual(pos2.index, 0)
+    self.assertEqual(pos2.indices(), (0, 0))
+    self.assertNotEqual(pos1, pos2)
+    self.assertLess(pos1, pos2)
+    self.assertGreater(pos2, pos1)
+    self.assertEqual(
+        hash(pos2),
+        hash(
+            action_lib.ExecutionUnit.Position(
+                action_lib.ExecutionUnit.Position(None, 0), 0
+            )
+        )
+    )
+    pos3 = action_lib.ExecutionUnit.Position(pos2, 0)
+    self.assertEqual(str(pos3), '1.1')
+    self.assertEqual(pos3, '1.1')
+    self.assertEqual(pos3.parent, pos2)
+    self.assertEqual(pos3.index, 0)
+    self.assertEqual(pos3.indices(), (0, 0, 0))
+    self.assertEqual(pos3.to_str(separator='>'), '1>1')
 class ActionInvocationTest(unittest.TestCase):
   def test_basics(self):
@@ -108,9 +154,7 @@ class ExecutionTraceTest(unittest.TestCase):
     self.assertEqual(execution.id, '')
     root = action_lib.ActionInvocation(action=action_lib.RootAction())
-    action_invocation = action_lib.ActionInvocation(
-        action=Foo(1)
-    )
+    action_invocation = action_lib.ActionInvocation(action=Foo(1))
     root.execution.append(action_invocation)
     self.assertEqual(action_invocation.execution.id, '/a1')
@@ -153,6 +197,7 @@ class SessionTest(unittest.TestCase):
     self.assertIsInstance(session.root.action, action_lib.RootAction)
     self.assertIs(session.current_action, session.root)
+    self.assertIs(session.metadata, session.root.metadata)
     #
     # Inspecting the root invocation.
@@ -175,20 +220,25 @@ class SessionTest(unittest.TestCase):
     )
     # The root space should have one action (foo), no queries, and no logs.
+    self.assertEqual(len(root.execution_units), 1)
     self.assertEqual(len(root.actions), 1)
     self.assertEqual(len(root.queries), 0)
     self.assertEqual(len(root.logs), 0)
-    # 1 query from Bar, 2 from Foo and 3 from parallel executions.
-    self.assertEqual(len(session.all_queries), 6)
-    self.assertEqual(len(root.all_queries), 6)
-    # 2 actions: Foo and Bar.
-    self.assertEqual(len(session.all_actions), 2)
-    self.assertEqual(len(root.all_actions), 2)
-    # 1 log from Bar and 1 from Foo.
-    self.assertEqual(len(session.all_logs), 2)
-    self.assertEqual(len(root.all_logs), 2)
+    # 2 query from Bar, 2 from Foo and 2 * 3 from parallel executions.
+    self.assertEqual(len(session.all_queries), 10)
+    self.assertEqual(len(root.all_queries), 10)
+    # 6 actions: Foo and 2 Bar, and 3 Bar from parallel executions.
+    self.assertEqual(len(session.all_actions), 6)
+    self.assertEqual(
+        [str(a.position) for a in session.all_actions],
+        ['1', '1.1', '1.2.1.1', '1.2.2.1', '1.2.3.1', '1.3']
+    )
+    self.assertEqual(len(root.all_actions), 6)
+    # 1 log from Bar and 1 from Foo and 3 from Bar in parallel executions.
+    self.assertEqual(len(session.all_logs), 6)
+    self.assertEqual(len(root.all_logs), 6)
     self.assertIs(session.usage_summary, root.usage_summary)
-    self.assertEqual(root.usage_summary.total.num_requests, 6)
+    self.assertEqual(root.usage_summary.total.num_requests, 10)
     # Inspecting the top-level action (Foo)
     foo_invocation = root.execution[0]
@@ -200,15 +250,19 @@ class SessionTest(unittest.TestCase):
     # Prepare phase.
     prepare_phase = foo_invocation.execution[0]
+    self.assertIsNone(prepare_phase.position)
     self.assertIsInstance(prepare_phase, action_lib.ExecutionTrace)
     self.assertEqual(prepare_phase.id, 'agent@1:/a1/prepare')
-    self.assertEqual(len(prepare_phase.items), 2)
+    self.assertEqual(len(prepare_phase.items), 3)
     self.assertTrue(prepare_phase.has_started)
     self.assertTrue(prepare_phase.has_stopped)
-    self.assertEqual(prepare_phase.usage_summary.total.num_requests, 1)
+    self.assertEqual(prepare_phase.usage_summary.total.num_requests, 2)
     self.assertIsInstance(prepare_phase.items[0], lf.logging.LogEntry)
-    self.assertIsInstance(prepare_phase.items[1], lf_structured.QueryInvocation)
-    self.assertEqual(prepare_phase.items[1].id, 'agent@1:/a1/prepare/q1')
+    self.assertIsInstance(prepare_phase.items[1], action_lib.ActionInvocation)
+    self.assertIs(prepare_phase.items[1].parent_execution_unit, foo_invocation)
+    self.assertEqual(prepare_phase.items[1].id, 'agent@1:/a1/prepare/a1')
+    self.assertIsInstance(prepare_phase.items[2], lf_structured.QueryInvocation)
+    self.assertEqual(prepare_phase.items[2].id, 'agent@1:/a1/prepare/q1')
     # Tracked queries.
     query_invocation = foo_invocation.execution[1]
@@ -230,20 +284,44 @@ class SessionTest(unittest.TestCase):
     # Tracked parallel executions.
     parallel_executions = foo_invocation.execution[2]
+    # root (0) > foo (0) > parallel executions (1)
+    self.assertEqual(parallel_executions.position, (0, 0, 1))
     self.assertEqual(parallel_executions.id, 'agent@1:/a1/p1')
     self.assertIsInstance(parallel_executions, action_lib.ParallelExecutions)
+    self.assertIs(
+        parallel_executions.all_actions[0].parent_execution_unit,
+        parallel_executions
+    )
+    self.assertIs(
+        parallel_executions.all_actions[0].parent_action,
+        foo_invocation
+    )
     self.assertEqual(len(parallel_executions), 3)
     self.assertEqual(parallel_executions[0].id, 'agent@1:/a1/p1/b1')
     self.assertEqual(parallel_executions[1].id, 'agent@1:/a1/p1/b2')
     self.assertEqual(parallel_executions[2].id, 'agent@1:/a1/p1/b3')
+    self.assertEqual(len(parallel_executions[0].execution_units), 1)
+    self.assertEqual(len(parallel_executions[1].execution_units), 1)
+    self.assertEqual(len(parallel_executions[2].execution_units), 1)
     self.assertEqual(len(parallel_executions[0].queries), 1)
+    self.assertEqual(len(parallel_executions[0].all_queries), 2)
     self.assertEqual(len(parallel_executions[1].queries), 1)
+    self.assertEqual(len(parallel_executions[1].all_queries), 2)
     self.assertEqual(len(parallel_executions[2].queries), 1)
+    self.assertEqual(len(parallel_executions[2].all_queries), 2)
+    self.assertEqual(len(parallel_executions.execution_units), 0)
+    self.assertEqual(len(parallel_executions.actions), 0)
+    self.assertEqual(len(parallel_executions.queries), 0)
+    self.assertEqual(len(parallel_executions.logs), 0)
+    self.assertEqual(len(parallel_executions.all_actions), 3)
+    self.assertEqual(len(parallel_executions.all_queries), 6)
+    self.assertEqual(len(parallel_executions.all_logs), 3)
     # Invocation to Bar.
     bar_invocation = foo_invocation.execution[3]
     self.assertIs(bar_invocation.parent_action, foo_invocation)
-    self.assertEqual(bar_invocation.id, 'agent@1:/a1/a1')
+    self.assertIs(bar_invocation.parent_execution_unit, foo_invocation)
+    self.assertEqual(bar_invocation.id, 'agent@1:/a1/a5')
     self.assertIsInstance(bar_invocation, action_lib.ActionInvocation)
     self.assertIsInstance(bar_invocation.action, Bar)
     self.assertEqual(bar_invocation.result, 2)
@@ -497,26 +575,51 @@ class SessionTest(unittest.TestCase):
         super()._on_bound()
         self.progresses = []
+      def on_session_start(self, session):
+        session.add_metadata(progresses=pg.Ref(self.progresses))
       def on_action_progress(self, session, action, title, **kwargs):
         self.progresses.append((action.id, title))
     handler = MyActionHandler()
+    self.assertIs(handler.get(MyActionHandler), handler)
+    self.assertIsNone(handler.get(action_lib.SessionLogging))
+    handler_chain = action_lib.SessionEventHandlerChain(
+        handlers=[handler, action_lib.SessionLogging()]
+    )
+    self.assertIs(handler_chain.get(MyActionHandler), handler)
+    self.assertIs(
+        handler_chain.get(action_lib.SessionLogging),
+        handler_chain.handlers[1]
+    )
     session = action_lib.Session(
         id='agent@1',
-        event_handler=action_lib.SessionEventHandlerChain(
-            handlers=[handler, action_lib.SessionLogging()]
-        )
+        event_handler=handler_chain
     )
     bar = Bar()
     with session:
       bar(session, lm=fake.StaticResponse('lm response'))
       session.update_progress('Trajectory completed')
+    self.assertIs(session.metadata['progresses'], handler.progresses)
     self.assertEqual(handler.progresses, [
         ('agent@1:/a1', 'Query completed'),
         ('agent@1:', 'Trajectory completed'),
     ])
+  def test_clone(self):
+    event_handler = action_lib.SessionLogging()
+    session = action_lib.Session(event_handler=event_handler)
+    other = session.clone()
+    self.assertIsNot(session, other)
+    self.assertIs(other.event_handler, event_handler)
+    other = session.clone(deep=True)
+    self.assertIsNot(session, other)
+    self.assertIsNot(other.event_handler, session.event_handler)
   def test_log(self):
     session = action_lib.Session()
     session.debug('hi', x=1, y=2)

langfun/core/eval/base_test.py CHANGED Viewed

@@ -101,7 +101,7 @@ class EvaluationTest(unittest.TestCase):
     self.assertEqual(s.dir, os.path.join(s.root_dir, s.id))
     self.assertEqual(s.hash, s.clone().hash)
     # Test persistent hash.
-    self.assertEqual(s.hash, 'ee958159')
+    self.assertEqual(s.hash, '4dfe486a')
     self.assertEqual(
         s.hash, s.clone(override={'max_workers': 2, 'lm.timeout': 20}).hash
     )
@@ -211,7 +211,7 @@ class EvaluationTest(unittest.TestCase):
         s.result,
         dict(
             experiment_setup=dict(
-                id='Evaluation@27a702cb',
+                id='Evaluation@e028b6e6',
                 dir=s.dir,
                 model='StaticSequence',
                 prompt_template='{{example.question}}',
@@ -269,7 +269,7 @@ class EvaluationTest(unittest.TestCase):
         s.root_dir, base.Evaluation.SUMMARY_HTML.replace('.html', '.json')
     )
     self.assertTrue(os.path.exists(summary_json))
-    summary = pg.load(summary_json, auto_dict=True)
+    summary = pg.load(summary_json, convert_unknown=True)
     self.assertIn('Evaluation', summary)
     self.assertEqual(len(summary['Evaluation']), 1)
     self.assertIsNotNone(summary['Evaluation'][0].experiment)
@@ -376,7 +376,7 @@ class EvaluationTest(unittest.TestCase):
         s.children[0].dir, os.path.join(s.root_dir, s.children[0].id)
     )
     # Test persistent hash.
-    self.assertEqual(s.hash, 'f47532a7')
+    self.assertEqual(s.hash, 'fa8f5419')
     summary = s.run(verbose=True)
     self.assertEqual(len(summary.evaluations), 2)
@@ -526,7 +526,7 @@ class SuiteTest(unittest.TestCase):
         lm=lm
     )
     # Test for persistent hash.
-    self.assertEqual(s.hash, '4bd6a2f5')
+    self.assertEqual(s.hash, 'ec3901b8')
     s.run()
     expected = {
         s.children[0].id: dict(

langfun/core/eval/v2/checkpointing.py CHANGED Viewed

@@ -13,6 +13,7 @@
 # limitations under the License.
 """Checkpointing evaluation runs."""
 import abc
+import datetime
 import re
 import threading
 import traceback
@@ -44,7 +45,12 @@ class Checkpointer(experiment_lib.Plugin):
   checkpoint_filename: Annotated[
       str,
       'Checkpoint file pattern.'
-  ] = 'checkpoint.bagz'
+  ] = 'checkpoint.jsonl'
+  enable_inprogress_file: Annotated[
+      bool,
+      'If True, write file "<example_id>.inprogress" when example gets started.'
+  ] = True
   max_ckpt_loading_threads: Annotated[
       int,
@@ -90,6 +96,24 @@ class Checkpointer(experiment_lib.Plugin):
           f'scratch. Example IDs: {example_ids_to_evaluate}.'
       )
+  def on_example_start(
+      self,
+      runner: Runner,
+      experiment: Experiment,
+      example: Example,
+  ) -> None:
+    """Saves the example to the checkpoint file."""
+    if self.enable_inprogress_file:
+      def _save_inprogress_file(example: Example):
+        inprogress_file = runner.current_run.output_path_for(
+            experiment, f'{example.id}.inprogress'
+        )
+        pg.io.writefile(
+            inprogress_file,
+            datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+        )
+      runner.background_run(_save_inprogress_file, example)
   def on_example_complete(
       self,
       runner: Runner,

langfun/core/eval/v2/checkpointing_test.py CHANGED Viewed

@@ -90,7 +90,10 @@ class PerExampleCheckpointerTest(CheckpointerTest):
     root_dir = os.path.join(tempfile.mkdtemp(), 'per_example_checkpointer')
     experiment = eval_test_helper.test_experiment()
     checkpoint_filename = 'checkpoint.jsonl'
-    checkpointer = checkpointing.PerExampleCheckpointer(checkpoint_filename)
+    checkpointer = checkpointing.PerExampleCheckpointer(
+        checkpoint_filename,
+        enable_inprogress_file=True
+    )
     collector = ExampleCollector()
     run = experiment.run(
         root_dir, 'new', runner='sequential', plugins=[checkpointer, collector]
@@ -102,6 +105,10 @@ class PerExampleCheckpointerTest(CheckpointerTest):
         example = collector.examples[i + 1]
         ckpt = run.output_path_for(leaf, f'checkpoint_{example.id}.jsonl')
         self.assertTrue(pg.io.path_exists(ckpt))
+        inprogress_file = run.output_path_for(
+            leaf, f'{example.id}.inprogress'
+        )
+        self.assertTrue(pg.io.path_exists(inprogress_file))
         with pg.io.open_sequence(ckpt) as f:
           examples_from_ckpt = list(iter(f))
           # `eval_test_helper.test_experiment` has two TestEvaluation with

langfun/core/eval/v2/eval_test_helper.py CHANGED Viewed

@@ -127,11 +127,16 @@ class TestEvaluationWithIndexHtmlGenerationError(TestEvaluation):
     raise ValueError('Cannot render HTML.')
+def test_evaluation(offset: int | pg.hyper.OneOf = 0):
+  """Returns a test evaluation."""
+  return TestEvaluation(lm=TestLLM(offset=offset))
 def test_experiment():
   """Returns a test experiment."""
   return Suite([
-      TestEvaluation(lm=TestLLM(offset=0)),
-      TestEvaluation(lm=TestLLM(offset=pg.oneof(range(5)))),
+      test_evaluation(),
+      test_evaluation(pg.oneof(range(5))),
   ])

langfun/core/eval/v2/evaluation.py CHANGED Viewed

@@ -880,8 +880,9 @@ class EvaluationState:
       load_example_metadata: bool | Callable[
           [example_lib.Example], bool] = True,
       filter: Callable[[example_lib.Example], bool] | None = None,  # pylint: disable=redefined-builtin
-  ) -> None:
+  ) -> list[example_lib.Example]:
     """Loads the state from the example sequence file."""
+    examples = []
     for example in example_lib.Example.iter_ckpts(
         state_file,
         example_input_by_id=example_input_by_id,
@@ -891,6 +892,8 @@ class EvaluationState:
         continue
       example.newly_processed = False
       self._ckpt_examples[example.id] = example
+      examples.append(example)
+    return examples
   @property
   def evaluation_status(self) -> dict[int, ExampleStatus]:

langfun/core/eval/v2/example.py CHANGED Viewed

@@ -155,6 +155,8 @@ class Example(pg.JSONConvertible, pg.views.HtmlTreeView.Extension):
       ckpt_file: str | list[str],
       example_input_by_id: Callable[[int], Any] | None = None,
       load_example_metadata: bool = True,
+      convert_unknown: bool = True,
+      **kwargs
   ) -> Iterator['Example']:
     """Iterates Examples from the checkpoint files."""
     ckpt_files = [ckpt_file] if isinstance(ckpt_file, str) else ckpt_file
@@ -164,7 +166,9 @@ class Example(pg.JSONConvertible, pg.views.HtmlTreeView.Extension):
           example = pg.from_json_str(
               record,
               example_input_by_id=example_input_by_id,
-              load_example_metadata=load_example_metadata
+              load_example_metadata=load_example_metadata,
+              convert_unknown=convert_unknown,
+              **kwargs
           )
           assert isinstance(example, cls), example
           yield example

langfun/core/eval/v2/example_test.py CHANGED Viewed

@@ -94,15 +94,23 @@ class ExampleTest(unittest.TestCase):
     pg.JSONConvertible._TYPE_REGISTRY._type_to_cls_map.pop(
         inputs[0].b.__type_name__
     )
-    v = pg.from_json_str(json_str, auto_dict=True, load_example_metadata=True)
-    v.output.pop('type_name')
-    v.metadata.b.pop('type_name')
+    v = pg.from_json_str(
+        json_str,
+        convert_unknown=True,
+        load_example_metadata=True
+    )
     self.assertEqual(
         v,
         Example(
             id=1,
-            output=pg.Dict(x=1),
-            metadata=dict(b=pg.Dict(x=1, y=2)),
+            output=pg.symbolic.UnknownTypedObject(
+                inputs[0].a.__type_name__, x=1
+            ),
+            metadata=dict(
+                b=pg.symbolic.UnknownTypedObject(
+                    inputs[0].b.__type_name__, x=1, y=2
+                )
+            ),
         )
     )
     # Serialize with input.

langfun/core/eval/v2/experiment.py CHANGED Viewed

@@ -1055,6 +1055,29 @@ class Plugin(lf.Component):
   or result processing.
   """
+  @classmethod
+  def is_per_example(cls) -> bool:
+    """Returns whether the plugin is per example only.
+    Per-example plugins can be installed on individual workers when examples
+    are evaluated by multiple processes in parallel.
+    """
+    def same_code(method1, method2):
+      return method1.__code__ == method2.__code__
+    return all(
+        same_code(method1, method2)
+        for method1, method2 in [
+            (Plugin.on_run_start, cls.on_run_start),
+            (Plugin.on_run_complete, cls.on_run_complete),
+            (Plugin.on_run_abort, cls.on_run_abort),
+            (Plugin.on_experiment_start, cls.on_experiment_start),
+            (Plugin.on_experiment_skipped, cls.on_experiment_skipped),
+            (Plugin.on_experiment_complete, cls.on_experiment_complete),
+            (Plugin.on_experiment_abort, cls.on_experiment_abort),
+        ]
+    )
   def on_run_start(
       self,
       runner: Runner,

langfun/core/eval/v2/experiment_test.py CHANGED Viewed

@@ -433,5 +433,24 @@ class RunnerTest(unittest.TestCase):
           pass
+class PluginTest(unittest.TestCase):
+  def test_per_example_only(self):
+    class PerExamplePlugin(experiment_lib.Plugin):
+      def on_example_complete(self, runner, experiment, example):
+        print('on_example_complete')
+    self.assertTrue(PerExamplePlugin.is_per_example())
+    class NonPerExamplePlugin(experiment_lib.Plugin):
+      def on_experiment_complete(self, runner, experiment):
+        print('on_example_complete')
+    self.assertFalse(NonPerExamplePlugin.is_per_example())
 if __name__ == '__main__':
   unittest.main()

langfun/core/eval/v2/progress_tracking.py CHANGED Viewed

@@ -14,6 +14,7 @@
 """Tracking evaluation run progress."""
 import os
+from typing import Literal
 import langfun.core as lf
 from langfun.core.eval.v2 import example as example_lib
 from langfun.core.eval.v2 import experiment as experiment_lib
@@ -24,16 +25,24 @@ Experiment = experiment_lib.Experiment
 Example = example_lib.Example
-def progress_tracker(tqdm: bool = False) -> experiment_lib.Plugin:
+def progress_tracker(
+    tracker_type: Literal['tqdm', 'html', 'auto'] = 'auto'
+) -> experiment_lib.Plugin:
   """Creates a progress tracker as a plugin.
   Args:
-    tqdm: If True, force using tqdm for progress update.
+    tracker_type: The type of progress tracker to use.
+      If `tqdm`, force using tqdm for progress update.
+      If `html`, force using html for progress update.
+      If `auto`, determine it automatically based on the running
+        environment (console vs. notebook)
   Returns:
     The progress tracker plugin.
   """
-  if tqdm or not lf.console.under_notebook():
+  if tracker_type == 'tqdm' or (
+      tracker_type == 'auto' and not lf.console.under_notebook()
+  ):
     return _TqdmProgressTracker()
   else:
     return _HtmlProgressTracker()

langfun/core/eval/v2/progress_tracking_test.py CHANGED Viewed

@@ -21,7 +21,7 @@ import unittest
 from langfun.core import concurrent as lf_concurrent
 from langfun.core import console as lf_console
 from langfun.core.eval.v2 import eval_test_helper
-from langfun.core.eval.v2 import progress_tracking    # pylint: disable=unused-import
+from langfun.core.eval.v2 import progress_tracking
 from langfun.core.eval.v2 import runners as runners_lib  # pylint: disable=unused-import
 import pyglove as pg
@@ -33,6 +33,7 @@ class HtmlProgressTrackerTest(unittest.TestCase):
     def display(x):
       result['view'] = x.to_html()
+    self.assertFalse(progress_tracking._HtmlProgressTracker.is_per_example())
     lf_console._notebook = pg.Dict(
         display=display
     )
@@ -46,6 +47,7 @@ class HtmlProgressTrackerTest(unittest.TestCase):
 class TqdmProgressTrackerTest(unittest.TestCase):
   def test_basic(self):
+    self.assertFalse(progress_tracking._TqdmProgressTracker.is_per_example())
     root_dir = os.path.join(tempfile.mkdtemp(), 'test_tqdm_progress_tracker')
     experiment = eval_test_helper.test_experiment()
     string_io = io.StringIO()

langfun/core/eval/v2/reporting_test.py CHANGED Viewed

@@ -29,7 +29,11 @@ class ReportingTest(unittest.TestCase):
     experiment = eval_test_helper.test_experiment()
     checkpointer = checkpointing.BulkCheckpointer('checkpoint.jsonl')
     reporter = reporting.HtmlReporter()
+    self.assertFalse(reporter.is_per_example())
     example_html_generator = reporting.ExampleHtmlGenerator()
+    self.assertTrue(example_html_generator.is_per_example())
     run = experiment.run(
         root_dir,
         'new',

langfun/core/eval/v2/runners/__init__.py CHANGED Viewed

@@ -13,13 +13,17 @@
 # limitations under the License.
 """Langfun evaluation runners."""
+# pylint: disable=g-importing-member
 from langfun.core.eval.v2.runners.base import RunnerBase
+from langfun.core.eval.v2.runners.beam import BeamRunner
 from langfun.core.eval.v2.runners.debug import DebugRunner
 from langfun.core.eval.v2.runners.parallel import ParallelRunner
 from langfun.core.eval.v2.runners.sequential import SequentialRunner
+# pylint: enable=g-importing-member
 __all__ = [
     'RunnerBase',
+    'BeamRunner',
     'DebugRunner',
     'ParallelRunner',
     'SequentialRunner',

langfun 0.1.2.dev202511160804__py3-none-any.whl → 0.1.2.dev202511270805__py3-none-any.whl

Potentially problematic release.

langfun 0.1.2.dev202511160804__py3-none-any.whl → 0.1.2.dev202511270805__py3-none-any.whl