PyPI - langfun - Versions diffs - 0.1.2.dev202508060805__py3-none-any.whl → 0.1.2.dev202508070805__py3-none-any.whl - Mend

langfun 0.1.2.dev202508060805__py3-none-any.whl → 0.1.2.dev202508070805__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of langfun might be problematic. Click here for more details.

Files changed (11) hide show

langfun/core/eval/v2/checkpointing.py CHANGED Viewed

@@ -82,11 +82,7 @@ class Checkpointer(experiment_lib.Plugin):
       example: Example,
   ) -> None:
     """Saves the example to the checkpoint file."""
-    if example.has_error:
-      experiment.warning(
-          f'Example {example.id} has error. Skipping checkpointing.'
-      )
-    elif example.newly_processed:
+    if example.newly_processed:
       self._save_example(runner, experiment, example)
   def _load_experiment(

langfun/core/eval/v2/checkpointing_test.py CHANGED Viewed

@@ -101,12 +101,15 @@ class PerExampleCheckpointerTest(CheckpointerTest):
         self.assertIn(i + 1, collector.examples)
         example = collector.examples[i + 1]
         ckpt = run.output_path_for(leaf, f'checkpoint_{example.id}.jsonl')
-        if example.has_error:
-          self.assertFalse(pg.io.path_exists(ckpt))
-        else:
-          self.assertTrue(pg.io.path_exists(ckpt))
-          with pg.io.open_sequence(ckpt) as f:
-            self.assertEqual(len(list(iter(f))), 1)
+        self.assertTrue(pg.io.path_exists(ckpt))
+        with pg.io.open_sequence(ckpt) as f:
+          examples_from_ckpt = list(iter(f))
+          # `eval_test_helper.test_experiment` has two TestEvaluation with
+          # llm offset 0, which makes that evaluation run twice.
+          if example.has_error and leaf.lm.offset == 0:
+            self.assertEqual(len(examples_from_ckpt), 2)
+          else:
+            self.assertEqual(len(examples_from_ckpt), 1)
       if leaf.id not in num_processed:
         self.assertEqual(leaf.progress.num_skipped, 0)
         num_processed[leaf.id] = leaf.progress.num_processed
@@ -222,7 +225,10 @@ class BulkCheckpointerTest(CheckpointerTest):
       with pg.io.open_sequence(ckpt) as f:
         self.assertEqual(
             len(list(iter(f))),
-            leaf.progress.num_completed - leaf.progress.num_failed
+            # `eval_test_helper.test_experiment` has two TestEvaluation with
+            # llm offset 0, which makes that evaluation run twice.
+            leaf.progress.num_completed
+            + (leaf.progress.num_failed if leaf.lm.offset == 0 else 0)
         )
       if leaf.id not in num_processed:
         self.assertEqual(leaf.progress.num_skipped, 0)

langfun/core/eval/v2/evaluation.py CHANGED Viewed

@@ -149,12 +149,15 @@ class Evaluation(experiment_lib.Experiment):
       self,
       example: example_lib.Example | int,
       raise_if_has_error: bool = False,
+      reevaluate_upon_previous_errors: bool = True,
   ) -> example_lib.Example:
     """Evaluates a single example input.
     Args:
       example: An example ID or an example object with ID.
       raise_if_has_error: Whether to raise an error if the example has error.
+      reevaluate_upon_previous_errors: Whether to reevaluate the example if
+        the previous checkpointed run has error.
     Returns:
       The evaluated example with the output and metric metadata populated.
@@ -169,7 +172,9 @@ class Evaluation(experiment_lib.Experiment):
     checkpointed = self._state.ckpt_example(example.id)
     with pg.timeit('evaluate') as timeit, lf.track_usages() as usage_summary:
-      if checkpointed is None or checkpointed.has_error:
+      if checkpointed is None or (
+          reevaluate_upon_previous_errors and checkpointed.has_error
+      ):
         if checkpointed is None:
           self.info(
               f'Example {example.id} is being processed for the first time '
@@ -184,17 +189,27 @@ class Evaluation(experiment_lib.Experiment):
         self._state.update(example, in_progress=True)
         self._process(example, raise_if_has_error=raise_if_has_error)
       else:
-        self.info(
-            f'Example {example.id} skipped processing as prior run '
-            'is available and error free.'
-        )
+        if checkpointed.has_error:
+          self.warning(
+              f'Example {example.id} skipped processing despite its checkpoint '
+              f'has error: {checkpointed.error}, as '
+              '`reevaluate_upon_previous_errors` is set to False.'
+          )
+        else:
+          self.info(
+              f'Example {example.id} skipped processing as prior run '
+              'is available and error free.'
+          )
         example.start_time = checkpointed.start_time
         self._state.update(example, in_progress=True)
         # Use the output and metadata obtained from the previous processing.
         example.output = checkpointed.output
         example.metadata = checkpointed.metadata
+        example.error = checkpointed.error
         example.newly_processed = False
+        example.execution_status = checkpointed.execution_status
+        example.end_time = checkpointed.end_time
         # For previously processed examples, we merge previous usages as
         # cached, so the usage summary will account previous usages, but as

langfun/core/eval/v2/evaluation_test.py CHANGED Viewed

@@ -128,10 +128,17 @@ class EvaluationTest(unittest.TestCase):
       self.assertEqual(len(exp.state.evaluation_status), 1)
       f.add(pg.to_json_str(example))
+      example2 = exp.evaluate(6)
+      self.assertTrue(example2.newly_processed)
+      self.assertEqual(example2.input, pg.Dict(x=5, y=25, groundtruth=30))
+      self.assertTrue(example2.has_error)
+      self.assertEqual(len(exp.state.evaluation_status), 2)
+      f.add(pg.to_json_str(example2))
     exp.reset()
     self.assertEqual(len(exp.state.ckpt_examples), 0)
     exp.load_state(state_file)
-    self.assertEqual(len(exp.state.ckpt_examples), 1)
+    self.assertEqual(len(exp.state.ckpt_examples), 2)
     self.assertEqual(len(exp.state.evaluation_status), 0)
     example = exp.evaluate(3)
     self.assertFalse(example.newly_processed)
@@ -142,6 +149,19 @@ class EvaluationTest(unittest.TestCase):
     self.assertEqual(example.usage_summary.cached.total.num_requests, 1)
     self.assertEqual(example.usage_summary.uncached.total.total_tokens, 0)
     self.assertEqual(example.usage_summary.uncached.total.num_requests, 0)
+    self.assertIsNotNone(example.start_time)
+    self.assertIsNotNone(example.end_time)
+    self.assertIn('evaluate.process', example.execution_status)
+    self.assertIn('evaluate.metric', example.execution_status)
+    example2 = exp.evaluate(6, reevaluate_upon_previous_errors=False)
+    self.assertFalse(example2.newly_processed)
+    self.assertEqual(example2.input, pg.Dict(x=5, y=25, groundtruth=30))
+    self.assertTrue(example2.has_error)
+    self.assertIsNotNone(example2.start_time)
+    self.assertIsNotNone(example2.end_time)
+    self.assertIn('evaluate.process', example2.execution_status)
+    self.assertIn('evaluate.metric', example2.execution_status)
     # Test load_state with filter.
     exp.reset()

langfun/core/eval/v2/experiment.py CHANGED Viewed

@@ -384,6 +384,7 @@ class Experiment(lf.Component, pg.views.HtmlTreeView.Extension):
       example_ids: list[int] | None = None,
       shuffle_inputs: bool = False,
       raise_if_has_error: bool = False,
+      reevaluate_upon_previous_errors: bool = True,
       reprocess: bool | list[int] = False,
       generate_example_html: Literal['new', 'all', 'no'] | list[int] = 'new',
       process_timeout: int | None = None,
@@ -436,6 +437,9 @@ class Experiment(lf.Component, pg.views.HtmlTreeView.Extension):
         Neverthless, the example ID remains unchanged for each example.
       raise_if_has_error: If True, it will raise an error if any example fails.
         Otherwise, it will continue and report the error in the output.
+      reevaluate_upon_previous_errors: If True, it will reevaluate the example
+        if the previous checkpointed run has error when experiment runs with
+        existing checkpoints.
       reprocess: A boolean or a list of example IDs. If boolean, it indicates
         that whether all the examples to be evaluated will be reprocessed,
         meaning that existing checkpoints will be ignored. If a list of
@@ -475,6 +479,7 @@ class Experiment(lf.Component, pg.views.HtmlTreeView.Extension):
             example_ids=example_ids,
             shuffle_inputs=shuffle_inputs,
             raise_if_has_error=raise_if_has_error,
+            reevaluate_upon_previous_errors=reevaluate_upon_previous_errors,
             reprocess=reprocess,
             generate_example_html=generate_example_html,
             use_cache=use_cache,
@@ -835,6 +840,14 @@ class Run(pg.Object, pg.views.html.HtmlTreeView.Extension):
       )
   ] = False
+  reevaluate_upon_previous_errors: Annotated[
+      bool,
+      (
+          'If True, it will reevaluate the example if the previously '
+          'checkpointed run has error. Otherwise, it will skip the example.'
+      )
+  ] = True
   note: Annotated[
       str | None,
       'The user note for the current run.'

langfun/core/eval/v2/runners.py CHANGED Viewed

@@ -384,6 +384,7 @@ class RunnerBase(Runner):
     item = evaluation.evaluate(
         item,
         raise_if_has_error=self.current_run.raise_if_has_error,
+        reevaluate_upon_previous_errors=self.current_run.reevaluate_upon_previous_errors,
     )
     self.on_example_complete(evaluation, item)
     return item

{langfun-0.1.2.dev202508060805.dist-info → langfun-0.1.2.dev202508070805.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: langfun
-Version: 0.1.2.dev202508060805
+Version: 0.1.2.dev202508070805
 Summary: Langfun: Language as Functions.
 Home-page: https://github.com/google/langfun
 Author: Langfun Authors

{langfun-0.1.2.dev202508060805.dist-info → langfun-0.1.2.dev202508070805.dist-info}/RECORD RENAMED Viewed

@@ -69,14 +69,14 @@ langfun/core/eval/patching_test.py,sha256=8kCd54Egjju22FMgtJuxEsrXkW8ifs-UUBHtrC
 langfun/core/eval/scoring.py,sha256=_DvnlgI1SdRVaOojao_AkV3pnenfCPOqyhvlg-Sw-5M,6322
 langfun/core/eval/scoring_test.py,sha256=UcBH0R6vAovZ0A4yM22s5cBHL1qVKASubrbu1t8dYBw,4529
 langfun/core/eval/v2/__init__.py,sha256=9lNKJwbvl0lcFblAXYT_OHI8fOubJsTOdSkxEqsP1xU,1726
-langfun/core/eval/v2/checkpointing.py,sha256=t47rBfzGZYgIqWW1N1Ak9yQnNtHd-IRbEO0cZjG2VRo,11755
-langfun/core/eval/v2/checkpointing_test.py,sha256=cuQ1zom5DMXIebxYW6L3N5XRyhfoEEDrs7XQcAxg8Nc,9164
+langfun/core/eval/v2/checkpointing.py,sha256=0ll04cCH3WAXoU2ocoSE8GaSWVCw9JERcnuvUiHRSBU,11624
+langfun/core/eval/v2/checkpointing_test.py,sha256=NZo-yt3j5bUF-yPVfHGFBVGWZhhpk5xaT-6fsuIQUSs,9571
 langfun/core/eval/v2/eval_test_helper.py,sha256=sKFi_wPYCNmr96WyTduuXY0KnxjFxcJyEhXey-_nGX8,3962
-langfun/core/eval/v2/evaluation.py,sha256=ihT5dljnUkHM97XS9OwE2wOnYC-oYnHYgG5KN1hmiaU,27037
-langfun/core/eval/v2/evaluation_test.py,sha256=46bGjNZmd57NXcJSoaC17DO9B74rpVBOVTEln_4W61c,6916
+langfun/core/eval/v2/evaluation.py,sha256=Oiqjx7rL0Fql6OaYifiXlgZX5tzz1uAs88GydjJPDI8,27722
+langfun/core/eval/v2/evaluation_test.py,sha256=GdB-CexNxQSNZoXqpyGNLdHzq0D9vXqgwAJ13iaAOO0,7878
 langfun/core/eval/v2/example.py,sha256=v1dIz89pccIqujt7utrk0EbqMWM9kBn-2fYGRTKe358,10890
 langfun/core/eval/v2/example_test.py,sha256=wsHQD6te7ghROmxe3Xg_NK4TU0xS2MkNfnpo-H0H8xM,3399
-langfun/core/eval/v2/experiment.py,sha256=fb3RHNOSRftV7ZTBfYVV50iEevqdPwRHCt3mgtLzuFw,33408
+langfun/core/eval/v2/experiment.py,sha256=Y6a-Qkb4AwWWmpOVGyYiK29feuQ3u2t6SWs5mXF2xjM,33957
 langfun/core/eval/v2/experiment_test.py,sha256=BYrPYfQfU2jDfAlZcHDT0KUaXOnCnyTWyUEKhDoqXfw,13645
 langfun/core/eval/v2/metric_values.py,sha256=_B905bC-jxrYPLSEcP2M8MaHZOVMz_bVrUw8YC4arCE,4660
 langfun/core/eval/v2/metric_values_test.py,sha256=ab2oF_HsIwrSy459108ggyjgefHSPn8UVILR4dRwx14,2634
@@ -88,7 +88,7 @@ langfun/core/eval/v2/progress_tracking.py,sha256=zNhNPGlnJnHELEfFpbTMCSXFn8d1IJ5
 langfun/core/eval/v2/progress_tracking_test.py,sha256=sJhlVfinGsg3Kf2wQ_hT7VMcpQfaI4ZkqyW9ujElkwA,2282
 langfun/core/eval/v2/reporting.py,sha256=yUIPCAMnp7InIzpv1DDWrcLO-75iiOUTpscj7smkfrA,8335
 langfun/core/eval/v2/reporting_test.py,sha256=CMK-vwho8cNRJwlbkCqm_v5fykE7Y3V6SaIOCY0CDyA,5671
-langfun/core/eval/v2/runners.py,sha256=iqbH4jMtnNMhfuv1eHaxJmk1Vvsrz-sAJJFP8U44-tA,16758
+langfun/core/eval/v2/runners.py,sha256=bEniZDNu44AQgvqpwLsvBU4V_7WltAe-NPhYgIsLj1E,16848
 langfun/core/eval/v2/runners_test.py,sha256=spjkmqlls_vyERdZMdjv6dhIN9ZfxsDDvIQAWTj2kMk,11954
 langfun/core/llms/__init__.py,sha256=CtxUdXohQ8AQk1DqBT6MBy2zdAoPSggNo00SYrj9-AY,9521
 langfun/core/llms/anthropic.py,sha256=YcQ2VG8iOfXtry_tTpAukmiwXa2hK_9LkpkmXk41Nm0,26226
@@ -165,8 +165,8 @@ langfun/core/templates/demonstration.py,sha256=vCrgYubdZM5Umqcgp8NUVGXgr4P_c-fik
 langfun/core/templates/demonstration_test.py,sha256=SafcDQ0WgI7pw05EmPI2S4v1t3ABKzup8jReCljHeK4,2162
 langfun/core/templates/selfplay.py,sha256=yhgrJbiYwq47TgzThmHrDQTF4nDrTI09CWGhuQPNv-s,2273
 langfun/core/templates/selfplay_test.py,sha256=Ot__1P1M8oJfoTp-M9-PQ6HUXqZKyMwvZ5f7yQ3yfyM,2326
-langfun-0.1.2.dev202508060805.dist-info/licenses/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
-langfun-0.1.2.dev202508060805.dist-info/METADATA,sha256=Ksl1Bg7ntBn1YKRL9H-6pP1FTl-oeJutAv7PVvgjg-g,7380
-langfun-0.1.2.dev202508060805.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-langfun-0.1.2.dev202508060805.dist-info/top_level.txt,sha256=RhlEkHxs1qtzmmtWSwYoLVJAc1YrbPtxQ52uh8Z9VvY,8
-langfun-0.1.2.dev202508060805.dist-info/RECORD,,
+langfun-0.1.2.dev202508070805.dist-info/licenses/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
+langfun-0.1.2.dev202508070805.dist-info/METADATA,sha256=NKc-FcclfdUmowPZSNtHzPwmJZ8DpkdGHN0MiO-LUVM,7380
+langfun-0.1.2.dev202508070805.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+langfun-0.1.2.dev202508070805.dist-info/top_level.txt,sha256=RhlEkHxs1qtzmmtWSwYoLVJAc1YrbPtxQ52uh8Z9VvY,8
+langfun-0.1.2.dev202508070805.dist-info/RECORD,,

{langfun-0.1.2.dev202508060805.dist-info → langfun-0.1.2.dev202508070805.dist-info}/WHEEL RENAMED Viewed

File without changes

{langfun-0.1.2.dev202508060805.dist-info → langfun-0.1.2.dev202508070805.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{langfun-0.1.2.dev202508060805.dist-info → langfun-0.1.2.dev202508070805.dist-info}/top_level.txt RENAMED Viewed

File without changes

langfun 0.1.2.dev202508060805__py3-none-any.whl → 0.1.2.dev202508070805__py3-none-any.whl

Potentially problematic release.

langfun 0.1.2.dev202508060805__py3-none-any.whl → 0.1.2.dev202508070805__py3-none-any.whl