PyPI - langfun - Versions diffs - 0.1.2.dev202412140804__py3-none-any.whl → 0.1.2.dev202412170805__py3-none-any.whl - Mend

langfun 0.1.2.dev202412140804__py3-none-any.whl → 0.1.2.dev202412170805__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23)

langfun/core/coding/python/correction.py CHANGED Viewed

@@ -76,7 +76,7 @@ def run_with_correction(
   # Delay import at runtime to avoid circular depenency.
   # pylint: disable=g-import-not-at-top
   # pytype: disable=import-error
-  from langfun.core.structured import prompting
+  from langfun.core.structured import querying
   # pytype: enable=import-error
   # pylint: enable=g-import-not-at-top
@@ -119,7 +119,7 @@ def run_with_correction(
     # structure.
     try:
       # Disable autofix for code correction to avoid recursion.
-      correction = prompting.query(
+      correction = querying.query(
           CodeWithError(code=code, error=error), CorrectedCode, lm=lm, autofix=0
       )
     except errors.CodeError:

langfun/core/eval/v2/__init__.py CHANGED Viewed

@@ -29,10 +29,14 @@ from langfun.core.eval.v2.metrics import Metric
 from langfun.core.eval.v2 import metrics
 from langfun.core.eval.v2.experiment import Plugin
 from langfun.core.eval.v2.experiment import Runner
 from langfun.core.eval.v2 import runners
+# Plugins
+from langfun.core.eval.v2.checkpointing import BulkCheckpointer
+from langfun.core.eval.v2.checkpointing import PerExampleCheckpointer
+from langfun.core.eval.v2.reporting import HtmlReporter
 # pylint: enable=g-bad-import-order
 # pylint: enable=g-importing-member

langfun/core/eval/v2/checkpointing.py CHANGED Viewed

@@ -14,6 +14,7 @@
 """Checkpointing evaluation runs."""
 import threading
+import langfun.core as lf
 from langfun.core.eval.v2 import example as example_lib
 from langfun.core.eval.v2 import experiment as experiment_lib
 import pyglove as pg
@@ -24,21 +25,100 @@ Runner = experiment_lib.Runner
 class Checkpointer(experiment_lib.Plugin):
-  """Plugin for checkpointing evaluation runs."""
+  """Base class for checkpointing evaluation examples."""
+class PerExampleCheckpointer(Checkpointer):
+  """Checkpointer that saves each example to a separate file."""
+  checkpoint_filename: str = 'checkpoint.bagz'
+  def _on_bound(self):
+    super()._on_bound()
+    prefix, ext = self._file_prefix_and_ext(self.checkpoint_filename)
+    self._checkpoint_file_prefix = prefix
+    self._checkpoint_file_ext = ext
+  def on_experiment_start(
+      self,
+      runner: Runner,
+      experiment: Experiment,
+  ) -> None:
+    """Creates the checkpoint file."""
+    if not experiment.is_leaf:
+      return
+    # For refresh runs, we don't want to load the previous state.
+    if not runner.current_run.refresh:
+      def _load_state(ckpt_file):
+        experiment.load_state(ckpt_file)
+      experiment_dir = runner.current_run.input_dir(experiment)
+      if pg.io.path_exists(experiment_dir):
+        ckpt_files = [
+            runner.current_run.input_path_for(experiment, filename)
+            for filename in pg.io.listdir(experiment_dir)
+            if filename.startswith(self._checkpoint_file_prefix)
+            and filename.endswith(self._checkpoint_file_ext)
+        ]
+      else:
+        ckpt_files = []
+      for ckpt_file, _, error in lf.concurrent_map(
+          _load_state, ckpt_files, max_workers=64,
+      ):
+        if error is not None:
+          pg.logging.warning(
+              'Failed to load checkpoint file %s: %s. Skipping the file.',
+              ckpt_file, error
+          )
+  def on_example_complete(
+      self,
+      runner: Runner,
+      experiment: Experiment,
+      example: Example,
+  ) -> None:
+    """Saves the example to the checkpoint file."""
+    if not example.has_error:
+      def save_state(example: Example):
+        writer = SequenceWriter(
+            runner.current_run.output_path_for(
+                experiment,
+                (
+                    f'{self._checkpoint_file_prefix}_{example.id}'
+                    f'{self._checkpoint_file_ext}'
+                )
+            )
+        )
+        writer.add(example)
+        del writer
+      runner.background_run(save_state, example)
+  def _file_prefix_and_ext(self, filename: str) -> tuple[str, str]:
+    ext_index = filename.rfind('.')
+    if ext_index == -1:
+      return filename, ''
+    else:
+      return filename[:ext_index], filename[ext_index:]
+class BulkCheckpointer(Checkpointer):
+  """Checkpointer that saves all examples to a single file."""
   checkpoint_filename: str = 'checkpoint.bagz'
   def _on_bound(self):
     super()._on_bound()
     self._lock = threading.Lock()
-    self._state_writer = None
+    self._sequence_writer = None
   def on_run_start(
       self,
       runner: Runner,
       root: Experiment,
   ) -> None:
-    self._state_writer = {}
+    self._sequence_writer = {}
   def on_run_abort(
       self,
@@ -47,8 +127,8 @@ class Checkpointer(experiment_lib.Plugin):
       error: BaseException
   ) -> None:
     with self._lock:
-      if self._state_writer is not None:
-        self._state_writer.clear()
+      if self._sequence_writer is not None:
+        self._sequence_writer.clear()
   def on_run_complete(
       self,
@@ -56,7 +136,7 @@ class Checkpointer(experiment_lib.Plugin):
       root: Experiment,
   ) -> None:
     with self._lock:
-      assert self._state_writer is not None and not self._state_writer
+      assert self._sequence_writer is not None and not self._sequence_writer
   def on_experiment_start(
       self,
@@ -74,14 +154,14 @@ class Checkpointer(experiment_lib.Plugin):
           ),
           raise_if_not_exist=False
       )
-    state_writer = StateWriter(
+    sequence_writer = SequenceWriter(
         runner.current_run.output_path_for(
             experiment, self.checkpoint_filename
         )
     )
     with self._lock:
-      if self._state_writer is not None:
-        self._state_writer[experiment.id] = state_writer
+      if self._sequence_writer is not None:
+        self._sequence_writer[experiment.id] = sequence_writer
   def on_experiment_complete(
       self,
@@ -91,10 +171,10 @@ class Checkpointer(experiment_lib.Plugin):
     """Closes the checkpoint file."""
     if not experiment.is_leaf:
       return
-    assert experiment.id in self._state_writer
+    assert experiment.id in self._sequence_writer
     with self._lock:
-      if self._state_writer is not None:
-        del self._state_writer[experiment.id]
+      if self._sequence_writer is not None:
+        del self._sequence_writer[experiment.id]
   def on_example_complete(
       self,
@@ -103,13 +183,13 @@ class Checkpointer(experiment_lib.Plugin):
       example: Example,
   ) -> None:
     """Saves the example to the checkpoint file."""
-    assert experiment.id in self._state_writer
+    assert experiment.id in self._sequence_writer
     if not example.has_error:
-      runner.background_run(self._state_writer[experiment.id].add, example)
+      runner.background_run(self._sequence_writer[experiment.id].add, example)
-class StateWriter:
-  """Thread safe state writer."""
+class SequenceWriter:
+  """Thread safe sequence writer."""
   def __init__(self, path: str):
     self._lock = threading.Lock()

langfun/core/eval/v2/checkpointing_test.py CHANGED Viewed

@@ -24,11 +24,11 @@ import pyglove as pg
 Example = example_lib.Example
-class StateWriterTest(unittest.TestCase):
+class SequenceWriterTest(unittest.TestCase):
   def test_basic(self):
     file = os.path.join(tempfile.gettempdir(), 'test.jsonl')
-    writer = checkpointing.StateWriter(file)
+    writer = checkpointing.SequenceWriter(file)
     example = Example(id=1, input=pg.Dict(x=1), output=2)
     writer.add(example)
     del writer
@@ -36,7 +36,7 @@ class StateWriterTest(unittest.TestCase):
   def test_error_handling(self):
     file = os.path.join(tempfile.gettempdir(), 'test_error_handling.jsonl')
-    writer = checkpointing.StateWriter(file)
+    writer = checkpointing.SequenceWriter(file)
     writer.add(Example(id=1, input=pg.Dict(x=1), output=2))
     def f():
@@ -52,17 +52,50 @@ class StateWriterTest(unittest.TestCase):
       self.assertEqual(len(list(iter(f))), 1)
-class CheckpointingTest(unittest.TestCase):
+class PerExampleCheckpointerTest(unittest.TestCase):
   def test_checkpointing(self):
-    root_dir = os.path.join(tempfile.gettempdir(), 'test_checkpointing')
+    root_dir = os.path.join(tempfile.gettempdir(), 'per_example_checkpointer')
     experiment = test_helper.test_experiment()
     checkpoint_filename = 'checkpoint.jsonl'
-    checkpointer = checkpointing.Checkpointer(checkpoint_filename)
+    checkpointer = checkpointing.PerExampleCheckpointer(checkpoint_filename)
     run = experiment.run(
         root_dir, 'new', runner='sequential', plugins=[checkpointer]
     )
-    self.assertEqual(len(checkpointer._state_writer), 0)
+    num_processed = {}
+    for leaf in experiment.leaf_nodes:
+      for i in range(leaf.num_examples):
+        example = leaf.state.get(i + 1)
+        ckpt = run.output_path_for(leaf, f'checkpoint_{example.id}.jsonl')
+        if example.has_error:
+          self.assertFalse(pg.io.path_exists(ckpt))
+        else:
+          self.assertTrue(pg.io.path_exists(ckpt))
+          with pg.io.open_sequence(ckpt) as f:
+            self.assertEqual(len(list(iter(f))), 1)
+      if leaf.id not in num_processed:
+        self.assertEqual(leaf.progress.num_skipped, 0)
+        num_processed[leaf.id] = leaf.progress.num_processed
+    # Run again, should skip existing.
+    _ = experiment.run(
+        root_dir, 'latest', runner='sequential', plugins=[checkpointer]
+    )
+    for leaf in experiment.leaf_nodes:
+      self.assertEqual(leaf.progress.num_skipped, num_processed[leaf.id])
+class BulkCheckpointerTest(unittest.TestCase):
+  def test_checkpointing(self):
+    root_dir = os.path.join(tempfile.gettempdir(), 'test_bulk_checkpointer')
+    experiment = test_helper.test_experiment()
+    checkpoint_filename = 'checkpoint.jsonl'
+    checkpointer = checkpointing.BulkCheckpointer(checkpoint_filename)
+    run = experiment.run(
+        root_dir, 'new', runner='sequential', plugins=[checkpointer]
+    )
+    self.assertEqual(len(checkpointer._sequence_writer), 0)
     num_processed = {}
     for leaf in experiment.leaf_nodes:
       ckpt = run.output_path_for(leaf, checkpoint_filename)
@@ -80,7 +113,7 @@ class CheckpointingTest(unittest.TestCase):
     _ = experiment.run(
         root_dir, 'latest', runner='sequential', plugins=[checkpointer]
     )
-    self.assertEqual(len(checkpointer._state_writer), 0)
+    self.assertEqual(len(checkpointer._sequence_writer), 0)
     for leaf in experiment.leaf_nodes:
       self.assertEqual(leaf.progress.num_skipped, num_processed[leaf.id])

langfun/core/eval/v2/runners.py CHANGED Viewed

@@ -53,7 +53,7 @@ class RunnerBase(Runner):
   ] = False
   plugins = [
-      checkpointing.Checkpointer(),
+      checkpointing.BulkCheckpointer(),
       reporting.HtmlReporter(),
   ]

langfun/core/structured/__init__.py CHANGED Viewed

@@ -36,12 +36,6 @@ from langfun.core.structured.schema import class_definitions
 from langfun.core.structured.schema import annotation
 from langfun.core.structured.schema import structure_from_python
-from langfun.core.structured.schema import SchemaRepr
-from langfun.core.structured.schema import SchemaJsonRepr
-from langfun.core.structured.schema import SchemaPythonRepr
-from langfun.core.structured.schema import ValueRepr
-from langfun.core.structured.schema import ValueJsonRepr
-from langfun.core.structured.schema import ValuePythonRepr
 from langfun.core.structured.schema import schema_repr
 from langfun.core.structured.schema import source_form
 from langfun.core.structured.schema import value_repr
@@ -56,26 +50,17 @@ from langfun.core.structured.mapping import Mapping
 from langfun.core.structured.mapping import MappingError
 from langfun.core.structured.mapping import MappingExample
-from langfun.core.structured.parsing import ParseStructure
-from langfun.core.structured.parsing import ParseStructureJson
-from langfun.core.structured.parsing import ParseStructurePython
 from langfun.core.structured.parsing import parse
 from langfun.core.structured.parsing import call
-from langfun.core.structured.prompting import QueryStructure
-from langfun.core.structured.prompting import QueryStructureJson
-from langfun.core.structured.prompting import QueryStructurePython
-from langfun.core.structured.prompting import query
-from langfun.core.structured.prompting import query_prompt
-from langfun.core.structured.prompting import query_output
-from langfun.core.structured.prompting import query_reward
-from langfun.core.structured.prompting import QueryInvocation
-from langfun.core.structured.prompting import track_queries
-from langfun.core.structured.description import DescribeStructure
-from langfun.core.structured.description import describe
+from langfun.core.structured.querying import track_queries
+from langfun.core.structured.querying import QueryInvocation
+from langfun.core.structured.querying import query
+from langfun.core.structured.querying import query_prompt
+from langfun.core.structured.querying import query_output
+from langfun.core.structured.querying import query_reward
-from langfun.core.structured.completion import CompleteStructure
+from langfun.core.structured.description import describe
 from langfun.core.structured.completion import complete
 from langfun.core.structured.scoring import score

langfun/core/structured/completion.py CHANGED Viewed

@@ -21,7 +21,7 @@ from langfun.core.structured import schema as schema_lib
 import pyglove as pg
-class CompleteStructure(mapping.Mapping):
+class _CompleteStructure(mapping.Mapping):
   """Complete structure by filling the missing fields."""
   input: Annotated[
@@ -241,7 +241,7 @@ def complete(
   Returns:
     The result based on the schema.
   """
-  t = CompleteStructure(
+  t = _CompleteStructure(
       input=schema_lib.mark_missing(input_value),
       default=default,
       examples=examples,

langfun/core/structured/completion_test.py CHANGED Viewed

@@ -46,7 +46,7 @@ class TripPlan(pg.Object):
 class CompleteStructureTest(unittest.TestCase):
   def test_render_no_examples(self):
-    l = completion.CompleteStructure()
+    l = completion._CompleteStructure()
     input_value = schema_lib.mark_missing(
         TripPlan.partial(
             place='San Francisco',
@@ -120,7 +120,7 @@ class CompleteStructureTest(unittest.TestCase):
     )
   def test_render_no_class_definitions(self):
-    l = completion.CompleteStructure()
+    l = completion._CompleteStructure()
     input_value = schema_lib.mark_missing(
         TripPlan.partial(
             place='San Francisco',
@@ -200,7 +200,7 @@ class CompleteStructureTest(unittest.TestCase):
     )
   def test_render_with_examples(self):
-    l = completion.CompleteStructure()
+    l = completion._CompleteStructure()
     input_value = schema_lib.mark_missing(
         TripPlan.partial(
             place='San Francisco',
@@ -411,7 +411,7 @@ class CompleteStructureTest(unittest.TestCase):
             modalities.Image.from_bytes(b'image_of_elephant'),
         )
     )
-    l = completion.CompleteStructure(
+    l = completion._CompleteStructure(
         input=input_value,
         examples=[
             mapping.MappingExample(

langfun/core/structured/description.py CHANGED Viewed

@@ -22,7 +22,7 @@ import pyglove as pg
 @pg.use_init_args(['examples'])
-class DescribeStructure(mapping.Mapping):
+class _DescribeStructure(mapping.Mapping):
   """Describe a structured value in natural language."""
   input_title = 'PYTHON_OBJECT'
@@ -106,7 +106,7 @@ def describe(
   Returns:
     The parsed result based on the schema.
   """
-  return DescribeStructure(
+  return _DescribeStructure(
       input=value,
       context=context,
       examples=examples or default_describe_examples(),

langfun/core/structured/description_test.py CHANGED Viewed

@@ -36,7 +36,7 @@ class Itinerary(pg.Object):
 class DescribeStructureTest(unittest.TestCase):
   def test_render(self):
-    l = description_lib.DescribeStructure(
+    l = description_lib._DescribeStructure(
         input=Itinerary(
             day=1,
             type='daytime',
@@ -137,7 +137,7 @@ class DescribeStructureTest(unittest.TestCase):
         ],
         hotel=None,
     )
-    l = description_lib.DescribeStructure(
+    l = description_lib._DescribeStructure(
         input=value, context='1 day itinerary to SF'
     )
     self.assertEqual(
@@ -187,7 +187,7 @@ class DescribeStructureTest(unittest.TestCase):
         ],
         hotel=None,
     )
-    l = description_lib.DescribeStructure(input=value)
+    l = description_lib._DescribeStructure(input=value)
     self.assertEqual(
         l.render().text,
         inspect.cleandoc("""

langfun/core/structured/function_generation.py CHANGED Viewed

@@ -21,7 +21,7 @@ from typing import Any, Callable, Literal, Optional, Tuple
 from langfun.core import language_model
 from langfun.core import template
 from langfun.core.coding import python
-from langfun.core.structured import prompting
+from langfun.core.structured import querying
 import pyglove as pg
@@ -39,7 +39,7 @@ def unittest_gen(signature, lm, num_retries=1):
   unittest_examples = None
   for _ in range(num_retries):
-    r = prompting.query(
+    r = querying.query(
         PythonFunctionSignature(signature=signature),
         list[UnitTest],
         lm=lm,
@@ -145,7 +145,7 @@ def _function_gen(
   last_error = None
   for _ in range(num_retries):
     try:
-      source_code = prompting.query(
+      source_code = querying.query(
           PythonFunctionPrompt(signature=signature), lm=lm
       )
       f = python.evaluate(source_code, global_vars=context)

langfun/core/structured/parsing.py CHANGED Viewed

@@ -16,13 +16,13 @@ from typing import Any, Callable, Type, Union
 import langfun.core as lf
 from langfun.core.structured import mapping
-from langfun.core.structured import prompting
+from langfun.core.structured import querying
 from langfun.core.structured import schema as schema_lib
 import pyglove as pg
 @lf.use_init_args(['schema', 'default', 'examples'])
-class ParseStructure(mapping.Mapping):
+class _ParseStructure(mapping.Mapping):
   """Parse an object out from a natural language text."""
   context_title = 'USER_REQUEST'
@@ -37,7 +37,7 @@ class ParseStructure(mapping.Mapping):
   ]
-class ParseStructureJson(ParseStructure):
+class _ParseStructureJson(_ParseStructure):
   """Parse an object out from a NL text using JSON as the protocol."""
   preamble = """
@@ -53,7 +53,7 @@ class ParseStructureJson(ParseStructure):
   output_title = 'JSON'
-class ParseStructurePython(ParseStructure):
+class _ParseStructurePython(_ParseStructure):
   """Parse an object out from a NL text using Python as the protocol."""
   preamble = """
@@ -87,7 +87,7 @@ def parse(
     returns_message: bool = False,
     **kwargs,
 ) -> Any:
-  """Parse a natural langugage message based on schema.
+  """Parse a natural language message based on schema.
   Examples:
@@ -271,7 +271,7 @@ def call(
     return lm_output if returns_message else lm_output.text
   # Call `parsing_lm` for structured parsing.
-  parsing_message = prompting.query(
+  parsing_message = querying.query(
       lm_output.text,
       schema,
       examples=parsing_examples,
@@ -293,11 +293,11 @@ def call(
 def _parse_structure_cls(
     protocol: schema_lib.SchemaProtocol,
-) -> Type[ParseStructure]:
+) -> Type[_ParseStructure]:
   if protocol == 'json':
-    return ParseStructureJson
+    return _ParseStructureJson
   elif protocol == 'python':
-    return ParseStructurePython
+    return _ParseStructurePython
   else:
     raise ValueError(f'Unknown protocol: {protocol!r}.')

langfun/core/structured/parsing_test.py CHANGED Viewed

@@ -37,7 +37,7 @@ class Itinerary(pg.Object):
 class ParseStructurePythonTest(unittest.TestCase):
   def test_render_no_examples(self):
-    l = parsing.ParseStructurePython(int)
+    l = parsing._ParseStructurePython(int)
     m = lf.AIMessage('12 / 6 + 2 = 4')
     self.assertEqual(
         l.render(input=m, context='Compute 12 / 6 + 2.').text,
@@ -62,7 +62,7 @@ class ParseStructurePythonTest(unittest.TestCase):
     )
   def test_render_no_context(self):
-    l = parsing.ParseStructurePython(int)
+    l = parsing._ParseStructurePython(int)
     m = lf.AIMessage('12 / 6 + 2 = 4')
     self.assertEqual(
@@ -85,7 +85,7 @@ class ParseStructurePythonTest(unittest.TestCase):
     )
   def test_render(self):
-    l = parsing.ParseStructurePython(
+    l = parsing._ParseStructurePython(
         int,
         examples=[
             mapping.MappingExample(
@@ -212,7 +212,7 @@ class ParseStructurePythonTest(unittest.TestCase):
         ),
         override_attrs=True,
     ):
-      l = parsing.ParseStructurePython(
+      l = parsing._ParseStructurePython(
           [Itinerary],
           examples=[
               mapping.MappingExample(
@@ -295,7 +295,7 @@ class ParseStructurePythonTest(unittest.TestCase):
 class ParseStructureJsonTest(unittest.TestCase):
   def test_render_no_examples(self):
-    l = parsing.ParseStructureJson(int)
+    l = parsing._ParseStructureJson(int)
     m = lf.AIMessage('12 / 6 + 2 = 4')
     self.assertEqual(
         l.render(input=m, context='Compute 12 / 6 + 2.').text,
@@ -320,7 +320,7 @@ class ParseStructureJsonTest(unittest.TestCase):
     )
   def test_render_no_context(self):
-    l = parsing.ParseStructureJson(int)
+    l = parsing._ParseStructureJson(int)
     m = lf.AIMessage('12 / 6 + 2 = 4')
     self.assertEqual(
@@ -343,7 +343,7 @@ class ParseStructureJsonTest(unittest.TestCase):
     )
   def test_render(self):
-    l = parsing.ParseStructureJson(
+    l = parsing._ParseStructureJson(
         int,
         examples=[
             mapping.MappingExample(
@@ -504,7 +504,7 @@ class ParseStructureJsonTest(unittest.TestCase):
         override_attrs=True,
     ):
       message = lf.LangFunc(lm_input)()
-      l = parsing.ParseStructureJson(
+      l = parsing._ParseStructureJson(
           [Itinerary],
           examples=[
               mapping.MappingExample(

langfun 0.1.2.dev202412140804__py3-none-any.whl → 0.1.2.dev202412170805__py3-none-any.whl

langfun 0.1.2.dev202412140804__py3-none-any.whl → 0.1.2.dev202412170805__py3-none-any.whl