PyPI - langfun - Versions diffs - 0.0.2.dev20240330__py3-none-any.whl → 0.1.2.dev202501140804__py3-none-any.whl - Mend

langfun 0.0.2.dev20240330py3-none-any.whl → 0.1.2.dev202501140804py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (145) hide show

langfun/__init__.py +22 -2
langfun/core/__init__.py +17 -5
langfun/core/agentic/__init__.py +30 -0
langfun/core/agentic/action.py +854 -0
langfun/core/agentic/action_eval.py +150 -0
langfun/core/agentic/action_eval_test.py +109 -0
langfun/core/agentic/action_test.py +136 -0
langfun/core/coding/python/__init__.py +5 -11
langfun/core/coding/python/correction.py +37 -28
langfun/core/coding/python/correction_test.py +29 -3
langfun/core/coding/python/execution.py +40 -216
langfun/core/coding/python/execution_test.py +29 -89
langfun/core/coding/python/generation.py +21 -11
langfun/core/coding/python/generation_test.py +2 -2
langfun/core/coding/python/parsing.py +108 -193
langfun/core/coding/python/parsing_test.py +2 -105
langfun/core/component.py +69 -2
langfun/core/component_test.py +54 -0
langfun/core/concurrent.py +414 -117
langfun/core/concurrent_test.py +111 -24
langfun/core/console.py +18 -5
langfun/core/console_test.py +17 -0
langfun/core/eval/__init__.py +17 -0
langfun/core/eval/base.py +767 -140
langfun/core/eval/base_test.py +238 -53
langfun/core/eval/matching.py +80 -76
langfun/core/eval/matching_test.py +19 -9
langfun/core/eval/patching.py +130 -0
langfun/core/eval/patching_test.py +170 -0
langfun/core/eval/scoring.py +37 -28
langfun/core/eval/scoring_test.py +21 -3
langfun/core/eval/v2/__init__.py +42 -0
langfun/core/eval/v2/checkpointing.py +380 -0
langfun/core/eval/v2/checkpointing_test.py +228 -0
langfun/core/eval/v2/eval_test_helper.py +136 -0
langfun/core/eval/v2/evaluation.py +725 -0
langfun/core/eval/v2/evaluation_test.py +180 -0
langfun/core/eval/v2/example.py +305 -0
langfun/core/eval/v2/example_test.py +128 -0
langfun/core/eval/v2/experiment.py +1048 -0
langfun/core/eval/v2/experiment_test.py +433 -0
langfun/core/eval/v2/metric_values.py +156 -0
langfun/core/eval/v2/metric_values_test.py +80 -0
langfun/core/eval/v2/metrics.py +357 -0
langfun/core/eval/v2/metrics_test.py +203 -0
langfun/core/eval/v2/progress.py +348 -0
langfun/core/eval/v2/progress_test.py +82 -0
langfun/core/eval/v2/progress_tracking.py +210 -0
langfun/core/eval/v2/progress_tracking_test.py +66 -0
langfun/core/eval/v2/reporting.py +270 -0
langfun/core/eval/v2/reporting_test.py +158 -0
langfun/core/eval/v2/runners.py +488 -0
langfun/core/eval/v2/runners_test.py +334 -0
langfun/core/langfunc.py +3 -21
langfun/core/langfunc_test.py +26 -8
langfun/core/language_model.py +686 -48
langfun/core/language_model_test.py +681 -44
langfun/core/llms/__init__.py +100 -12
langfun/core/llms/anthropic.py +488 -0
langfun/core/llms/anthropic_test.py +235 -0
langfun/core/llms/cache/base.py +21 -2
langfun/core/llms/cache/in_memory.py +13 -0
langfun/core/llms/cache/in_memory_test.py +88 -28
langfun/core/llms/compositional.py +101 -0
langfun/core/llms/compositional_test.py +73 -0
langfun/core/llms/deepseek.py +117 -0
langfun/core/llms/deepseek_test.py +61 -0
langfun/core/llms/fake.py +39 -26
langfun/core/llms/fake_test.py +136 -11
langfun/core/llms/gemini.py +507 -0
langfun/core/llms/gemini_test.py +195 -0
langfun/core/llms/google_genai.py +62 -218
langfun/core/llms/google_genai_test.py +9 -197
langfun/core/llms/groq.py +276 -0
langfun/core/llms/groq_test.py +64 -0
langfun/core/llms/llama_cpp.py +15 -40
langfun/core/llms/llama_cpp_test.py +4 -30
langfun/core/llms/openai.py +436 -226
langfun/core/llms/openai_compatible.py +179 -0
langfun/core/llms/openai_compatible_test.py +495 -0
langfun/core/llms/openai_test.py +35 -174
langfun/core/llms/rest.py +113 -0
langfun/core/llms/rest_test.py +111 -0
langfun/core/llms/vertexai.py +192 -0
langfun/core/llms/vertexai_test.py +52 -0
langfun/core/logging.py +284 -0
langfun/core/logging_test.py +125 -0
langfun/core/message.py +319 -9
langfun/core/message_test.py +190 -13
langfun/core/modalities/__init__.py +6 -2
langfun/core/modalities/audio.py +30 -0
langfun/core/modalities/audio_test.py +63 -0
langfun/core/modalities/image.py +39 -20
langfun/core/modalities/image_test.py +52 -9
langfun/core/modalities/mime.py +206 -29
langfun/core/modalities/mime_test.py +90 -9
langfun/core/modalities/ms_office.py +117 -0
langfun/core/modalities/ms_office_test.py +389 -0
langfun/core/modalities/pdf.py +22 -0
langfun/core/modalities/pdf_test.py +57 -0
langfun/core/modalities/video.py +9 -23
langfun/core/modalities/video_test.py +3 -3
langfun/core/modality.py +26 -3
langfun/core/modality_test.py +2 -2
langfun/core/sampling.py +11 -11
langfun/core/structured/__init__.py +15 -16
langfun/core/structured/completion.py +32 -5
langfun/core/structured/completion_test.py +9 -8
langfun/core/structured/description.py +2 -2
langfun/core/structured/description_test.py +3 -3
langfun/core/structured/function_generation.py +278 -0
langfun/core/structured/function_generation_test.py +399 -0
langfun/core/structured/mapping.py +150 -46
langfun/core/structured/mapping_test.py +105 -0
langfun/core/structured/parsing.py +33 -21
langfun/core/structured/parsing_test.py +71 -22
langfun/core/structured/querying.py +746 -0
langfun/core/structured/{prompting_test.py → querying_test.py} +545 -60
langfun/core/structured/schema.py +208 -99
langfun/core/structured/schema_generation.py +1 -1
langfun/core/structured/schema_generation_test.py +2 -2
langfun/core/structured/schema_test.py +133 -34
langfun/core/structured/scoring.py +125 -19
langfun/core/structured/scoring_test.py +30 -0
langfun/core/structured/tokenization.py +64 -0
langfun/core/structured/tokenization_test.py +48 -0
langfun/core/template.py +240 -11
langfun/core/template_test.py +146 -1
langfun/core/templates/conversation.py +9 -0
langfun/core/templates/conversation_test.py +4 -3
langfun/core/templates/selfplay_test.py +14 -2
langfun-0.1.2.dev202501140804.dist-info/METADATA +225 -0
langfun-0.1.2.dev202501140804.dist-info/RECORD +153 -0
{langfun-0.0.2.dev20240330.dist-info → langfun-0.1.2.dev202501140804.dist-info}/WHEEL +1 -1
langfun/core/coding/python/errors.py +0 -108
langfun/core/coding/python/errors_test.py +0 -99
langfun/core/coding/python/permissions.py +0 -90
langfun/core/coding/python/permissions_test.py +0 -86
langfun/core/structured/prompting.py +0 -217
langfun/core/text_formatting.py +0 -162
langfun/core/text_formatting_test.py +0 -47
langfun-0.0.2.dev20240330.dist-info/METADATA +0 -99
langfun-0.0.2.dev20240330.dist-info/RECORD +0 -102
{langfun-0.0.2.dev20240330.dist-info → langfun-0.1.2.dev202501140804.dist-info}/LICENSE +0 -0
{langfun-0.0.2.dev20240330.dist-info → langfun-0.1.2.dev202501140804.dist-info}/top_level.txt +0 -0

langfun/core/agentic/action_eval.py ADDED Viewed

@@ -0,0 +1,150 @@
+# Copyright 2024 The Langfun Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Evaluation (v1) for Langfun agentic actions."""
+import io
+import os
+from typing import Annotated, Any
+import langfun.core as lf
+from langfun.core import eval as lf_eval
+from langfun.core.agentic import action as action_lib
+import pyglove as pg
+class ActionEval(lf.eval.v2.Evaluation):
+  """Evaluation that runs the action carried by each example."""
+  action_args: Annotated[
+      dict[str, Any], 'Arguments to call the action.'
+  ] = {}
+  def process(self, example: pg.Dict) -> tuple[str, dict[str, Any]]:
+    """Invokes `example.action` within a newly created session.
+    Args:
+      example: Input example; its `action` attribute is the callable to run.
+    Returns:
+      A tuple of the session's `final_result` and a dict holding the session
+      under the key 'session'.
+    """
+    target = example.action
+    run_session = action_lib.Session()
+    # The action is invoked with the log level set to 'fatal'.
+    with lf.logging.use_log_level('fatal'):
+      target(session=run_session, **self.action_args)
+    outcome = run_session.final_result
+    return outcome, {'session': run_session}
+#
+# TODO(daiyip): Remove V1 once V2 is fully launched.
+#
+# Placeholder schema functor that always returns `int`. `ActionEvalV1` assigns
+# it to `schema_fn`, where the schema is noted as unused.
+@pg.functor()
+def _dummy_schema():
+  return int
+# Per-example record that `ActionEvalV1.audit` converts to HTML and saves as
+# `example_<idx>.html`.
+class ExampleView(pg.Object):
+  # Index of the example (populated from `example_idx` in `audit`).
+  id: int
+  # The example that was processed.
+  input: Any
+  # `message.result` of the output message, or None when there is no message.
+  output: Any
+  # Error passed to `audit` for this example, if any.
+  error: str | None = None
+class ActionEvalV1(lf_eval.Matching):
+  """Base class for action evaluations.
+  The input function should return a list of pg.Dict, with `action` and
+  `groundtruth` fields.
+  """
+  # We override the schema and prompt to dummy values since they are not used.
+  schema_fn = _dummy_schema()
+  prompt = '<unused>'
+  def process(self, example: pg.Dict, **kwargs):
+    """Runs `example.action` in a new session and returns it as a message.
+    Args:
+      example: Input example; its `action` attribute is the callable to run.
+      **kwargs: Extra keyword arguments forwarded to the action call.
+    Returns:
+      The value of `session.as_message()` after the action has been called.
+    """
+    action = example.action
+    session = action_lib.Session()
+    action(session=session, lm=self.lm, **kwargs)
+    return session.as_message()
+  def answer(self, output: Any, example: pg.Dict) -> Any:
+    """Returns `output` unchanged; `example` is not consulted."""
+    return output
+  def groundtruth(self, example: Any) -> Any:
+    """Returns the `groundtruth` field of `example`."""
+    return example.groundtruth
+  def audit(
+      self,
+      example_idx: int,
+      example: Any,
+      message: lf.Message | None,
+      error: Exception | None = None,
+      dryrun: bool = False,
+  ):
+    """Audits an example and schedules an HTML dump of it.
+    After delegating to the parent `audit`, an `ExampleView` of the example is
+    saved to `<self.dir>/example_<example_idx>.html`, but only when `dryrun`
+    is False and `self.dir` is set.
+    Args:
+      example_idx: Index of the example; used in the HTML file name.
+      example: The example being audited.
+      message: Output message, or None; its `result` is stored in the view.
+      error: Error for this example, if any.
+      dryrun: If True, no HTML file is written.
+    """
+    super().audit(example_idx, example, message, error, dryrun)
+    # Write each example to HTML.
+    if not dryrun and self.dir:
+      def _save_html():
+        # NOTE(review): `ExampleView.error` is annotated `str | None` while
+        # `error` here is `Exception | None` -- confirm the intended type.
+        ExampleView(
+            example_idx,
+            example,
+            None if message is None else message.result,
+            error
+        ).to_html(
+            collapse_level=None,
+            enable_summary_tooltip=False,
+        ).save(
+            os.path.join(self.dir, f'example_{example_idx}.html')
+        )
+      # Write HTML in a separate thread to avoid blocking the main thread.
+      lf.concurrent.get_executor(
+          'background_eval_io', max_workers=16
+      ).submit(_save_html)
+  def _render_mismatches(self, s: io.StringIO) -> None:
+    """Writes links to mismatched examples plus an iframe showing the first.
+    Args:
+      s: Text buffer that receives the HTML fragment.
+    """
+    s.write('<h2> Mismatches (Incorrect) </h2>')
+    first_url = None
+    mismatched_ids = sorted(
+        example_idx for example_idx, *_ in self.mismatches
+    )
+    for example_idx in mismatched_ids:
+      url = os.path.join(self.dir, f'example_{example_idx}.html')
+      if first_url is None:
+        first_url = url
+      s.write(
+          f'<a href="{url}" style="margin-right: 10px" target="example_view">'
+          f'{example_idx}</a> '
+      )
+    if first_url:
+      # Adjacent literals are joined verbatim, so the space that separates the
+      # `name` and `src` attributes has to be written explicitly.
+      s.write(
+          '<iframe style="border:0;width:100%;height:100%" name="example_view"'
+          f' src="{first_url}" title="Example View"></iframe>'
+      )
+    else:
+      s.write('No mismatches found.')
+  def _render_matches(self, s: io.StringIO) -> None:
+    """Writes links to matched examples plus an iframe showing the first.
+    Args:
+      s: Text buffer that receives the HTML fragment.
+    """
+    s.write('<h2> Matches (correct) </h2>')
+    first_url = None
+    matched_ids = sorted(
+        example_idx for example_idx, *_ in self.matches
+    )
+    for example_idx in matched_ids:
+      url = os.path.join(self.dir, f'example_{example_idx}.html')
+      if first_url is None:
+        first_url = url
+      # NOTE(review): unlike `_render_mismatches`, these links carry no
+      # `target="example_view"` -- confirm whether that is intentional.
+      s.write(
+          f'<a href="{url}" style="margin-right: 10px">{example_idx}</a> '
+      )
+    if first_url:
+      # Adjacent literals are joined verbatim, so the space that separates the
+      # `name` and `src` attributes has to be written explicitly.
+      s.write(
+          '<iframe style="border:0;width:100%;height:100%" name="example_view"'
+          f' src="{first_url}" title="Example View"></iframe>'
+      )
+    else:
+      s.write('No matches found.')

langfun/core/agentic/action_eval_test.py ADDED Viewed

@@ -0,0 +1,109 @@
+# Copyright 2024 The Langfun Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for action evaluation."""
+import os
+import tempfile
+import unittest
+from langfun.core import eval as lf_eval
+from langfun.core import llms as lf_llms
+from langfun.core.agentic import action as action_lib
+from langfun.core.agentic import action_eval
+import pyglove as pg
+# Minimal action used by the tests in this file: calling it returns `x`.
+class Foo(action_lib.Action):
+  x: int
+  def call(self, session, **kwargs):
+    # The session and any extra call arguments are deliberately unused.
+    del session, kwargs
+    return self.x
+# Input functor shared by the tests: two `Foo` examples that both declare
+# groundtruth 1, so `Foo(1)` returns the groundtruth and `Foo(2)` does not.
+@pg.functor()
+def foo_inputs():
+  return [
+      pg.Dict(action=Foo(1), groundtruth=1),
+      pg.Dict(action=Foo(2), groundtruth=1),
+  ]
+class ActionEvalTest(unittest.TestCase):
+  """Tests for `action_eval.ActionEval`."""
+  def test_basics(self):
+    class FooEval(action_eval.ActionEval):
+      inputs = foo_inputs()
+      metrics = [lf_eval.v2.metrics.Match()]
+      action_args = {'lm': lf_llms.Echo()}
+    experiment = FooEval()
+    experiment.run(
+        os.path.join(tempfile.gettempdir(), 'foo_eval'), plugins=[]
+    )
+    # One of the two inputs returns its groundtruth, the other does not.
+    match_metric = experiment.metrics[0]
+    self.assertEqual(match_metric.matches, 0.5)
+    self.assertEqual(match_metric.mismatches, 0.5)
+class ActionEvalV1Test(unittest.TestCase):
+  """Tests for `action_eval.ActionEvalV1`."""
+  def test_basics(self):
+    class FooEval(action_eval.ActionEvalV1):
+      lm = lf_llms.Echo()
+      inputs = foo_inputs()
+    experiment = FooEval()
+    summary = experiment.run(summary=False)
+    pg.print(summary)
+    expected_setup = {
+        'id': experiment.id,
+        'dir': None,
+        'model': 'Echo',
+        'prompt_template': '<unused>',
+        'method': 'query',
+        'schema_fn': '_dummy_schema()',
+    }
+    expected_cache_stats = {
+        'use_cache': True,
+        'num_queries': 0,
+        'num_hits': 0,
+        'num_updates': 0,
+    }
+    expected_metrics = {
+        'total': 2,
+        'failures': 0,
+        'failure_rate': 0.0,
+        'oop_failures': 0,
+        'oop_failure_rate': 0.0,
+        'non_oop_failures': 0,
+        'non_oop_failure_rate': 0.0,
+        'failure_breakdown': {},
+        'num_matches': 0,
+        'match_rate': 0.0,
+        'num_mismatches': 2,
+        'mismatch_rate': 1.0,
+    }
+    self.assertEqual(
+        summary,
+        {
+            'experiment_setup': expected_setup,
+            'cache_stats': expected_cache_stats,
+            'metrics': expected_metrics,
+            'usage': None,
+        },
+    )
+if __name__ == '__main__':
+  unittest.main()

langfun/core/agentic/action_test.py ADDED Viewed

@@ -0,0 +1,136 @@
+# Copyright 2024 The Langfun Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for base action."""
+import unittest
+import langfun.core as lf
+from langfun.core.agentic import action as action_lib
+from langfun.core.llms import fake
+import langfun.core.structured as lf_structured
+import pyglove as pg
+# Tests for `action_lib.Session`, driven through nested action invocations.
+class SessionTest(unittest.TestCase):
+  # Runs `Foo`, which itself invokes `Bar`, then inspects the recorded
+  # session: root invocation, Foo's execution trace, and serialization.
+  def test_basics(self):
+    # Alias the test case so the nested action classes can assert through it
+    # (inside `call`, `self` refers to the action).
+    test = self
+    class Bar(action_lib.Action):
+      def call(self, session, *, lm, **kwargs):
+        test.assertIs(session.current_action.action, self)
+        session.info('Begin Bar')
+        session.query('bar', lm=lm)
+        session.add_metadata(note='bar')
+        return 2
+    class Foo(action_lib.Action):
+      x: int
+      def call(self, session, *, lm, **kwargs):
+        test.assertIs(session.current_action.action, self)
+        with session.phase('prepare'):
+          session.info('Begin Foo', x=1)
+          session.query('foo', lm=lm)
+        # This query is issued via `lf_structured.query` rather than
+        # `session.query`, inside a `track_queries` scope.
+        with session.track_queries():
+          self.make_additional_query(lm)
+        session.add_metadata(note='foo')
+        return self.x + Bar()(session, lm=lm)
+      def make_additional_query(self, lm):
+        lf_structured.query('additional query', lm=lm)
+    lm = fake.StaticResponse('lm response')
+    foo = Foo(1)
+    # Foo returns x (1) plus Bar's return value (2).
+    self.assertEqual(foo(lm=lm), 3)
+    session = foo.session
+    self.assertIsNotNone(session)
+    self.assertIsInstance(session.root.action, action_lib.RootAction)
+    self.assertIs(session.current_action, session.root)
+    #
+    # Inspecting the root invocation.
+    #
+    root = session.root
+    self.assertEqual(len(root.execution.items), 1)
+    self.assertIs(root.execution.items[0].action, foo)
+    self.assertTrue(root.execution.has_started)
+    self.assertTrue(root.execution.has_stopped)
+    self.assertGreater(root.execution.elapse, 0)
+    self.assertEqual(root.result, 3)
+    self.assertEqual(root.metadata, dict(note='foo'))
+    # The root space should have one action (foo), no queries, and no logs.
+    self.assertEqual(len(list(root.actions)), 1)
+    self.assertEqual(len(list(root.queries)), 0)
+    self.assertEqual(len(list(root.logs)), 0)
+    # 1 query from Bar and 2 from Foo.
+    self.assertEqual(len(list(root.all_queries)), 3)
+    # 1 log from Bar and 1 from Foo.
+    self.assertEqual(len(list(root.all_logs)), 2)
+    self.assertEqual(root.usage_summary.total.num_requests, 3)
+    # Inspecting the top-level action (Foo)
+    foo_invocation = root.execution.items[0]
+    # Expected items, in order: the 'prepare' phase, the tracked query, and
+    # the invocation of Bar.
+    self.assertEqual(len(foo_invocation.execution.items), 3)
+    # Prepare phase.
+    prepare_phase = foo_invocation.execution.items[0]
+    self.assertIsInstance(
+        prepare_phase, action_lib.ExecutionTrace
+    )
+    self.assertEqual(len(prepare_phase.items), 2)
+    self.assertTrue(prepare_phase.has_started)
+    self.assertTrue(prepare_phase.has_stopped)
+    self.assertEqual(prepare_phase.usage_summary.total.num_requests, 1)
+    # Tracked queries.
+    query_invocation = foo_invocation.execution.items[1]
+    self.assertIsInstance(query_invocation, lf_structured.QueryInvocation)
+    self.assertIs(query_invocation.lm, lm)
+    # Invocation to Bar.
+    bar_invocation = foo_invocation.execution.items[2]
+    self.assertIsInstance(bar_invocation, action_lib.ActionInvocation)
+    self.assertIsInstance(bar_invocation.action, Bar)
+    self.assertEqual(bar_invocation.result, 2)
+    self.assertEqual(bar_invocation.metadata, dict(note='bar'))
+    self.assertEqual(len(bar_invocation.execution.items), 2)
+    # Save to HTML
+    self.assertIn('result', session.to_html().content)
+    # Save session to JSON
+    json_str = session.to_json_str(save_ref_value=True)
+    self.assertIsInstance(pg.from_json_str(json_str), action_lib.Session)
+  # Calls every session logging method once; there are no assertions, so the
+  # test only checks that the calls do not raise.
+  def test_log(self):
+    session = action_lib.Session()
+    session.debug('hi', x=1, y=2)
+    session.info('hi', x=1, y=2)
+    session.warning('hi', x=1, y=2)
+    session.error('hi', x=1, y=2)
+    session.fatal('hi', x=1, y=2)
+  # A newly created session converts to an `lf.AIMessage`.
+  def test_as_message(self):
+    session = action_lib.Session()
+    self.assertIsInstance(session.as_message(), lf.AIMessage)
+if __name__ == '__main__':
+  unittest.main()

langfun/core/coding/python/__init__.py CHANGED Viewed

@@ -16,19 +16,13 @@
 # pylint: disable=g-bad-import-order
 # pylint: disable=g-importing-member
-from langfun.core.coding.python.errors import CodeError
-from langfun.core.coding.python.permissions import CodePermission
-from langfun.core.coding.python.permissions import permission
-from langfun.core.coding.python.permissions import get_permission
-from langfun.core.coding.python.parsing import PythonCodeParser
+# Expose from `lf.coding` as aliases for `pg.coding` for backward compatibility.
+from langfun.core.coding.python.execution import CodeError
+from langfun.core.coding.python.execution import CodePermission
 from langfun.core.coding.python.execution import context
-from langfun.core.coding.python.execution import get_context
+from langfun.core.coding.python.parsing import clean
 from langfun.core.coding.python.execution import evaluate
-from langfun.core.coding.python.execution import sandbox_call
-from langfun.core.coding.python.execution import call
 from langfun.core.coding.python.execution import run
 from langfun.core.coding.python.generation import PythonCode

langfun/core/coding/python/correction.py CHANGED Viewed

@@ -12,10 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Python code error correction."""
-import re
 from typing import Any
 import langfun.core as lf
-from langfun.core.coding.python import errors
 from langfun.core.coding.python import execution
 import pyglove as pg
@@ -31,11 +29,6 @@ class CorrectedCode(pg.Object):
   corrected_code: str
-def remove_docstrings(code):
-  pattern = re.compile(r"(def .+?:\s*?)('''|\"\"\")((.|\s)*?)(\2)", re.DOTALL)
-  return pattern.sub(r"\1", code)
 def run_with_correction(
     code: str,
     error: str | None = None,
@@ -46,6 +39,7 @@ def run_with_correction(
     sandbox: bool | None = None,
     timeout: int | None = 5,
     returns_code: bool = False,
+    returns_stdout: bool = False,
     outputs_intermediate: bool = False,
 ) -> Any | tuple[Any, str]:
   """Correct code with a language model via self-play.
@@ -68,6 +62,7 @@ def run_with_correction(
       timeout. Applicable only when sandbox is set to True.
     returns_code: If True, the return value is a tuple of (result, final code).
       Otherwise the return value is the result only.
+    returns_stdout: If True, the stdout (a str) will be returned.
     outputs_intermediate: If True, intermediate output will be outputted as a
       dict, with the last line's value accessible by key '__result__'. Otherwise
       the value of the last line will be returned.
@@ -82,29 +77,33 @@ def run_with_correction(
   # Delay import at runtime to avoid circular dependency.
   # pylint: disable=g-import-not-at-top
   # pytype: disable=import-error
-  from langfun.core.structured import prompting
+  from langfun.core.structured import querying
   # pytype: enable=import-error
   # pylint: enable=g-import-not-at-top
-  code = remove_docstrings(code)
   if max_attempts == 0:
-    result = execution.run(
-        code,
-        global_vars=global_vars,
-        sandbox=sandbox,
-        timeout=timeout,
-        outputs_intermediate=outputs_intermediate,
+    result = _maybe_custom_validate(
+        execution.run(
+            code,
+            global_vars=global_vars,
+            sandbox=sandbox,
+            timeout=timeout,
+            returns_stdout=returns_stdout,
+            outputs_intermediate=outputs_intermediate,
+        )
     )
     return (result, code) if returns_code else result
   def result_and_error(code: str) -> tuple[Any, str | None]:
     try:
-      result = execution.run(
-          code,
-          global_vars=global_vars,
-          sandbox=sandbox,
-          timeout=timeout,
-          outputs_intermediate=outputs_intermediate,
+      result = _maybe_custom_validate(
+          execution.run(
+              code,
+              global_vars=global_vars,
+              sandbox=sandbox,
+              timeout=timeout,
+              outputs_intermediate=outputs_intermediate,
+          )
       )
       return (result, None)
     except Exception as e:  # pylint: disable=broad-exception-caught
@@ -122,10 +121,10 @@ def run_with_correction(
     # structure.
     try:
       # Disable autofix for code correction to avoid recursion.
-      correction = prompting.query(
+      correction = querying.query(
           CodeWithError(code=code, error=error), CorrectedCode, lm=lm, autofix=0
       )
-    except errors.CodeError:
+    except pg.coding.CodeError:
       break
     code = correction.corrected_code
@@ -133,7 +132,7 @@ def run_with_correction(
     if error is None:
       return (result, code) if returns_code else result
-  raise errors.CodeError(
+  raise pg.coding.CodeError(
       code,
       RuntimeError(
           f"Cannot correct code after {num_attempts} attempts. "
@@ -191,9 +190,19 @@ def correct(
 def _error_feedback_str(error: Exception) -> str:
   """Returns the error str for feedback."""
-  if isinstance(error, errors.CodeError):
-    return lf.text_formatting.decolored(
-        error.format(include_complete_code=False)
-    )
+  if isinstance(error, pg.coding.CodeError):
+    return pg.decolor(error.format(include_complete_code=False))
   else:
     return f"Encountered {error.__class__.__name__}: {error}"
+def _maybe_custom_validate(result: Any) -> Any:
+  """Runs the `__validate__` hook of an execution result when it has one.
+  If `result` is a dict containing the key '__result__', the hook is looked up
+  on that entry's value; otherwise it is looked up on `result` itself.
+  Args:
+    result: Value produced by running the code.
+  Returns:
+    `result` itself, after the hook (if any) has been called.
+  """
+  wrapped = isinstance(result, dict) and "__result__" in result
+  target = result["__result__"] if wrapped else result
+  if hasattr(target, "__validate__"):
+    target.__validate__()
+  return result

langfun/core/coding/python/correction_test.py CHANGED Viewed

@@ -17,8 +17,8 @@ import inspect
 import unittest
 from langfun.core.coding.python import correction
-from langfun.core.coding.python import errors
 from langfun.core.llms import fake
+import pyglove as pg
 class RunWithCorrectionTest(unittest.TestCase):
@@ -45,6 +45,32 @@ class RunWithCorrectionTest(unittest.TestCase):
     )
     self.assertEqual(result, 4)
+  # Checks that an error raised from a result's `__validate__` hook leads to a
+  # correction attempt, and that the corrected result is what gets returned.
+  def test_run_with_correction_upon_custom_validation(self):
+    class Foo(pg.Object):
+      x: int
+      def __validate__(self):
+        # Values above 1 are rejected; negative values are reset to 0.
+        if self.x > 1:
+          raise ValueError('value should be less or equal than 1.')
+        if self.x < 0:
+          self.rebind(x=0, skip_notification=True)
+    result = correction.run_with_correction(
+        # Initial code: `Foo(x=2)` is rejected by `__validate__`.
+        inspect.cleandoc("""
+            Foo(x=2)
+            """),
+        global_vars=dict(Foo=Foo),
+        # The fake LM answers with corrected code `Foo(x=-1)`.
+        lm=fake.StaticSequence([
+            inspect.cleandoc("""
+                CorrectedCode(
+                    corrected_code='Foo(x=-1)',
+                )
+                """),
+        ]),
+    )
+    # `__validate__` rebinds the negative x to 0.
+    self.assertEqual(result, Foo(0))
   def test_run_without_correction(self):
     result = correction.run_with_correction(
         inspect.cleandoc("""
@@ -55,7 +81,7 @@ class RunWithCorrectionTest(unittest.TestCase):
         max_attempts=0,
     )
     self.assertEqual(result, 4)
-    with self.assertRaises(errors.CodeError):
+    with self.assertRaises(pg.coding.CodeError):
       correction.run_with_correction(
           inspect.cleandoc("""
             x = 1,
@@ -98,7 +124,7 @@ class CorrectTest(unittest.TestCase):
   def test_correct_reaching_limit(self):
     with self.assertRaisesRegex(
-        errors.CodeError, 'Cannot correct code after 1 attempts'
+        pg.coding.CodeError, 'Cannot correct code after 1 attempts'
     ):
       correction.correct(
           inspect.cleandoc("""

langfun 0.0.2.dev20240330__py3-none-any.whl → 0.1.2.dev202501140804__py3-none-any.whl

langfun 0.0.2.dev20240330py3-none-any.whl → 0.1.2.dev202501140804py3-none-any.whl