PyPI - langfun - Versions diffs - 0.0.2.dev20240429__py3-none-any.whl → 0.1.2.dev202501140804__py3-none-any.whl - Mend

langfun 0.0.2.dev20240429__py3-none-any.whl → 0.1.2.dev202501140804__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (144) hide show

langfun/__init__.py +20 -2
langfun/core/__init__.py +16 -5
langfun/core/agentic/__init__.py +30 -0
langfun/core/agentic/action.py +854 -0
langfun/core/agentic/action_eval.py +150 -0
langfun/core/agentic/action_eval_test.py +109 -0
langfun/core/agentic/action_test.py +136 -0
langfun/core/coding/python/__init__.py +5 -11
langfun/core/coding/python/correction.py +37 -21
langfun/core/coding/python/correction_test.py +29 -3
langfun/core/coding/python/execution.py +40 -216
langfun/core/coding/python/execution_test.py +29 -89
langfun/core/coding/python/generation.py +21 -11
langfun/core/coding/python/generation_test.py +2 -2
langfun/core/coding/python/parsing.py +108 -193
langfun/core/coding/python/parsing_test.py +2 -105
langfun/core/component.py +63 -2
langfun/core/component_test.py +53 -0
langfun/core/concurrent.py +414 -117
langfun/core/concurrent_test.py +111 -24
langfun/core/console.py +18 -5
langfun/core/console_test.py +17 -0
langfun/core/eval/__init__.py +16 -1
langfun/core/eval/base.py +622 -174
langfun/core/eval/base_test.py +200 -54
langfun/core/eval/matching.py +63 -76
langfun/core/eval/matching_test.py +17 -8
langfun/core/eval/patching.py +130 -0
langfun/core/eval/patching_test.py +170 -0
langfun/core/eval/scoring.py +26 -26
langfun/core/eval/scoring_test.py +19 -2
langfun/core/eval/v2/__init__.py +42 -0
langfun/core/eval/v2/checkpointing.py +380 -0
langfun/core/eval/v2/checkpointing_test.py +228 -0
langfun/core/eval/v2/eval_test_helper.py +136 -0
langfun/core/eval/v2/evaluation.py +725 -0
langfun/core/eval/v2/evaluation_test.py +180 -0
langfun/core/eval/v2/example.py +305 -0
langfun/core/eval/v2/example_test.py +128 -0
langfun/core/eval/v2/experiment.py +1048 -0
langfun/core/eval/v2/experiment_test.py +433 -0
langfun/core/eval/v2/metric_values.py +156 -0
langfun/core/eval/v2/metric_values_test.py +80 -0
langfun/core/eval/v2/metrics.py +357 -0
langfun/core/eval/v2/metrics_test.py +203 -0
langfun/core/eval/v2/progress.py +348 -0
langfun/core/eval/v2/progress_test.py +82 -0
langfun/core/eval/v2/progress_tracking.py +210 -0
langfun/core/eval/v2/progress_tracking_test.py +66 -0
langfun/core/eval/v2/reporting.py +270 -0
langfun/core/eval/v2/reporting_test.py +158 -0
langfun/core/eval/v2/runners.py +488 -0
langfun/core/eval/v2/runners_test.py +334 -0
langfun/core/langfunc.py +4 -17
langfun/core/langfunc_test.py +22 -6
langfun/core/language_model.py +577 -39
langfun/core/language_model_test.py +470 -56
langfun/core/llms/__init__.py +87 -16
langfun/core/llms/anthropic.py +312 -87
langfun/core/llms/anthropic_test.py +71 -3
langfun/core/llms/cache/base.py +21 -2
langfun/core/llms/cache/in_memory.py +13 -0
langfun/core/llms/cache/in_memory_test.py +53 -2
langfun/core/llms/compositional.py +101 -0
langfun/core/llms/compositional_test.py +73 -0
langfun/core/llms/deepseek.py +117 -0
langfun/core/llms/deepseek_test.py +61 -0
langfun/core/llms/fake.py +11 -7
langfun/core/llms/fake_test.py +14 -0
langfun/core/llms/gemini.py +507 -0
langfun/core/llms/gemini_test.py +195 -0
langfun/core/llms/google_genai.py +62 -218
langfun/core/llms/google_genai_test.py +9 -202
langfun/core/llms/groq.py +160 -144
langfun/core/llms/groq_test.py +31 -137
langfun/core/llms/llama_cpp.py +15 -42
langfun/core/llms/llama_cpp_test.py +4 -30
langfun/core/llms/openai.py +395 -203
langfun/core/llms/openai_compatible.py +179 -0
langfun/core/llms/openai_compatible_test.py +495 -0
langfun/core/llms/openai_test.py +30 -395
langfun/core/llms/rest.py +113 -0
langfun/core/llms/rest_test.py +111 -0
langfun/core/llms/vertexai.py +192 -0
langfun/core/llms/vertexai_test.py +52 -0
langfun/core/logging.py +284 -0
langfun/core/logging_test.py +125 -0
langfun/core/message.py +319 -9
langfun/core/message_test.py +190 -13
langfun/core/modalities/__init__.py +6 -2
langfun/core/modalities/audio.py +30 -0
langfun/core/modalities/audio_test.py +63 -0
langfun/core/modalities/image.py +39 -20
langfun/core/modalities/image_test.py +52 -9
langfun/core/modalities/mime.py +206 -29
langfun/core/modalities/mime_test.py +90 -9
langfun/core/modalities/ms_office.py +117 -0
langfun/core/modalities/ms_office_test.py +389 -0
langfun/core/modalities/pdf.py +22 -0
langfun/core/modalities/pdf_test.py +57 -0
langfun/core/modalities/video.py +9 -26
langfun/core/modalities/video_test.py +3 -3
langfun/core/modality.py +26 -3
langfun/core/modality_test.py +2 -2
langfun/core/sampling.py +11 -11
langfun/core/structured/__init__.py +12 -16
langfun/core/structured/completion.py +32 -5
langfun/core/structured/completion_test.py +7 -6
langfun/core/structured/description.py +2 -2
langfun/core/structured/description_test.py +3 -3
langfun/core/structured/function_generation.py +60 -27
langfun/core/structured/function_generation_test.py +72 -2
langfun/core/structured/mapping.py +97 -47
langfun/core/structured/mapping_test.py +90 -2
langfun/core/structured/parsing.py +33 -21
langfun/core/structured/parsing_test.py +53 -9
langfun/core/structured/querying.py +746 -0
langfun/core/structured/{prompting_test.py → querying_test.py} +469 -51
langfun/core/structured/schema.py +204 -97
langfun/core/structured/schema_generation.py +1 -1
langfun/core/structured/schema_test.py +130 -29
langfun/core/structured/scoring.py +125 -19
langfun/core/structured/scoring_test.py +30 -0
langfun/core/structured/tokenization.py +64 -0
langfun/core/structured/tokenization_test.py +48 -0
langfun/core/template.py +115 -1
langfun/core/template_test.py +71 -1
langfun/core/templates/conversation.py +9 -0
langfun/core/templates/conversation_test.py +4 -3
langfun/core/templates/selfplay_test.py +10 -2
langfun-0.1.2.dev202501140804.dist-info/METADATA +225 -0
langfun-0.1.2.dev202501140804.dist-info/RECORD +153 -0
{langfun-0.0.2.dev20240429.dist-info → langfun-0.1.2.dev202501140804.dist-info}/WHEEL +1 -1
langfun/core/coding/python/errors.py +0 -108
langfun/core/coding/python/errors_test.py +0 -99
langfun/core/coding/python/permissions.py +0 -90
langfun/core/coding/python/permissions_test.py +0 -86
langfun/core/structured/prompting.py +0 -238
langfun/core/text_formatting.py +0 -162
langfun/core/text_formatting_test.py +0 -47
langfun-0.0.2.dev20240429.dist-info/METADATA +0 -100
langfun-0.0.2.dev20240429.dist-info/RECORD +0 -108
{langfun-0.0.2.dev20240429.dist-info → langfun-0.1.2.dev202501140804.dist-info}/LICENSE +0 -0
{langfun-0.0.2.dev20240429.dist-info → langfun-0.1.2.dev202501140804.dist-info}/top_level.txt +0 -0

langfun/core/modalities/video.py CHANGED Viewed

@@ -13,35 +13,18 @@
 # limitations under the License.
 """Video modality."""
-import base64
-from typing import cast
+import functools
 from langfun.core.modalities import mime
-class Video(mime.MimeType):
-  """Base class for Video."""
+class Video(mime.Mime):
+  """Video."""
-  @property
-  def video_format(self) -> str:
-    return cast(str, self.mime_type.lstrip('video/'))
-  @property
-  def mime_type(self) -> str:
-    # TODO(daiyip): after cl/619658455, LaunchPad binaries cannot import `magic`
-    # correctly. This is to mitigate the issue for major Langfun users who do
-    # not use Video. We shall move this import out once the issue is fixed.
-    import magic  # pylint: disable=g-import-not-at-top
+  MIME_PREFIX = 'video'
-    video_mime_type = magic.from_buffer(self.to_bytes(), mime=True)
-    if 'video/' not in video_mime_type:
-      raise ValueError(f'Not a video: {video_mime_type!r}.')
-    return video_mime_type
+  @functools.cached_property
+  def video_format(self) -> str:
+    return self.mime_type.removeprefix(self.MIME_PREFIX + '/')
-  def _repr_html_(self) -> str:
-    if self.uri and self.uri.lower().startswith(('http:', 'https:', 'ftp:')):
-      return f'<video controls> <source src="{self.uri}"> </video>'
-    video_raw = base64.b64encode(self.to_bytes()).decode()
-    return (
-        '<video controls> <source'
-        f' src="data:video/{self.video_format};base64,{video_raw}"> </video>'
-    )
+  def _mime_control_for(self, uri: str) -> str:
+    return f'<video controls> <source src="{uri}"> </video>'

langfun/core/modalities/video_test.py CHANGED Viewed

@@ -38,12 +38,12 @@ class VideoContentTest(unittest.TestCase):
     video = video_lib.Video.from_bytes(mp4_bytes)
     self.assertEqual(video.mime_type, 'video/mp4')
     self.assertEqual(video.video_format, 'mp4')
-    self.assertIn('data:video/mp4;base64,', video._repr_html_())
+    self.assertIn('data:video/mp4;base64,', video._raw_html())
     self.assertEqual(video.to_bytes(), mp4_bytes)
   def test_bad_video(self):
     video = video_lib.Video.from_bytes(b'bad')
-    with self.assertRaisesRegex(ValueError, 'Not a video'):
+    with self.assertRaisesRegex(ValueError, 'Expected MIME type'):
       _ = video.video_format
@@ -56,7 +56,7 @@ class VideoFileTest(unittest.TestCase):
       self.assertEqual(video.video_format, 'mp4')
       self.assertEqual(video.mime_type, 'video/mp4')
       self.assertEqual(
-          video._repr_html_(),
+          video._raw_html(),
           '<video controls> <source src="http://mock/web/a.mp4"> </video>',
       )
       self.assertEqual(video.to_bytes(), mp4_bytes)

langfun/core/modality.py CHANGED Viewed

@@ -14,6 +14,8 @@
 """Interface for modality (e.g. Image, Video, etc.)."""
 import abc
+import functools
+import hashlib
 from typing import Any, ContextManager
 from langfun.core import component
 import pyglove as pg
@@ -29,11 +31,16 @@ def format_modality_as_ref(enabled: bool = True) -> ContextManager[None]:
   )
-class Modality(component.Component):
+class Modality(component.Component, pg.views.HtmlTreeView.Extension):
   """Base class for multimodal object."""
-  REF_START = '{{'
-  REF_END = '}}'
+  REF_START = '<<[['
+  REF_END = ']]>>'
+  def _on_bound(self):
+    super()._on_bound()
+    # Invalidate cached hash if modality member is changed.
+    self.__dict__.pop('hash', None)
   def format(self, *args, **kwargs) -> str:
     if self.referred_name is None or not pg.object_utils.thread_local_get(
@@ -42,10 +49,22 @@ class Modality(component.Component):
       return super().format(*args, **kwargs)
     return Modality.text_marker(self.referred_name)
+  def __str_kwargs__(self) -> dict[str, Any]:
+    # For modality objects, we don't want to use markdown format when they
+    # are rendered as parts of the prompt.
+    kwargs = super().__str_kwargs__()
+    kwargs.pop('markdown', None)
+    return kwargs
   @abc.abstractmethod
   def to_bytes(self) -> bytes:
     """Returns content in bytes."""
+  @functools.cached_property
+  def hash(self) -> str:
+    """Returns a 8-byte MD5 hash as the identifier for this modality object."""
+    return hashlib.md5(self.to_bytes()).hexdigest()[:8]
   @classmethod
   def text_marker(cls, var_name: str) -> str:
     """Returns a marker in the text for this object."""
@@ -108,3 +127,7 @@ class ModalityRef(pg.Object, pg.typing.CustomTyping):
         return ModalityRef(name=value.sym_path + k)
       return v
     return value.clone().rebind(_placehold, raise_on_no_change=False)
+class ModalityError(RuntimeError):  # pylint: disable=g-bad-exception-name
+  """Exception raised when modality is not supported."""

langfun/core/modality_test.py CHANGED Viewed

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Tests for modality."""
 from typing import Any
 import unittest
@@ -32,12 +31,13 @@ class ModalityTest(unittest.TestCase):
     v = CustomModality('a')
     self.assertIsNone(v.referred_name)
     self.assertEqual(str(v), "CustomModality(\n  content = 'a'\n)")
+    self.assertEqual(v.hash, '0cc175b9')
     _ = pg.Dict(metadata=pg.Dict(x=pg.Dict(metadata=pg.Dict(y=v))))
     self.assertEqual(v.referred_name, 'x.metadata.y')
     self.assertEqual(str(v), "CustomModality(\n  content = 'a'\n)")
     with modality.format_modality_as_ref():
-      self.assertEqual(str(v), '{{x.metadata.y}}')
+      self.assertEqual(str(v), '<<[[x.metadata.y]]>>')
 class ModalityRefTest(unittest.TestCase):

langfun/core/sampling.py CHANGED Viewed

@@ -28,14 +28,14 @@ def sweep(
     *,
     max_workers: int = 32,
     silence_on_errors: Union[
-        Type[Exception], Tuple[Type[Exception]], None
+        Type[BaseException], Tuple[Type[BaseException], ...], None
     ] = None,
     ignore_examples_with_errors: bool = True,
     **kwargs,
 ) -> Iterator[
     Tuple[
-        message_lib.Message | Exception,              # LM input.
-        Union[message_lib.Message, Exception, None],  # LM output.
+        message_lib.Message | BaseException,              # LM input.
+        Union[message_lib.Message, BaseException, None],  # LM output.
     ],
 ]:
   """Sweeps the input/output of this LangFunc concurrently.
@@ -73,15 +73,15 @@ def random_sample(
     *,
     max_workers: int = 32,
     silence_on_errors: Union[
-        Type[Exception], Tuple[Type[Exception]], None
+        Type[BaseException], Tuple[Type[BaseException], ...], None
     ] = None,
     ignore_examples_with_errors: bool = True,
     seed: int | None = None,
     **kwargs,
 ) -> Iterator[
     Tuple[
-        message_lib.Message | Exception,              # LM input.
-        Union[message_lib.Message, Exception, None],  # LM output.
+        message_lib.Message | BaseException,              # LM input.
+        Union[message_lib.Message, BaseException, None],  # LM output.
     ],
 ]:
   """Random samples the input/output of this LangFunc concurrently.
@@ -121,14 +121,14 @@ def _concurrent_sample(
     *,
     max_workers: int = 32,
     silence_on_errors: Union[
-        Type[Exception], Tuple[Type[Exception]], None
+        Type[BaseException], Tuple[Type[BaseException], ...], None
     ] = None,
     ignore_examples_with_errors: bool = True,
     **kwargs,
 ) -> Generator[
     Tuple[
-        message_lib.Message | Exception,              # LM input.
-        Union[message_lib.Message, Exception, None],  # LM output.
+        message_lib.Message | BaseException,              # LM input.
+        Union[message_lib.Message, BaseException, None],  # LM output.
     ],
     None,
     None,  # Sender type and return type.
@@ -177,6 +177,6 @@ def _concurrent_sample(
     else:
       lm_input, lm_output = error, error
     if (not ignore_examples_with_errors
-        or not (isinstance(lm_input, Exception)
-                or isinstance(lm_output, Exception))):
+        or not (isinstance(lm_input, BaseException)
+                or isinstance(lm_output, BaseException))):
       yield lm_input, lm_output

langfun/core/structured/__init__.py CHANGED Viewed

@@ -16,6 +16,8 @@
 # pylint: disable=g-bad-import-order
 # pylint: disable=g-importing-member
+from langfun.core.structured.schema import include_method_in_prompt
 from langfun.core.structured.schema import Missing
 from langfun.core.structured.schema import MISSING
 from langfun.core.structured.schema import Unknown
@@ -34,12 +36,6 @@ from langfun.core.structured.schema import class_definitions
 from langfun.core.structured.schema import annotation
 from langfun.core.structured.schema import structure_from_python
-from langfun.core.structured.schema import SchemaRepr
-from langfun.core.structured.schema import SchemaJsonRepr
-from langfun.core.structured.schema import SchemaPythonRepr
-from langfun.core.structured.schema import ValueRepr
-from langfun.core.structured.schema import ValueJsonRepr
-from langfun.core.structured.schema import ValuePythonRepr
 from langfun.core.structured.schema import schema_repr
 from langfun.core.structured.schema import source_form
 from langfun.core.structured.schema import value_repr
@@ -54,25 +50,25 @@ from langfun.core.structured.mapping import Mapping
 from langfun.core.structured.mapping import MappingError
 from langfun.core.structured.mapping import MappingExample
-from langfun.core.structured.parsing import ParseStructure
-from langfun.core.structured.parsing import ParseStructureJson
-from langfun.core.structured.parsing import ParseStructurePython
 from langfun.core.structured.parsing import parse
 from langfun.core.structured.parsing import call
-from langfun.core.structured.prompting import QueryStructure
-from langfun.core.structured.prompting import QueryStructureJson
-from langfun.core.structured.prompting import QueryStructurePython
-from langfun.core.structured.prompting import query
+from langfun.core.structured.querying import track_queries
+from langfun.core.structured.querying import QueryInvocation
+from langfun.core.structured.querying import query
+from langfun.core.structured.querying import query_and_reduce
-from langfun.core.structured.description import DescribeStructure
-from langfun.core.structured.description import describe
+from langfun.core.structured.querying import query_prompt
+from langfun.core.structured.querying import query_output
+from langfun.core.structured.querying import query_reward
-from langfun.core.structured.completion import CompleteStructure
+from langfun.core.structured.description import describe
 from langfun.core.structured.completion import complete
 from langfun.core.structured.scoring import score
+from langfun.core.structured.tokenization import tokenize
 # Expose default examples for structured operations so users could refer to
 # them.
 from langfun.core.structured.parsing import default_parse_examples

langfun/core/structured/completion.py CHANGED Viewed

@@ -21,7 +21,7 @@ from langfun.core.structured import schema as schema_lib
 import pyglove as pg
-class CompleteStructure(mapping.Mapping):
+class _CompleteStructure(mapping.Mapping):
   """Complete structure by filling the missing fields."""
   input: Annotated[
@@ -30,7 +30,7 @@ class CompleteStructure(mapping.Mapping):
   mapping_template = lf.Template("""
       {{ input_title }}:
-      {{ example.input_repr() | indent(2, True) }}
+      {{ example.input_repr(use_modality_ref=True) | indent(2, True) }}
       {%- if missing_type_dependencies(example.input) %}
@@ -45,13 +45,16 @@ class CompleteStructure(mapping.Mapping):
       {{ output_title }}:
       {%- if example.has_output %}
-      {{ example.output_repr() | indent(2, True) }}
+      {{ example.output_repr(use_modality_ref=True) | indent(2, True) }}
       {% endif -%}
       """)
   input_title = 'INPUT_OBJECT'
   output_title = 'OUTPUT_OBJECT'
   schema_title = 'CLASS_DEFINITIONS'
+  modality_refs_title: Annotated[
+      str, 'The section title for modality refs.'
+  ] = 'MODALITY_REFERENCES'
   preamble = lf.LangFunc(
       """
@@ -107,7 +110,9 @@ class CompleteStructure(mapping.Mapping):
   def class_defs_repr(self, value: Any) -> str | None:
     return schema_lib.class_definitions(
-        self.missing_type_dependencies(value), markdown=True
+        self.missing_type_dependencies(value),
+        markdown=True,
+        allowed_dependencies=set()
     )
   def postprocess_result(self, result: Any) -> Any:
@@ -146,6 +151,28 @@ class CompleteStructure(mapping.Mapping):
     pg.traverse(self.input, _visit)
     return context
+  #
+  # Helper methods for handling modalities.
+  #
+  def has_modality_refs(self, value: Any) -> bool:
+    """Returns true if the value has modalities."""
+    return not isinstance(value, lf.Modality) and pg.contains(
+        value, type=lf.Modality
+    )
+  def modalities(self, value: Any) -> dict[str, lf.Modality]:
+    return lf.Modality.from_value(value)
+  def modality_refs_repr(self, value: Any) -> str:
+    with lf.modality.format_modality_as_ref(True):
+      return pg.format(
+          self.modalities(value),
+          compact=False,
+          verbose=False,
+          python_format=True,
+      )
 def complete(
     input_value: pg.Symbolic,
@@ -214,7 +241,7 @@ def complete(
   Returns:
     The result based on the schema.
   """
-  t = CompleteStructure(
+  t = _CompleteStructure(
       input=schema_lib.mark_missing(input_value),
       default=default,
       examples=examples,

langfun/core/structured/completion_test.py CHANGED Viewed

@@ -46,7 +46,7 @@ class TripPlan(pg.Object):
 class CompleteStructureTest(unittest.TestCase):
   def test_render_no_examples(self):
-    l = completion.CompleteStructure()
+    l = completion._CompleteStructure()
     input_value = schema_lib.mark_missing(
         TripPlan.partial(
             place='San Francisco',
@@ -120,7 +120,7 @@ class CompleteStructureTest(unittest.TestCase):
     )
   def test_render_no_class_definitions(self):
-    l = completion.CompleteStructure()
+    l = completion._CompleteStructure()
     input_value = schema_lib.mark_missing(
         TripPlan.partial(
             place='San Francisco',
@@ -200,7 +200,7 @@ class CompleteStructureTest(unittest.TestCase):
     )
   def test_render_with_examples(self):
-    l = completion.CompleteStructure()
+    l = completion._CompleteStructure()
     input_value = schema_lib.mark_missing(
         TripPlan.partial(
             place='San Francisco',
@@ -411,7 +411,7 @@ class CompleteStructureTest(unittest.TestCase):
             modalities.Image.from_bytes(b'image_of_elephant'),
         )
     )
-    l = completion.CompleteStructure(
+    l = completion._CompleteStructure(
         input=input_value,
         examples=[
             mapping.MappingExample(
@@ -464,7 +464,7 @@ class CompleteStructureTest(unittest.TestCase):
             MODALITY_REFERENCES:
               {
-                'examples[0].input.image': {{examples[0].input.image}}
+                'examples[0].input.image': <<[[examples[0].input.image]]>>
               }
             OUTPUT_OBJECT:
@@ -490,7 +490,7 @@ class CompleteStructureTest(unittest.TestCase):
             MODALITY_REFERENCES:
               {
-                'input.image': {{input.image}}
+                'input.image': <<[[input.image]]>>
               }
             OUTPUT_OBJECT:
@@ -581,6 +581,7 @@ class CompleteStructureTest(unittest.TestCase):
             text='Activity(description="foo")',
             result=Activity(description='foo'),
             score=1.0,
+            is_cached=False,
             logprobs=None,
             usage=lf.LMSamplingUsage(553, 27, 580),
             tags=['lm-response', 'lm-output', 'transformed']

langfun/core/structured/description.py CHANGED Viewed

@@ -22,7 +22,7 @@ import pyglove as pg
 @pg.use_init_args(['examples'])
-class DescribeStructure(mapping.Mapping):
+class _DescribeStructure(mapping.Mapping):
   """Describe a structured value in natural language."""
   input_title = 'PYTHON_OBJECT'
@@ -106,7 +106,7 @@ def describe(
   Returns:
     The parsed result based on the schema.
   """
-  return DescribeStructure(
+  return _DescribeStructure(
       input=value,
       context=context,
       examples=examples or default_describe_examples(),

langfun/core/structured/description_test.py CHANGED Viewed

@@ -36,7 +36,7 @@ class Itinerary(pg.Object):
 class DescribeStructureTest(unittest.TestCase):
   def test_render(self):
-    l = description_lib.DescribeStructure(
+    l = description_lib._DescribeStructure(
         input=Itinerary(
             day=1,
             type='daytime',
@@ -137,7 +137,7 @@ class DescribeStructureTest(unittest.TestCase):
         ],
         hotel=None,
     )
-    l = description_lib.DescribeStructure(
+    l = description_lib._DescribeStructure(
         input=value, context='1 day itinerary to SF'
     )
     self.assertEqual(
@@ -187,7 +187,7 @@ class DescribeStructureTest(unittest.TestCase):
         ],
         hotel=None,
     )
-    l = description_lib.DescribeStructure(input=value)
+    l = description_lib._DescribeStructure(input=value)
     self.assertEqual(
         l.render().text,
         inspect.cleandoc("""

langfun/core/structured/function_generation.py CHANGED Viewed

@@ -16,16 +16,16 @@
 import functools
 import inspect
 import re
-from typing import Any, Callable, Optional, Tuple
+from typing import Any, Callable, Literal, Optional, Tuple
 from langfun.core import language_model
 from langfun.core import template
 from langfun.core.coding import python
-from langfun.core.structured import prompting
+from langfun.core.structured import querying
 import pyglove as pg
-def unittest_gen(signature, lm, num_retries=10):
+def unittest_gen(signature, lm, num_retries=1):
   """Generates unit tests for a python function signature."""
   class UnitTest(pg.Object):
@@ -39,7 +39,7 @@ def unittest_gen(signature, lm, num_retries=10):
   unittest_examples = None
   for _ in range(num_retries):
-    r = prompting.query(
+    r = querying.query(
         PythonFunctionSignature(signature=signature),
         list[UnitTest],
         lm=lm,
@@ -76,12 +76,16 @@ def unittest_with_test_cases(f, unittests):
 def _function_gen(
     func: Callable[..., Any],
+    context: dict[str, Any],
     signature: str,
     lm: language_model.LanguageModel,
-    num_retries: int = 10,
+    num_retries: int = 1,
     unittest: Optional[
-        Callable[[Callable[..., Any]], None] | list[Tuple[Any, Any]]
+        Callable[[Callable[..., Any]], None]
+        | list[Tuple[Any, Any]]
+        | Literal["auto"]
     ] = None,
+    unittest_num_retries: int = 1,
 ):
   """Generates a python function with LLM and verify its quality with unit testing."""
@@ -131,32 +135,43 @@ def _function_gen(
     """
   unittest_examples = None
-  if unittest is None:
-    unittest_examples = unittest_gen(signature, lm=lm)
-  elif not callable(unittest):
+  if unittest == "auto":
+    unittest_examples = unittest_gen(
+        signature, lm=lm, num_retries=unittest_num_retries
+    )
+  elif isinstance(unittest, list):
     unittest_examples = unittest
+  last_error = None
   for _ in range(num_retries):
     try:
-      source_code = prompting.query(
+      source_code = querying.query(
           PythonFunctionPrompt(signature=signature), lm=lm
       )
-      f = python.evaluate(source_code)
+      f = python.evaluate(source_code, global_vars=context)
       # Check whether the sigantures are the same.
       if inspect.signature(f) != inspect.signature(func):
-        continue
+        raise python.CodeError(
+            code=source_code,
+            cause=TypeError(
+                f"Signature mismatch: Expected: {inspect.signature(func)}, "
+                f"Actual: {inspect.signature(f)}.",
+            ),
+        )
       if callable(unittest):
         unittest(f)
-      else:
+      elif unittest_examples:
         unittest_with_test_cases(f, unittest_examples)
       return f, source_code
-    except Exception:  # pylint: disable=broad-exception-caught
-      pass
-  return None, None
+    except python.CodeError as e:
+      last_error = e
+      pg.logging.warning(
+          f"Bad code generated: {e}",
+      )
+  raise last_error
 def _process_signature(signature):
@@ -172,10 +187,13 @@ def _process_signature(signature):
 def function_gen(
     lm: language_model.LanguageModel,
     cache_filename: str | None = None,
-    num_retries: int = 10,
+    num_retries: int = 1,
     unittest: Optional[
-        Callable[[Callable[..., Any]], None] | list[Tuple[Any, Any]]
+        Callable[[Callable[..., Any]], None]
+        | list[Tuple[Any, Any]]
+        | Literal["auto"]
     ] = None,
+    unittest_num_retries: int = 1,
 ):
   """A decorator for automating function generation using a language model.
@@ -192,9 +210,12 @@ def function_gen(
         make to generate a suitable function implementation.
       unittest: This optional parameter enables the definition of custom unit
         tests. You can either provide a list of test cases as tuples of inputs
-        and outputs, or a function that throws an error if a test fails. If left
-        as None (the default setting), the LLM will automatically create the
-        unit test cases.
+        and outputs, or a function that throws an error if a test fails, or let
+        LLM automatically create the unit test cases. If a generated function is
+        and returned, it should pass all the unittests.
+      unittest_num_retries: If unittest is set to "auto", this parameter
+        specifies the number of times the LLM's attempts to generate unit test
+        cases.
   Returns:
       The implemented function object.
@@ -204,6 +225,13 @@ def function_gen(
     setattr(func, "__function__", None)
     setattr(func, "__source_code__", None)
+    # Prepare the globals/locals for the generated code to be evaluated against.
+    callstack = inspect.stack()
+    assert len(callstack) > 1
+    context = dict(callstack[1][0].f_globals)
+    context.update(callstack[1][0].f_locals)
+    context.pop(func.__name__, None)
     @functools.wraps(func)
     def lm_generated_func(*args, **kwargs):
       if func.__function__ is not None:
@@ -222,15 +250,20 @@ def function_gen(
         if signature in cache:
           func.__source_code__ = cache[signature]
-          func.__function__ = python.evaluate(func.__source_code__)
+          func.__function__ = python.evaluate(
+              func.__source_code__, global_vars=context
+          )
           return func.__function__(*args, **kwargs)
       func.__function__, func.__source_code__ = _function_gen(
-          func, signature, lm, num_retries=num_retries, unittest=unittest
+          func,
+          context,
+          signature,
+          lm,
+          num_retries=num_retries,
+          unittest=unittest,
+          unittest_num_retries=unittest_num_retries,
       )
-      if func.__function__ is None:
-        raise ValueError(f"Function generation failed. Signature:\n{signature}")
       if cache_filename is not None:
         cache[signature] = func.__source_code__
         cache.save(cache_filename)

langfun 0.0.2.dev20240429__py3-none-any.whl → 0.1.2.dev202501140804__py3-none-any.whl

langfun 0.0.2.dev20240429__py3-none-any.whl → 0.1.2.dev202501140804__py3-none-any.whl