PyPI - langfun - Versions diffs - 0.1.1.dev20240819__py3-none-any.whl → 0.1.1.dev20240821__py3-none-any.whl - Mend

langfun 0.1.1.dev20240819__py3-none-any.whl → 0.1.1.dev20240821__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

langfun/core/eval/base.py CHANGED Viewed

@@ -1061,6 +1061,8 @@ class Evaluation(Evaluable):
     try:
       with lf.use_settings(debug=debug):
         output_message = copy.process(example, **(self.additional_args or {}))
+        self.process_output(example, output_message)
         if self.schema is None:
           output = output_message.text
         else:
@@ -1123,7 +1125,9 @@ class Evaluation(Evaluable):
         # generated code with calls to `input` will raise an error, thus not
         # blocking the evaluation.
         with lf_coding.context(input=None):
-          return self.process(example, **(self.additional_args or {}))
+          output_message = self.process(example, **(self.additional_args or {}))
+          self.process_output(example, output_message)
+          return output_message
       try:
         for example, message, error in lf.concurrent_map(
@@ -1201,6 +1205,29 @@ class Evaluation(Evaluable):
           **kwargs,
       )
+  def process_output(self, example: Any, output: lf.Message) -> None:
+    """Process the output for an example.
+    Subclasses can override this method to generate and attach additional
+    metadata for debugging purpose. For example, draw bounding boxes on the
+    input image based on LLM predicted boxes and attach to output_message's
+    metadata.
+    Example:
+      class BoundingBoxEval(lf.eval.Matching):
+        ...
+        def process_output(example, output):
+          output.metadata.image_with_bbox = draw_bboxes(
+              example.image, output.result)
+    Args:
+      example: User input.
+      output: LLM's output message. Users could attach additional
+        information to the message, which will be shown in debugging
+    """
+    del example, output
   def _status(self, progress: lf.concurrent.Progress) -> dict[str, Any]:
     return {
         'Model': self.lm.model_id,

langfun/core/repr_utils.py CHANGED Viewed

@@ -15,6 +15,7 @@
 import collections
 import contextlib
+import html
 import io
 from typing import Any, Callable, Iterator
@@ -126,7 +127,7 @@ def html_repr(
     if hasattr(v, '_repr_html_'):
       cs = v._repr_html_()  # pylint: disable=protected-access
     else:
-      cs = f'<span style="white-space: pre-wrap">{str(v)}</span>'
+      cs = f'<span style="white-space: pre-wrap">{html.escape(str(v))}</span>'
     key_color, key_bg_color, value_color, value_bg_color = item_color(k, v)
     key_span = html_round_text(

langfun/core/repr_utils_test.py CHANGED Viewed

@@ -63,9 +63,12 @@ class SharingContentTest(unittest.TestCase):
     class Foo(pg.Object):
       x: int
-    html = repr_utils.html_repr({'foo': pg.Ref(Foo(1))})
+    html = repr_utils.html_repr(
+        {'foo': pg.Ref(Foo(1)), 'bar': '<lf_image>'}
+    )
     self.assertIn('foo</span>', html)
     self.assertNotIn('Ref', html)
+    self.assertIn('&lt;lf_image&gt;', html)
 if __name__ == '__main__':

langfun/core/structured/mapping.py CHANGED Viewed

@@ -92,6 +92,15 @@ class MappingExample(lf.NaturalLanguageFormattable, lf.Component):
       'The natural language context for this mapping. ',
   ] = None
+  metadata: Annotated[
+      dict[str, Any],
+      (
+          'The metadata associated with the mapping example, '
+          'which chould carry structured data, such as tool function input. '
+          'It is a `pg.Dict` object whose keys can be accessed by attributes.'
+      ),
+  ] = pg.Dict()
   def schema_repr(
       self, protocol: schema_lib.SchemaProtocol = 'python', **kwargs
   ) -> str:
@@ -157,16 +166,21 @@ class MappingExample(lf.NaturalLanguageFormattable, lf.Component):
     result.write(lf.colored('[INPUT]\n', styles=['bold']))
     result.write(lf.colored(self.input_repr(), color='green'))
-    result.write('\n\n')
     if self.schema is not None:
+      result.write('\n\n')
       result.write(lf.colored('[SCHEMA]\n', styles=['bold']))
       result.write(lf.colored(self.schema_repr(), color='red'))
-      result.write('\n\n')
     if schema_lib.MISSING != self.output:
+      result.write('\n\n')
       result.write(lf.colored('[OUTPUT]\n', styles=['bold']))
       result.write(lf.colored(self.output_repr(), color='blue'))
+    if self.metadata:
+      result.write('\n\n')
+      result.write(lf.colored('[METADATA]\n', styles=['bold']))
+      result.write(lf.colored(str(self.metadata), color='cyan'))
     return result.getvalue().strip()

langfun/core/structured/mapping_test.py CHANGED Viewed

@@ -129,6 +129,33 @@ class MappingExampleTest(unittest.TestCase):
             """),
     )
+  def test_str_with_metadata(self):
+    self.assertEqual(
+        str(
+            mapping.MappingExample(
+                '1 + 1 = 2',
+                schema=int,
+                context='Give the answer.',
+                metadata={'foo': 'bar'},
+            )
+        ),
+        inspect.cleandoc("""
+            \x1b[1m[CONTEXT]
+            \x1b[0m\x1b[35mGive the answer.\x1b[0m
+            \x1b[1m[INPUT]
+            \x1b[0m\x1b[32m1 + 1 = 2\x1b[0m
+            \x1b[1m[SCHEMA]
+            \x1b[0m\x1b[31mint\x1b[0m
+            \x1b[1m[METADATA]
+            \x1b[0m\x1b[36m{
+              foo = 'bar'
+            }\x1b[0m
+            """),
+    )
   def test_serialization(self):
     example = mapping.MappingExample(
         'the answer is 2', 2, int, context='compute 1 + 1'

langfun/core/structured/scoring.py CHANGED Viewed

@@ -35,7 +35,55 @@ def score(
     return_scoring_results: bool = False,
     **kwargs,
 ) -> list[float] | list[lf.LMScoringResult]:
-  """Scores the outputs based on the prompt."""
+  """Scores the outputs based on the prompt.
+  Examples:
+    ```
+    # Example 1: Scoring text output based on the user prompt.
+    scores = lf.score('{{x}} + {{y}} =', ['1', '2', '3'], lm=lm, x=1, y=2)
+    assert len(scores) == 3
+    # Example 2: Scoring int output based on the formulated OOP prompt.
+    scores = lf.score('1 + 1 =', [1, 2, 3], lm=lm)
+    assert len(scores) == 3
+    class Answer(pg.Object):
+      result: int
+    # Example 3: Scoring object output based on the formulated OOP prompt.
+    scores = lf.score('1 + 1 =', [Answer(1), Answer(2), Answer(3)], lm=lm)
+    assert len(scores) == 3
+    # Example 4: Scoring object field value based on the formulated OOP prompt
+    # and the generated tokens before the first `pg.oneof`.
+    scores = lf.score('1 + 1 =', [Answer(pg.oneof([1, 2, 3]))], lm=lm)
+    assert len(scores) == 3
+    # Example 5: Scoring multiple prompt/completion pairs.
+    scores = lf.score(
+        ['1 + 1=', '2 + 3='],
+        ['2', '4'],
+        lm=lm
+    )
+    assert len(scores) == 2
+    ```
+  Args:
+    prompt: The prompt(s) based on which each completion will be scored.
+    completions: A list of strings or symbolic objects as the output.
+    schema: The schema as the output type. If None, it will be inferred from
+      the completions.
+    lm: The language model used for scoring.
+    examples: Fewshot exemplars used together with the prompt in getting the
+      completions.
+    protocol: The protocol for formulating the prompt based on objects.
+    return_scoring_results: If True, returns a list of `lf.LMScoringResult`,
+      otherwise returns a list of floats as the scores of each completion.
+    **kwargs: Keyword arguments that are referred by the prompt.
+  Returns:
+    A list of floats or `lf.LMScoringResult` as the score of each completion.
+  """
   if not completions:
     raise ValueError('`completions` must not be empty.')
@@ -79,12 +127,36 @@ def score(
   completion_reprs = []
   for c in completions:
     if isinstance(c, mapping.MappingError):
-      rep = c.lm_response
+      completion_reprs.append(c.lm_response)
     else:
       rep = mapping.MappingExample.value_repr(
           c, protocol=protocol, compact=False, verbose=False
       )
-    completion_reprs.append(rep)
+      # NOTE(daiyip): supporting scenario of scoring object field with
+      # `pg.oneof`.
+      oneof_pos = rep.find('OneOf(')
+      if oneof_pos == -1:
+        completion_reprs.append(rep)
+      else:
+        assert protocol == 'python', protocol
+        if isinstance(input_message, list):
+          raise ValueError(
+              'Scoring on object fields using `pg.oneof` must share the '
+              f'same prompt. Encountered: {prompt}'
+          )
+        input_message.text += '\n' + rep[:oneof_pos]
+        oneof = _get_first_oneof(c)
+        for v in oneof.candidates:
+          completion_reprs.append(
+              pg.format(
+                  v,
+                  python_format=True,
+                  compact=False,
+                  verbose=False,
+                  root_indent=oneof.sym_path.depth
+              )
+          )
   results = lm.score(
       input_message,
@@ -93,3 +165,17 @@ def score(
   if return_scoring_results:
     return results
   return [r.score for r in results]
+def _get_first_oneof(value: Any) -> pg.hyper.OneOf:
+  """Gets the first pg.oneof from a symbolic object."""
+  oneofs = []
+  def select_oneofs(k, v, p):
+    del k, p
+    if isinstance(v, pg.hyper.OneOf):
+      oneofs.append(v)
+      return pg.TraverseAction.CONTINUE
+    return pg.TraverseAction.ENTER
+  pg.traverse(value, select_oneofs)
+  assert oneofs
+  return oneofs[0]

langfun/core/structured/scoring_test.py CHANGED Viewed

@@ -16,6 +16,11 @@ import unittest
 import langfun.core as lf
 from langfun.core.llms import fake
 from langfun.core.structured import scoring
+import pyglove as pg
+class Answer(pg.Object):
+  result: int
 class ScoringTest(unittest.TestCase):
@@ -32,9 +37,28 @@ class ScoringTest(unittest.TestCase):
     with self.assertRaisesRegex(ValueError, '`lm` must be specified'):
       scoring.score('hi', [1, 2])
+    with self.assertRaisesRegex(
+        ValueError,
+        'Scoring on object fields using `pg.oneof` must share the same prompt',
+    ):
+      scoring.score(
+          ['1 + 1=', '2 + 3='],
+          [Answer(pg.oneof([1, 2, 3]))],
+          lm=fake.Echo(),
+      )
   def test_score(self):
     self.assertEqual(scoring.score('hi', [1, 2], lm=fake.Echo()), [0.0, -1.0])
+  def test_score_on_field_values(self):
+    self.assertEqual(
+        scoring.score(
+            '1 + 1=',
+            [Answer(pg.oneof([1, 2, 3]))], lm=fake.Echo()
+        ),
+        [0.0, -1.0, -2.0]
+    )
   def test_score_returning_scoring_results(self):
     self.assertEqual(scoring.score(
         'hi', [1, 2], lm=fake.Echo(), return_scoring_results=True),

{langfun-0.1.1.dev20240819.dist-info → langfun-0.1.1.dev20240821.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: langfun
-Version: 0.1.1.dev20240819
+Version: 0.1.1.dev20240821
 Summary: Langfun: Language as Functions.
 Home-page: https://github.com/google/langfun
 Author: Langfun Authors

{langfun-0.1.1.dev20240819.dist-info → langfun-0.1.1.dev20240821.dist-info}/RECORD RENAMED Viewed

@@ -19,8 +19,8 @@ langfun/core/modality.py,sha256=Tla4t86DUYHpbZ2G7dy1r19fTj_Ga5XOvlYp6lbWa-Q,3512
 langfun/core/modality_test.py,sha256=HyZ5xONKQ0Fw18SzoWAq-Ob9njOXIIjBo1hNtw-rudw,2400
 langfun/core/natural_language.py,sha256=3ynSnaYQnjE60LIPK5fyMgdIjubnPYZwzGq4rWPeloE,1177
 langfun/core/natural_language_test.py,sha256=LHGU_1ytbkGuSZQFIFP7vP3dBlcY4-A12fT6dbjUA0E,1424
-langfun/core/repr_utils.py,sha256=nKB9U4-8NE8qjx7Zl2g1yXLCbpM6Niq38ReMSyPtfJQ,5512
-langfun/core/repr_utils_test.py,sha256=Z018ULMZ8cgmygAH4dNnKBEKduBC7bl1-tZClD1pv9g,2606
+langfun/core/repr_utils.py,sha256=Y6ccoQUMpRxDv_jUy2QtnP9cdz3QBjJtTIgxGIU-kfM,5537
+langfun/core/repr_utils_test.py,sha256=_VhWpDbtlWaGadXL0gpmwQVmACenvzmLUng_AqR6zaE,2685
 langfun/core/sampling.py,sha256=vygWvgC8MFw0_AKNSmz-ywMXJYWf8cl0tI8QycvAmyI,5795
 langfun/core/sampling_test.py,sha256=U7PANpMsl9E_pa4_Y4FzesSjcwg-u-LKHGCWSgv-8FY,3663
 langfun/core/subscription.py,sha256=euawEuSZP-BHydaT-AQpfYFL0m5pWPGcW0upFhrojqc,10930
@@ -44,7 +44,7 @@ langfun/core/coding/python/parsing_test.py,sha256=9vAWF484kWIm6JZq8NFiMgKUDhXV-d
 langfun/core/coding/python/permissions.py,sha256=1QWGHvzL8MM0Ok_auQ9tURqZHtdOfJaDpBzZ29GUE-c,2544
 langfun/core/coding/python/permissions_test.py,sha256=w5EDb8QxpxgJyZkojyzVWQvDfg366zn99-g__6TbPQ0,2699
 langfun/core/eval/__init__.py,sha256=Ogdr9OtTywhhLPHi3AZzOD2mXX2oyaHWflrSTMm96uA,1899
-langfun/core/eval/base.py,sha256=0_iaKuQhS49PlbWqCQ5EABUMKavr2R4ltcJZWCVoZZg,73816
+langfun/core/eval/base.py,sha256=BiWColibVo9-4P27Z0hIWXe8_UPocJTSTUdKeOPVwxI,74746
 langfun/core/eval/base_test.py,sha256=p1EfqviHMz_ppQY8FU67h5OCgL0tzhLvXzGIsq0sVyI,26930
 langfun/core/eval/matching.py,sha256=9GX8HfO9jKxgNLAivgy5K88Xhoh6Z7Pptq65pe7vht8,9762
 langfun/core/eval/matching_test.py,sha256=f7iVyXH5KGJBWt4Wp14Bt9J3X59A6Ayfog9MbuFvPew,5532
@@ -96,8 +96,8 @@ langfun/core/structured/description.py,sha256=SXW4MJvshFjbR-0gw6rE21o6WXq12UlRXa
 langfun/core/structured/description_test.py,sha256=UtZGjSFUaQ6130t1E5tcL7ODu0xIefkapb53TbnqsK8,7362
 langfun/core/structured/function_generation.py,sha256=pFgS3vcRAWiuFBol2x5Eeip3XqoudONsOpeJpWyjT3s,7479
 langfun/core/structured/function_generation_test.py,sha256=ZJI-aaGgWWszn92u7h5IZ9Pl70N2DgAGGJrIxPzsvwg,10065
-langfun/core/structured/mapping.py,sha256=QKbSnvOgut-sx2mZPjHJcdlDLxR8b3ZC16ZLWociwog,11298
-langfun/core/structured/mapping_test.py,sha256=PiXklMeIa8L6KtMi3ju7J9Y39gZy0hIGz-Oeq4A_7XE,3835
+langfun/core/structured/mapping.py,sha256=CsflMwm5cKJYZ2ag-neroA4CQlhu2wjFRSxKpd_qQDQ,11778
+langfun/core/structured/mapping_test.py,sha256=zQoVx3kAD5oSm_OJAQA6q41NXLLyn8qs6CIVJgAoP_w,4489
 langfun/core/structured/parsing.py,sha256=keoVqEfzAbdULh6GawWFsTQzU91MzJXYFZjXGXLaD8g,11492
 langfun/core/structured/parsing_test.py,sha256=34wDrXaQ-EYhJLfDL8mX9K53oQMSzh5pVYdKjnESmK8,20895
 langfun/core/structured/prompting.py,sha256=_U6Z65AwXvVvfaQFCY9GawB_QV9S3u7P7BOU2URABmw,8873
@@ -106,8 +106,8 @@ langfun/core/structured/schema.py,sha256=oiT4P4Q9pG-QOnFzxETN2EQZqNln8nG4zAJHxcm
 langfun/core/structured/schema_generation.py,sha256=U3nRQsqmMZg_qIVDh2fiY3K4JLfsAL1LcKzIFP1iXFg,5316
 langfun/core/structured/schema_generation_test.py,sha256=RM9s71kMNg2jTePwInkiW9fK1ACN37eyPeF8OII-0zw,2950
 langfun/core/structured/schema_test.py,sha256=RjYhwTgktQgyqAjzLvo967nTiIK9KWgP-aNGg4e7ihE,25258
-langfun/core/structured/scoring.py,sha256=pE2ilZC7cV1qlPZANOFIFVbNB7IixSTLcnmf9pRU3tc,2883
-langfun/core/structured/scoring_test.py,sha256=39_dw6p_FkoqeUccO67yIqos-MccAWezoozS21i8mi0,1732
+langfun/core/structured/scoring.py,sha256=ae6SjLqoqsKFmcPnaJbsFmH4XFGKOQaJRjYZ1wm1Ywo,5860
+langfun/core/structured/scoring_test.py,sha256=QvlwDAzwuamKL5tCotm1L3Sx0cs3idoNK4aIEhaO4Yk,2272
 langfun/core/templates/__init__.py,sha256=bO0eMsVJbi7sxEB2YlInKRQ2EVP-RyyKUwcD-8msuN4,927
 langfun/core/templates/completion.py,sha256=mUqZHOEV3ag6-A08XghpeEltcrBvCDxXP004eDDfeag,1931
 langfun/core/templates/completion_test.py,sha256=vGnjnM38UHyVDUyaUYtmp20s9KBGOdbPVsX-H-ET11E,1636
@@ -117,8 +117,8 @@ langfun/core/templates/demonstration.py,sha256=vCrgYubdZM5Umqcgp8NUVGXgr4P_c-fik
 langfun/core/templates/demonstration_test.py,sha256=SafcDQ0WgI7pw05EmPI2S4v1t3ABKzup8jReCljHeK4,2162
 langfun/core/templates/selfplay.py,sha256=yhgrJbiYwq47TgzThmHrDQTF4nDrTI09CWGhuQPNv-s,2273
 langfun/core/templates/selfplay_test.py,sha256=rBW2Qr8yi-aWYwoTwRR-n1peKyMX9QXPZXURjLgoiRs,2264
-langfun-0.1.1.dev20240819.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
-langfun-0.1.1.dev20240819.dist-info/METADATA,sha256=XaHWEVmO67aqEbYT0Aa0wqV81wHGVkcZ3SgAiI5jOyM,5234
-langfun-0.1.1.dev20240819.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
-langfun-0.1.1.dev20240819.dist-info/top_level.txt,sha256=RhlEkHxs1qtzmmtWSwYoLVJAc1YrbPtxQ52uh8Z9VvY,8
-langfun-0.1.1.dev20240819.dist-info/RECORD,,
+langfun-0.1.1.dev20240821.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
+langfun-0.1.1.dev20240821.dist-info/METADATA,sha256=469KPCsIx2U_ZtMDN0qA4UTOnbcVayQyduyUs65ccVE,5234
+langfun-0.1.1.dev20240821.dist-info/WHEEL,sha256=Mdi9PDNwEZptOjTlUcAth7XJDFtKrHYaQMPulZeBCiQ,91
+langfun-0.1.1.dev20240821.dist-info/top_level.txt,sha256=RhlEkHxs1qtzmmtWSwYoLVJAc1YrbPtxQ52uh8Z9VvY,8
+langfun-0.1.1.dev20240821.dist-info/RECORD,,

{langfun-0.1.1.dev20240819.dist-info → langfun-0.1.1.dev20240821.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (72.2.0)
+Generator: setuptools (73.0.1)
 Root-Is-Purelib: true
 Tag: py3-none-any

{langfun-0.1.1.dev20240819.dist-info → langfun-0.1.1.dev20240821.dist-info}/LICENSE RENAMED Viewed

File without changes

{langfun-0.1.1.dev20240819.dist-info → langfun-0.1.1.dev20240821.dist-info}/top_level.txt RENAMED Viewed

File without changes

langfun 0.1.1.dev20240819__py3-none-any.whl → 0.1.1.dev20240821__py3-none-any.whl

langfun 0.1.1.dev20240819__py3-none-any.whl → 0.1.1.dev20240821__py3-none-any.whl