PyPI - langfun - Versions diffs - 0.0.2.dev20240429__py3-none-any.whl → 0.0.2.dev20240501__py3-none-any.whl - Mend

langfun 0.0.2.dev20240429__py3-none-any.whl → 0.0.2.dev20240501__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

langfun/core/eval/__init__.py +14 -1
langfun/core/eval/base.py +490 -105
langfun/core/eval/base_test.py +185 -53
langfun/core/eval/matching.py +22 -21
langfun/core/eval/matching_test.py +23 -2
langfun/core/eval/patching.py +130 -0
langfun/core/eval/patching_test.py +170 -0
langfun/core/eval/scoring.py +4 -4
langfun/core/eval/scoring_test.py +19 -2
langfun/core/llms/openai.py +1 -1
langfun/core/llms/openai_test.py +2 -1
{langfun-0.0.2.dev20240429.dist-info → langfun-0.0.2.dev20240501.dist-info}/METADATA +1 -2
{langfun-0.0.2.dev20240429.dist-info → langfun-0.0.2.dev20240501.dist-info}/RECORD +16 -14
{langfun-0.0.2.dev20240429.dist-info → langfun-0.0.2.dev20240501.dist-info}/LICENSE +0 -0
{langfun-0.0.2.dev20240429.dist-info → langfun-0.0.2.dev20240501.dist-info}/WHEEL +0 -0
{langfun-0.0.2.dev20240429.dist-info → langfun-0.0.2.dev20240501.dist-info}/top_level.txt +0 -0

langfun/core/eval/patching_test.py ADDED Viewed

@@ -0,0 +1,170 @@
+# Copyright 2024 The Langfun Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for evaluation patching."""
+import unittest
+from langfun.core import llms as lf_llms
+from langfun.core.eval import base
+from langfun.core.eval import patching
+import pyglove as pg
+class PatchingCommonTest(unittest.TestCase):
+  def test_patch_member(self):
+    class A(pg.Object):
+      x: int = 1
+    class B(pg.Object):
+      a: A
+    b = B(A())
+    pg.patch(b, [patching.patch_member(A, 'x', 2)])
+    self.assertEqual(b, B(A(2)))
+  def test_patch_args(self):
+    s = base.Suite(
+        [base.Evaluation(inputs=base.as_inputs([1]))],
+        additional_args=dict(x=1, y=2),
+    )
+    pg.patch(s, [patching.patch_additional_args(x=3, z=4)])
+    self.assertTrue(
+        pg.eq(
+            s,
+            base.Suite(
+                [
+                    base.Evaluation(
+                        inputs=base.as_inputs([1]),
+                        additional_args=dict(x=3, y=2, z=4),
+                    )
+                ],
+                additional_args=dict(x=3, y=2, z=4),
+            ),
+        )
+    )
+  def test_patch_lm(self):
+    s = base.Suite(
+        [base.Evaluation(inputs=base.as_inputs([1]))],
+        lm=lf_llms.Gpt35Turbo(),
+    )
+    pg.patch(
+        s, [patching.patch_lm(pg.oneof([lf_llms.Gpt35Turbo(), lf_llms.Gpt4()]))]
+    )
+    self.assertTrue(
+        pg.eq(
+            s,
+            base.Suite(
+                [
+                    base.Evaluation(
+                        inputs=base.as_inputs([1]),
+                        lm=pg.oneof([lf_llms.Gpt35Turbo(), lf_llms.Gpt4()]),
+                    )
+                ],
+                lm=pg.oneof([lf_llms.Gpt35Turbo(), lf_llms.Gpt4()]),
+            ),
+        )
+    )
+  def test_patch_parsing_lm(self):
+    s = base.Suite(
+        [base.Evaluation(inputs=base.as_inputs([1]))],
+        lm=lf_llms.Gpt4(),
+    )
+    pg.patch(s, [patching.patch_parsing_lm(lf_llms.Gpt35Turbo())])
+    self.assertTrue(
+        pg.eq(
+            s,
+            base.Suite(
+                [
+                    base.Evaluation(
+                        inputs=base.as_inputs([1]),
+                        lm=lf_llms.Gpt4(),
+                        parsing_lm=lf_llms.Gpt35Turbo(),
+                    )
+                ],
+                # NOTE(daiyip): Suite does not have `parsing_lm` as one of its
+                # variable keyword fields yet, so patching does not add to it.
+                # This is okay since we only care about the leaf nodes.
+                lm=lf_llms.Gpt4(),
+            ),
+        )
+    )
+  def test_patch_prompt(self):
+    e = base.Evaluation(inputs=base.as_inputs([1]))
+    pg.patch(e, [patching.patch_prompt('Q: {{example.question}}')])
+    self.assertTrue(
+        pg.eq(
+            e,
+            base.Evaluation(
+                inputs=base.as_inputs([1]),
+                prompt='Q: {{example.question}}',
+            ),
+        )
+    )
+  def test_patch_inputs(self):
+    e = base.Evaluation(inputs=base.as_inputs([1]))
+    pg.patch(e, [patching.patch_inputs(base.as_inputs([2]))])
+    self.assertTrue(
+        pg.eq(
+            e,
+            base.Evaluation(
+                inputs=base.as_inputs([2]),
+            ),
+        )
+    )
+  def test_patch_schema_fn(self):
+    @pg.functor()
+    def int_schema():
+      return int
+    e = base.Evaluation(inputs=base.as_inputs([1]))
+    pg.patch(e, [patching.patch_schema_fn(int_schema())])
+    self.assertTrue(
+        pg.eq(
+            e,
+            base.Evaluation(
+                inputs=base.as_inputs([1]),
+                schema_fn=int_schema(),
+            ),
+        )
+    )
+class StringPatcheTest(unittest.TestCase):
+  def test_lm(self):
+    target = pg.patch(
+        base.Evaluation(inputs=base.as_inputs([1])),
+        ['lm?haiku:gpt4', 'max_tokens?1024', 'temperature?0.7'],
+    )
+    self.assertEqual(
+        target.lm,
+        pg.oneof([
+            lf_llms.Claude3Haiku(temperature=0.7, max_tokens=1024),
+            lf_llms.Gpt4(temperature=0.7, max_tokens=1024),
+        ]),
+    )
+    with self.assertRaisesRegex(ValueError, 'Unknown model name'):
+      pg.patch(
+          base.Evaluation(inputs=base.as_inputs([1])),
+          ['lm?gpt2'],
+      )
+if __name__ == '__main__':
+  unittest.main()

langfun/core/eval/scoring.py CHANGED Viewed

@@ -113,8 +113,8 @@ class Scoring(base.Evaluation):
         m.total,
     )
-  def summarize(self) -> pg.Dict:
-    result = super().summarize()
+  def finalize(self) -> pg.Dict:
+    result = super().finalize()
     result.metrics.update(
         num_scored=self.num_scored,
         score_rate=self.score_rate,
@@ -168,7 +168,7 @@ class Scoring(base.Evaluation):
         )
     )
-  def _render_metric(self, s: io.StringIO) -> None:
+  def _render_summary_metrics(self, s: io.StringIO) -> None:
     """Renders metrics in HTML."""
     assert self.result is not None
     m = self.result.metrics
@@ -182,7 +182,7 @@ class Scoring(base.Evaluation):
         )
     )
     s.write(' | ')
-    super()._render_metric(s)
+    super()._render_summary_metrics(s)
   def _render_scored(self, s: io.StringIO) -> None:
     """Formats the matched cases into html."""

langfun/core/eval/scoring_test.py CHANGED Viewed

@@ -98,6 +98,11 @@ class ScoringTest(unittest.TestCase):
                 total=2,
                 failures=0,
                 failure_rate=0.0,
+                oop_failures=0,
+                oop_failure_rate=0.0,
+                non_oop_failures=0,
+                non_oop_failure_rate=0.0,
+                failure_breakdown={},
                 num_scored=2,
                 score_rate=1.0,
                 avg_score=0.5,
@@ -124,7 +129,12 @@ class ScoringTest(unittest.TestCase):
     )
     self.assertTrue(
         os.path.exists(
-            os.path.join(s.dir, scoring.Scoring.FAILURES_JSON)
+            os.path.join(s.dir, scoring.Scoring.OOP_FAILURES_JSON)
+        )
+    )
+    self.assertTrue(
+        os.path.exists(
+            os.path.join(s.dir, scoring.Scoring.NON_OOP_FAILURES_JSON)
         )
     )
     self.assertTrue(
@@ -143,7 +153,14 @@ class ScoringTest(unittest.TestCase):
     self.assertTrue(
         os.path.exists(
             os.path.join(
-                s.dir, scoring.Scoring.FAILURES_HTML
+                s.dir, scoring.Scoring.OOP_FAILURES_HTML
+            )
+        )
+    )
+    self.assertTrue(
+        os.path.exists(
+            os.path.join(
+                s.dir, scoring.Scoring.NON_OOP_FAILURES_HTML
             )
         )
     )

langfun/core/llms/openai.py CHANGED Viewed

@@ -234,7 +234,7 @@ class OpenAI(lf.LanguageModel):
           if isinstance(chunk, str):
             item = dict(type='text', text=chunk)
           elif isinstance(chunk, lf_modalities.Image) and chunk.uri:
-            item = dict(type='image_url', image_url=chunk.uri)
+            item = dict(type='image_url', image_url=dict(url=chunk.uri))
           else:
             raise ValueError(f'Unsupported modality object: {chunk!r}.')
           content.append(item)

langfun/core/llms/openai_test.py CHANGED Viewed

@@ -66,7 +66,8 @@ def mock_chat_completion_query_vision(messages, *, n=1, **kwargs):
   del kwargs
   choices = []
   urls = [
-      c['image_url'] for c in messages[0]['content'] if c['type'] == 'image_url'
+      c['image_url']['url']
+      for c in messages[0]['content'] if c['type'] == 'image_url'
   ]
   for k in range(n):
     choices.append(pg.Dict(

{langfun-0.0.2.dev20240429.dist-info → langfun-0.0.2.dev20240501.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: langfun
-Version: 0.0.2.dev20240429
+Version: 0.0.2.dev20240501
 Summary: Langfun: Language as Functions.
 Home-page: https://github.com/google/langfun
 Author: Langfun Authors
@@ -21,7 +21,6 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Classifier: Topic :: Software Development :: Libraries
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: absl-py >=1.0.0
 Requires-Dist: google-generativeai >=0.3.2
 Requires-Dist: jinja2 >=3.1.2
 Requires-Dist: openai ==0.27.2

{langfun-0.0.2.dev20240429.dist-info → langfun-0.0.2.dev20240501.dist-info}/RECORD RENAMED Viewed

@@ -39,13 +39,15 @@ langfun/core/coding/python/parsing.py,sha256=uyvI1c5OLZhMVK2Oltkl3oJxSLlG0wadlpQ
 langfun/core/coding/python/parsing_test.py,sha256=9vAWF484kWIm6JZq8NFiMgKUDhXV-deRl1QMmNERfAA,7386
 langfun/core/coding/python/permissions.py,sha256=1QWGHvzL8MM0Ok_auQ9tURqZHtdOfJaDpBzZ29GUE-c,2544
 langfun/core/coding/python/permissions_test.py,sha256=w5EDb8QxpxgJyZkojyzVWQvDfg366zn99-g__6TbPQ0,2699
-langfun/core/eval/__init__.py,sha256=NSmPe2lxdxFoY4h8VkNyONPAFtOTUpK9WhmZRaqUgiI,1335
-langfun/core/eval/base.py,sha256=1svQoZ0C2DGCVLvr0Qt0TcrlJKtJptdoOBVAxkxnHoU,60264
-langfun/core/eval/base_test.py,sha256=g3lRp2dcq411cLYHpn8spI4feyv2nOccs5PlFBwav3g,22512
-langfun/core/eval/matching.py,sha256=Ks-L9vyMNDj4R8zFczzByT_4DK2wAFatyCZupdHzx_g,9932
-langfun/core/eval/matching_test.py,sha256=5Qs9ETaLoyNcJ43f-_bK2Bfe--2Y3U79DnSA55-l6pc,4932
-langfun/core/eval/scoring.py,sha256=A3y6HMcmpREQPqUD-WtImYOb2jG-23WpcUO2-WGhel0,6360
-langfun/core/eval/scoring_test.py,sha256=vxJR-2rBghUDUOCLTIMd6M3i1F8xDhA-U45wuBHVfc0,4058
+langfun/core/eval/__init__.py,sha256=Evt-E4FEhZF2tXL6-byh_AyA7Cc_ZoGmvnN7vkAZedk,1898
+langfun/core/eval/base.py,sha256=VgHdnfkHeGPp0XjIGHw9LDZsR0Z4-yuWIkzn4pqJj3Y,73967
+langfun/core/eval/base_test.py,sha256=cHOTIWVW4Dp8gKKIKcZrAcJ-w84j2GIozTzJoiAX7p4,26743
+langfun/core/eval/matching.py,sha256=Y4vFoNTQEOwko6IA8l9OZ52-vt52e3VGmcTtvLA67wM,9782
+langfun/core/eval/matching_test.py,sha256=f7iVyXH5KGJBWt4Wp14Bt9J3X59A6Ayfog9MbuFvPew,5532
+langfun/core/eval/patching.py,sha256=R0s2eAd1m97exQt06dmUL0V_MBG0W2Hxg7fhNB7cXW0,3866
+langfun/core/eval/patching_test.py,sha256=8kCd54Egjju22FMgtJuxEsrXkW8ifs-UUBHtrCG1L6w,4775
+langfun/core/eval/scoring.py,sha256=1J7IATo-8FXUR0SBqk9icztHiM0lWkBFcWUo-vUURgQ,6376
+langfun/core/eval/scoring_test.py,sha256=O8olHbrUEg60gMxwOkWzKBJZpZoUlmVnBANX5Se2SXM,4546
 langfun/core/llms/__init__.py,sha256=1bPg1QI8duOZCYINm-jWi094x0JtLmsk4KX60qIC_gs,3245
 langfun/core/llms/anthropic.py,sha256=7W9YdPN3SlAFhAIQlihMkrpo7tTY_4NvD0KIlCrqcsk,8505
 langfun/core/llms/anthropic_test.py,sha256=TMM30myyEhwF99Le4RvJEXOn8RYl0q1FRkt9Q9nl1jk,5540
@@ -57,8 +59,8 @@ langfun/core/llms/groq.py,sha256=NaGItVL_pkOpqPpI4bPGU27xLFRoaeizZ49v2s-4ERs,784
 langfun/core/llms/groq_test.py,sha256=M6GtlrsOvDun_j-sR8cPh4W_moHWZNSTiThu3kuwbbc,5281
 langfun/core/llms/llama_cpp.py,sha256=Y_KkMUf3Xfac49koMUtUslKl3h-HWp3-ntq7Jaa3bdo,2385
 langfun/core/llms/llama_cpp_test.py,sha256=ZxC6defGd_HX9SFRU9U4cJiQnBKundbOrchbXuC1Z2M,1683
-langfun/core/llms/openai.py,sha256=06nPhmw0zIA5Zqv3eqsrZtYLHnKwW7N8yt3LlFUFVpI,13247
-langfun/core/llms/openai_test.py,sha256=MiLqBaYliAkWVEwOBmX3HTj_eAuWLv77q8-I3VyVEBU,14841
+langfun/core/llms/openai.py,sha256=rPwO4qPGEwbB4O7TaQD0spg_PXIfF2ioRI_ilE3Pg6Y,13257
+langfun/core/llms/openai_test.py,sha256=asSA1sVy_7hnXioD_2HTxtSDpVTKBUO_EjZuyHpwbn0,14854
 langfun/core/llms/cache/__init__.py,sha256=QAo3InUMDM_YpteNnVCSejI4zOsnjSMWKJKzkb3VY64,993
 langfun/core/llms/cache/base.py,sha256=cFfYvOIUae842pncqCAsRvqXCk2AnAsRYVx0mcIoAeY,3338
 langfun/core/llms/cache/in_memory.py,sha256=YfFyJEhLs73cUiB0ZfhMxYpdE8Iuxxw-dvMFwGHTSHw,4742
@@ -101,8 +103,8 @@ langfun/core/templates/demonstration.py,sha256=vCrgYubdZM5Umqcgp8NUVGXgr4P_c-fik
 langfun/core/templates/demonstration_test.py,sha256=SafcDQ0WgI7pw05EmPI2S4v1t3ABKzup8jReCljHeK4,2162
 langfun/core/templates/selfplay.py,sha256=yhgrJbiYwq47TgzThmHrDQTF4nDrTI09CWGhuQPNv-s,2273
 langfun/core/templates/selfplay_test.py,sha256=DYVrkk7uNKCqJGEHH31HssU2BPuMItU1vJLzfcXIlYg,2156
-langfun-0.0.2.dev20240429.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
-langfun-0.0.2.dev20240429.dist-info/METADATA,sha256=2ilR8AAbFugi7GfU5Szd9nOmkThPTNsTrOCOseGc7gQ,3436
-langfun-0.0.2.dev20240429.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-langfun-0.0.2.dev20240429.dist-info/top_level.txt,sha256=RhlEkHxs1qtzmmtWSwYoLVJAc1YrbPtxQ52uh8Z9VvY,8
-langfun-0.0.2.dev20240429.dist-info/RECORD,,
+langfun-0.0.2.dev20240501.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
+langfun-0.0.2.dev20240501.dist-info/METADATA,sha256=SUhJ4RRQcyqLKu16sGip7Z2D875PI5EarCo3VDAGxuQ,3405
+langfun-0.0.2.dev20240501.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+langfun-0.0.2.dev20240501.dist-info/top_level.txt,sha256=RhlEkHxs1qtzmmtWSwYoLVJAc1YrbPtxQ52uh8Z9VvY,8
+langfun-0.0.2.dev20240501.dist-info/RECORD,,

{langfun-0.0.2.dev20240429.dist-info → langfun-0.0.2.dev20240501.dist-info}/LICENSE RENAMED Viewed

File without changes

{langfun-0.0.2.dev20240429.dist-info → langfun-0.0.2.dev20240501.dist-info}/WHEEL RENAMED Viewed

File without changes

{langfun-0.0.2.dev20240429.dist-info → langfun-0.0.2.dev20240501.dist-info}/top_level.txt RENAMED Viewed

File without changes

langfun 0.0.2.dev20240429__py3-none-any.whl → 0.0.2.dev20240501__py3-none-any.whl

langfun 0.0.2.dev20240429__py3-none-any.whl → 0.0.2.dev20240501__py3-none-any.whl