langfun 0.0.2.dev20240429__py3-none-any.whl → 0.0.2.dev20240511__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37)
  1. langfun/__init__.py +5 -0
  2. langfun/core/eval/__init__.py +14 -1
  3. langfun/core/eval/base.py +503 -112
  4. langfun/core/eval/base_test.py +185 -53
  5. langfun/core/eval/matching.py +22 -21
  6. langfun/core/eval/matching_test.py +23 -2
  7. langfun/core/eval/patching.py +130 -0
  8. langfun/core/eval/patching_test.py +170 -0
  9. langfun/core/eval/scoring.py +4 -4
  10. langfun/core/eval/scoring_test.py +19 -2
  11. langfun/core/langfunc.py +1 -17
  12. langfun/core/langfunc_test.py +4 -0
  13. langfun/core/language_model.py +6 -0
  14. langfun/core/llms/__init__.py +8 -0
  15. langfun/core/llms/fake.py +6 -6
  16. langfun/core/llms/google_genai.py +8 -0
  17. langfun/core/llms/openai.py +3 -2
  18. langfun/core/llms/openai_test.py +2 -1
  19. langfun/core/llms/vertexai.py +291 -0
  20. langfun/core/llms/vertexai_test.py +233 -0
  21. langfun/core/modalities/image.py +1 -3
  22. langfun/core/modalities/mime.py +6 -0
  23. langfun/core/modalities/video.py +1 -3
  24. langfun/core/structured/__init__.py +2 -0
  25. langfun/core/structured/mapping.py +5 -1
  26. langfun/core/structured/prompting.py +39 -11
  27. langfun/core/structured/prompting_test.py +43 -0
  28. langfun/core/structured/schema.py +34 -4
  29. langfun/core/structured/schema_test.py +32 -1
  30. langfun/core/structured/scoring.py +4 -1
  31. langfun/core/structured/scoring_test.py +6 -0
  32. langfun/core/template.py +22 -1
  33. {langfun-0.0.2.dev20240429.dist-info → langfun-0.0.2.dev20240511.dist-info}/METADATA +2 -2
  34. {langfun-0.0.2.dev20240429.dist-info → langfun-0.0.2.dev20240511.dist-info}/RECORD +37 -33
  35. {langfun-0.0.2.dev20240429.dist-info → langfun-0.0.2.dev20240511.dist-info}/LICENSE +0 -0
  36. {langfun-0.0.2.dev20240429.dist-info → langfun-0.0.2.dev20240511.dist-info}/WHEEL +0 -0
  37. {langfun-0.0.2.dev20240429.dist-info → langfun-0.0.2.dev20240511.dist-info}/top_level.txt +0 -0
langfun/core/eval/patching_test.py ADDED
@@ -0,0 +1,170 @@
+ # Copyright 2024 The Langfun Authors
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Tests for evaluation patching."""
+
+ import unittest
+ from langfun.core import llms as lf_llms
+ from langfun.core.eval import base
+ from langfun.core.eval import patching
+ import pyglove as pg
+
+
+ class PatchingCommonTest(unittest.TestCase):
+
+ def test_patch_member(self):
+ class A(pg.Object):
+ x: int = 1
+
+ class B(pg.Object):
+ a: A
+
+ b = B(A())
+ pg.patch(b, [patching.patch_member(A, 'x', 2)])
+ self.assertEqual(b, B(A(2)))
+
+ def test_patch_args(self):
+ s = base.Suite(
+ [base.Evaluation(inputs=base.as_inputs([1]))],
+ additional_args=dict(x=1, y=2),
+ )
+ pg.patch(s, [patching.patch_additional_args(x=3, z=4)])
+ self.assertTrue(
+ pg.eq(
+ s,
+ base.Suite(
+ [
+ base.Evaluation(
+ inputs=base.as_inputs([1]),
+ additional_args=dict(x=3, y=2, z=4),
+ )
+ ],
+ additional_args=dict(x=3, y=2, z=4),
+ ),
+ )
+ )
+
+ def test_patch_lm(self):
+ s = base.Suite(
+ [base.Evaluation(inputs=base.as_inputs([1]))],
+ lm=lf_llms.Gpt35Turbo(),
+ )
+ pg.patch(
+ s, [patching.patch_lm(pg.oneof([lf_llms.Gpt35Turbo(), lf_llms.Gpt4()]))]
+ )
+ self.assertTrue(
+ pg.eq(
+ s,
+ base.Suite(
+ [
+ base.Evaluation(
+ inputs=base.as_inputs([1]),
+ lm=pg.oneof([lf_llms.Gpt35Turbo(), lf_llms.Gpt4()]),
+ )
+ ],
+ lm=pg.oneof([lf_llms.Gpt35Turbo(), lf_llms.Gpt4()]),
+ ),
+ )
+ )
+
+ def test_patch_parsing_lm(self):
+ s = base.Suite(
+ [base.Evaluation(inputs=base.as_inputs([1]))],
+ lm=lf_llms.Gpt4(),
+ )
+ pg.patch(s, [patching.patch_parsing_lm(lf_llms.Gpt35Turbo())])
+ self.assertTrue(
+ pg.eq(
+ s,
+ base.Suite(
+ [
+ base.Evaluation(
+ inputs=base.as_inputs([1]),
+ lm=lf_llms.Gpt4(),
+ parsing_lm=lf_llms.Gpt35Turbo(),
+ )
+ ],
+ # NOTE(daiyip): Suite does not have `parsing_lm` as one of its
+ # variable keyword fields yet, so patching does not add to it.
+ # This is okay since we only care about the leaf nodes.
+ lm=lf_llms.Gpt4(),
+ ),
+ )
+ )
+
+ def test_patch_prompt(self):
+ e = base.Evaluation(inputs=base.as_inputs([1]))
+ pg.patch(e, [patching.patch_prompt('Q: {{example.question}}')])
+ self.assertTrue(
+ pg.eq(
+ e,
+ base.Evaluation(
+ inputs=base.as_inputs([1]),
+ prompt='Q: {{example.question}}',
+ ),
+ )
+ )
+
+ def test_patch_inputs(self):
+ e = base.Evaluation(inputs=base.as_inputs([1]))
+ pg.patch(e, [patching.patch_inputs(base.as_inputs([2]))])
+ self.assertTrue(
+ pg.eq(
+ e,
+ base.Evaluation(
+ inputs=base.as_inputs([2]),
+ ),
+ )
+ )
+
+ def test_patch_schema_fn(self):
+ @pg.functor()
+ def int_schema():
+ return int
+
+ e = base.Evaluation(inputs=base.as_inputs([1]))
+ pg.patch(e, [patching.patch_schema_fn(int_schema())])
+ self.assertTrue(
+ pg.eq(
+ e,
+ base.Evaluation(
+ inputs=base.as_inputs([1]),
+ schema_fn=int_schema(),
+ ),
+ )
+ )
+
+
+ class StringPatcheTest(unittest.TestCase):
+
+ def test_lm(self):
+ target = pg.patch(
+ base.Evaluation(inputs=base.as_inputs([1])),
+ ['lm?haiku:gpt4', 'max_tokens?1024', 'temperature?0.7'],
+ )
+ self.assertEqual(
+ target.lm,
+ pg.oneof([
+ lf_llms.Claude3Haiku(temperature=0.7, max_tokens=1024),
+ lf_llms.Gpt4(temperature=0.7, max_tokens=1024),
+ ]),
+ )
+ with self.assertRaisesRegex(ValueError, 'Unknown model name'):
+ pg.patch(
+ base.Evaluation(inputs=base.as_inputs([1])),
+ ['lm?gpt2'],
+ )
+
+
+ if __name__ == '__main__':
+ unittest.main()
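
The new langfun.core.eval.patching module exercised above lets evaluations be modified through pg.patch, either with patcher objects or with compact 'name?value' strings. A minimal usage sketch, assuming only the calls exercised by the tests above:

import pyglove as pg
from langfun.core import llms as lf_llms
from langfun.core.eval import base
from langfun.core.eval import patching

# A toy evaluation; base.as_inputs wraps raw examples, as in the tests above.
evaluation = base.Evaluation(inputs=base.as_inputs([1]))

# Object-style patching: swap the LM and merge extra args before running.
pg.patch(evaluation, [
    patching.patch_lm(lf_llms.Gpt4()),
    patching.patch_additional_args(x=3, z=4),
])

# String-style patching: 'lm?haiku:gpt4' expands to a pg.oneof over the named
# models; unknown names (e.g. 'lm?gpt2') raise ValueError, per StringPatcheTest.
pg.patch(evaluation, ['lm?haiku:gpt4', 'max_tokens?1024', 'temperature?0.7'])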
langfun/core/eval/scoring.py CHANGED
@@ -113,8 +113,8 @@ class Scoring(base.Evaluation):
  m.total,
  )

- def summarize(self) -> pg.Dict:
- result = super().summarize()
+ def finalize(self) -> pg.Dict:
+ result = super().finalize()
  result.metrics.update(
  num_scored=self.num_scored,
  score_rate=self.score_rate,
@@ -168,7 +168,7 @@ class Scoring(base.Evaluation):
  )
  )

- def _render_metric(self, s: io.StringIO) -> None:
+ def _render_summary_metrics(self, s: io.StringIO) -> None:
  """Renders metrics in HTML."""
  assert self.result is not None
  m = self.result.metrics
@@ -182,7 +182,7 @@ class Scoring(base.Evaluation):
  )
  )
  s.write(' | ')
- super()._render_metric(s)
+ super()._render_summary_metrics(s)

  def _render_scored(self, s: io.StringIO) -> None:
  """Formats the matched cases into html."""
langfun/core/eval/scoring_test.py CHANGED
@@ -98,6 +98,11 @@ class ScoringTest(unittest.TestCase):
  total=2,
  failures=0,
  failure_rate=0.0,
+ oop_failures=0,
+ oop_failure_rate=0.0,
+ non_oop_failures=0,
+ non_oop_failure_rate=0.0,
+ failure_breakdown={},
  num_scored=2,
  score_rate=1.0,
  avg_score=0.5,
@@ -124,7 +129,12 @@ class ScoringTest(unittest.TestCase):
  )
  self.assertTrue(
  os.path.exists(
- os.path.join(s.dir, scoring.Scoring.FAILURES_JSON)
+ os.path.join(s.dir, scoring.Scoring.OOP_FAILURES_JSON)
+ )
+ )
+ self.assertTrue(
+ os.path.exists(
+ os.path.join(s.dir, scoring.Scoring.NON_OOP_FAILURES_JSON)
  )
  )
  self.assertTrue(
@@ -143,7 +153,14 @@ class ScoringTest(unittest.TestCase):
  self.assertTrue(
  os.path.exists(
  os.path.join(
- s.dir, scoring.Scoring.FAILURES_HTML
+ s.dir, scoring.Scoring.OOP_FAILURES_HTML
+ )
+ )
+ )
+ self.assertTrue(
+ os.path.exists(
+ os.path.join(
+ s.dir, scoring.Scoring.NON_OOP_FAILURES_HTML
  )
  )
  )
langfun/core/langfunc.py CHANGED
@@ -14,7 +14,7 @@
  """LangFunc: Language-based functions."""

  import dataclasses
- from typing import Annotated, Type, Union
+ from typing import Annotated, Type

  from langfun.core import component
  from langfun.core import language_model
@@ -328,22 +328,6 @@ class LangFunc(
  """Transforms the output message before returning from __call__."""
  return lm_output

- @classmethod
- def from_value(
- cls, value: Union[str, template_lib.Template], **kwargs
- ) -> 'LangFunc':
- """Create a LangFunc object from a string or template."""
- if isinstance(value, LangFunc):
- return value
- if isinstance(value, template_lib.Template):
- lfun = LangFunc(value.template_str, **kwargs)
- # So lfun could acccess all attributes from value.
- lfun.sym_setparent(value)
- return lfun
- if isinstance(value, str):
- return LangFunc(template_str=value, **kwargs)
- return LangFunc('{{input}}', input=value, **kwargs)
-

  # Register converter from str to LangFunc, therefore we can always
  # pass strs to attributes that accept LangFunc.
langfun/core/langfunc_test.py CHANGED
@@ -57,6 +57,10 @@ class BasicTest(unittest.TestCase):
  l2 = LangFunc.from_value(l1)
  self.assertIs(l2, l1)

+ l3 = LangFunc.from_value(l1, x=1)
+ self.assertIsNot(l3, l1)
+ self.assertTrue(pg.eq(l3, LangFunc('Hello', x=1)))
+
  c = template_lib.Template(
  '{{x}} + {{l}}',
  x=1,
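
The from_value override removed from LangFunc above appears to be subsumed by the base Template implementation (see langfun/core/template.py, +22 -1, in the file list), and the added test pins down the caller-visible contract. A small sketch of that contract, assuming the usual top-level langfun import:

import langfun as lf

f = lf.LangFunc('Hello')

# Without overrides, from_value returns the same object.
assert lf.LangFunc.from_value(f) is f

# With keyword overrides, a new LangFunc is created (per the new test case).
g = lf.LangFunc.from_value(f, x=1)
assert g is not f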
langfun/core/language_model.py CHANGED
@@ -22,6 +22,7 @@ from langfun.core import component
  from langfun.core import concurrent
  from langfun.core import console
  from langfun.core import message as message_lib
+
  import pyglove as pg

  TOKENS_PER_REQUEST = 250 # Estimated num tokens for a single request
@@ -166,6 +167,11 @@ class LMScoringResult(pg.Object):
  float,
  'The log likelyhood of the requested completion towards the prompt.',
  ]
+ gradients: Annotated[
+ Any | None,
+ '(Optional) gradients from the score method, w.r.t.' +
+ ' prompt.metadata.weights.',
+ ] = None


  class LMCache(pg.Object):
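
The new gradients field on LMScoringResult is optional and defaults to None, so existing constructions keep working. A sketch under the assumption that the float field shown above is named score (its name falls outside this hunk):

from langfun.core import language_model as lm_lib

# Existing usage is unchanged; gradients defaults to None.
r = lm_lib.LMScoringResult(score=-0.42)
assert r.gradients is None

# A scoring backend may now attach gradients w.r.t. prompt.metadata.weights;
# the field is typed Any | None, so any structure is accepted.
r2 = lm_lib.LMScoringResult(score=-0.42, gradients=[0.1, -0.3])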
langfun/core/llms/__init__.py CHANGED
@@ -27,6 +27,7 @@ from langfun.core.llms.fake import StaticSequence
  # Gemini models.
  from langfun.core.llms.google_genai import GenAI
  from langfun.core.llms.google_genai import GeminiPro
+ from langfun.core.llms.google_genai import GeminiPro1_5
  from langfun.core.llms.google_genai import GeminiProVision
  from langfun.core.llms.google_genai import Palm2
  from langfun.core.llms.google_genai import Palm2_IT
@@ -73,6 +74,13 @@ from langfun.core.llms.groq import GroqLlama2_70B
  from langfun.core.llms.groq import GroqMistral_8x7B
  from langfun.core.llms.groq import GroqGemma7B_IT

+ from langfun.core.llms.vertexai import VertexAI
+ from langfun.core.llms.vertexai import VertexAIGeminiPro1_5
+ from langfun.core.llms.vertexai import VertexAIGeminiPro1
+ from langfun.core.llms.vertexai import VertexAIGeminiPro1Vision
+ from langfun.core.llms.vertexai import VertexAIPalm2
+ from langfun.core.llms.vertexai import VertexAIPalm2_32K
+

  # LLaMA C++ models.
  from langfun.core.llms.llama_cpp import LlamaCppRemote
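
The new vertexai module makes the Vertex AI-hosted models importable from langfun.core.llms alongside the GenAI ones. A brief sketch; the constructor arguments are assumptions (Vertex AI typically needs a Cloud project and location), since vertexai.py itself is not shown in this excerpt:

from langfun.core import llms as lf_llms

# Assumed init args; the authoritative signature lives in vertexai.py.
lm = lf_llms.VertexAIGeminiPro1(project='my-project', location='us-central1')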
langfun/core/llms/fake.py CHANGED
@@ -57,12 +57,12 @@ class StaticResponse(Fake):
  """Language model that always gives the same canned response."""

  response: Annotated[
- str,
+ str | lf.Message,
  'A canned response that will be returned regardless of the prompt.'
  ]

  def _response_from(self, prompt: lf.Message) -> lf.Message:
- return lf.AIMessage(self.response)
+ return lf.AIMessage.from_value(self.response)


  @lf.use_init_args(['mapping'])
@@ -70,12 +70,12 @@ class StaticMapping(Fake):
  """A static mapping from prompt to response."""

  mapping: Annotated[
- dict[str, str],
+ dict[str, str | lf.Message],
  'A mapping from prompt to response.'
  ]

  def _response_from(self, prompt: lf.Message) -> lf.Message:
- return lf.AIMessage(self.mapping[prompt])
+ return lf.AIMessage.from_value(self.mapping[prompt])


  @lf.use_init_args(['sequence'])
@@ -83,7 +83,7 @@ class StaticSequence(Fake):
  """A static sequence of responses to use."""

  sequence: Annotated[
- list[str],
+ list[str | lf.Message],
  'A sequence of strings as the response.'
  ]

@@ -92,6 +92,6 @@
  self._pos = 0

  def _response_from(self, prompt: lf.Message) -> lf.Message:
- r = lf.AIMessage(self.sequence[self._pos])
+ r = lf.AIMessage.from_value(self.sequence[self._pos])
  self._pos += 1
  return r
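
The fake LMs above now accept lf.Message values in addition to plain strings and normalize them with lf.AIMessage.from_value, so canned responses can carry metadata. A small sketch; the result metadata key is only illustrative:

import langfun as lf
from langfun.core import llms as lf_llms

# A canned response with metadata, now allowed alongside plain strings.
canned = lf.AIMessage('The answer is 42.', metadata=dict(result=42))
lm = lf_llms.StaticResponse(response=canned)

# Every prompt gets the same canned message back.
output = lf.LangFunc('What is 6 x 7?')(lm=lm)
assert output.text == 'The answer is 42.'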
langfun/core/llms/google_genai.py CHANGED
@@ -34,6 +34,7 @@ class GenAI(lf.LanguageModel):
  'gemini-pro-vision',
  'text-bison-001',
  'chat-bison-001',
+ 'gemini-1.5-pro-latest',
  ],
  'Model name.',
  ]
@@ -262,6 +263,13 @@ _GOOGLE_GENAI_MODEL_HUB = _ModelHub()
  #


+ class GeminiPro1_5(GenAI): # pylint: disable=invalid-name
+ """Gemini Pro latest model."""
+
+ model = 'gemini-1.5-pro-latest'
+ multimodal = True
+
+
  class GeminiPro(GenAI):
  """Gemini Pro model."""

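
gemini-1.5-pro-latest joins the supported model list and is exposed as the multimodal GeminiPro1_5 class. A brief sketch; passing api_key explicitly is an assumption about the GenAI base class, which sits outside this hunk (a key may also come from the environment):

from langfun.core.llms import google_genai

# api_key is assumed to be accepted by the GenAI base class; adjust as needed.
lm = google_genai.GeminiPro1_5(api_key='<your-api-key>')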
langfun/core/llms/openai.py CHANGED
@@ -233,8 +233,9 @@ class OpenAI(lf.LanguageModel):
  for chunk in prompt.chunk():
  if isinstance(chunk, str):
  item = dict(type='text', text=chunk)
- elif isinstance(chunk, lf_modalities.Image) and chunk.uri:
- item = dict(type='image_url', image_url=chunk.uri)
+ elif isinstance(chunk, lf_modalities.Image):
+ uri = chunk.uri or chunk.content_uri
+ item = dict(type='image_url', image_url=dict(url=uri))
  else:
  raise ValueError(f'Unsupported modality object: {chunk!r}.')
  content.append(item)
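
The OpenAI message builder now nests image URLs as image_url={'url': ...} (the chat-completions vision format) and falls back to chunk.content_uri when the image has no external URI. A standalone sketch of the resulting payload shape; the URL is a placeholder:

# Before: item = {'type': 'image_url', 'image_url': 'https://example.com/cat.png'}
# After: the URL is wrapped in a nested object.
item = {
    'type': 'image_url',
    'image_url': {'url': 'https://example.com/cat.png'},  # placeholder URL
}
content = [{'type': 'text', 'text': 'What is in this image?'}, item]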
langfun/core/llms/openai_test.py CHANGED
@@ -66,7 +66,8 @@ def mock_chat_completion_query_vision(messages, *, n=1, **kwargs):
  del kwargs
  choices = []
  urls = [
- c['image_url'] for c in messages[0]['content'] if c['type'] == 'image_url'
+ c['image_url']['url']
+ for c in messages[0]['content'] if c['type'] == 'image_url'
  ]
  for k in range(n):
  choices.append(pg.Dict(