PyPI - langfun - Versions diffs - 0.0.2.dev20240429__py3-none-any.whl → 0.1.2.dev202501150804__py3-none-any.whl - Mend

langfun 0.0.2.dev20240429__py3-none-any.whl → 0.1.2.dev202501150804__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (144) hide show

langfun/__init__.py +20 -2
langfun/core/__init__.py +16 -5
langfun/core/agentic/__init__.py +30 -0
langfun/core/agentic/action.py +854 -0
langfun/core/agentic/action_eval.py +150 -0
langfun/core/agentic/action_eval_test.py +109 -0
langfun/core/agentic/action_test.py +136 -0
langfun/core/coding/python/__init__.py +5 -11
langfun/core/coding/python/correction.py +37 -21
langfun/core/coding/python/correction_test.py +29 -3
langfun/core/coding/python/execution.py +40 -216
langfun/core/coding/python/execution_test.py +29 -89
langfun/core/coding/python/generation.py +21 -11
langfun/core/coding/python/generation_test.py +2 -2
langfun/core/coding/python/parsing.py +108 -193
langfun/core/coding/python/parsing_test.py +2 -105
langfun/core/component.py +63 -2
langfun/core/component_test.py +53 -0
langfun/core/concurrent.py +414 -117
langfun/core/concurrent_test.py +111 -24
langfun/core/console.py +17 -5
langfun/core/console_test.py +17 -0
langfun/core/eval/__init__.py +16 -1
langfun/core/eval/base.py +622 -174
langfun/core/eval/base_test.py +200 -54
langfun/core/eval/matching.py +63 -76
langfun/core/eval/matching_test.py +17 -8
langfun/core/eval/patching.py +130 -0
langfun/core/eval/patching_test.py +170 -0
langfun/core/eval/scoring.py +26 -26
langfun/core/eval/scoring_test.py +19 -2
langfun/core/eval/v2/__init__.py +42 -0
langfun/core/eval/v2/checkpointing.py +380 -0
langfun/core/eval/v2/checkpointing_test.py +228 -0
langfun/core/eval/v2/eval_test_helper.py +136 -0
langfun/core/eval/v2/evaluation.py +725 -0
langfun/core/eval/v2/evaluation_test.py +180 -0
langfun/core/eval/v2/example.py +305 -0
langfun/core/eval/v2/example_test.py +128 -0
langfun/core/eval/v2/experiment.py +1048 -0
langfun/core/eval/v2/experiment_test.py +433 -0
langfun/core/eval/v2/metric_values.py +156 -0
langfun/core/eval/v2/metric_values_test.py +80 -0
langfun/core/eval/v2/metrics.py +357 -0
langfun/core/eval/v2/metrics_test.py +203 -0
langfun/core/eval/v2/progress.py +348 -0
langfun/core/eval/v2/progress_test.py +82 -0
langfun/core/eval/v2/progress_tracking.py +210 -0
langfun/core/eval/v2/progress_tracking_test.py +66 -0
langfun/core/eval/v2/reporting.py +270 -0
langfun/core/eval/v2/reporting_test.py +158 -0
langfun/core/eval/v2/runners.py +488 -0
langfun/core/eval/v2/runners_test.py +334 -0
langfun/core/langfunc.py +4 -17
langfun/core/langfunc_test.py +22 -6
langfun/core/language_model.py +577 -39
langfun/core/language_model_test.py +470 -56
langfun/core/llms/__init__.py +87 -16
langfun/core/llms/anthropic.py +312 -87
langfun/core/llms/anthropic_test.py +71 -3
langfun/core/llms/cache/base.py +21 -2
langfun/core/llms/cache/in_memory.py +13 -0
langfun/core/llms/cache/in_memory_test.py +53 -2
langfun/core/llms/compositional.py +101 -0
langfun/core/llms/compositional_test.py +73 -0
langfun/core/llms/deepseek.py +117 -0
langfun/core/llms/deepseek_test.py +61 -0
langfun/core/llms/fake.py +11 -7
langfun/core/llms/fake_test.py +14 -0
langfun/core/llms/gemini.py +507 -0
langfun/core/llms/gemini_test.py +195 -0
langfun/core/llms/google_genai.py +62 -218
langfun/core/llms/google_genai_test.py +9 -202
langfun/core/llms/groq.py +160 -144
langfun/core/llms/groq_test.py +31 -137
langfun/core/llms/llama_cpp.py +15 -42
langfun/core/llms/llama_cpp_test.py +4 -30
langfun/core/llms/openai.py +395 -203
langfun/core/llms/openai_compatible.py +179 -0
langfun/core/llms/openai_compatible_test.py +495 -0
langfun/core/llms/openai_test.py +30 -395
langfun/core/llms/rest.py +113 -0
langfun/core/llms/rest_test.py +111 -0
langfun/core/llms/vertexai.py +192 -0
langfun/core/llms/vertexai_test.py +52 -0
langfun/core/logging.py +284 -0
langfun/core/logging_test.py +125 -0
langfun/core/message.py +319 -9
langfun/core/message_test.py +190 -13
langfun/core/modalities/__init__.py +6 -2
langfun/core/modalities/audio.py +30 -0
langfun/core/modalities/audio_test.py +63 -0
langfun/core/modalities/image.py +39 -20
langfun/core/modalities/image_test.py +52 -9
langfun/core/modalities/mime.py +206 -29
langfun/core/modalities/mime_test.py +90 -9
langfun/core/modalities/ms_office.py +117 -0
langfun/core/modalities/ms_office_test.py +389 -0
langfun/core/modalities/pdf.py +22 -0
langfun/core/modalities/pdf_test.py +57 -0
langfun/core/modalities/video.py +9 -26
langfun/core/modalities/video_test.py +3 -3
langfun/core/modality.py +26 -3
langfun/core/modality_test.py +2 -2
langfun/core/sampling.py +11 -11
langfun/core/structured/__init__.py +12 -16
langfun/core/structured/completion.py +32 -5
langfun/core/structured/completion_test.py +7 -6
langfun/core/structured/description.py +2 -2
langfun/core/structured/description_test.py +3 -3
langfun/core/structured/function_generation.py +60 -27
langfun/core/structured/function_generation_test.py +72 -2
langfun/core/structured/mapping.py +97 -47
langfun/core/structured/mapping_test.py +90 -2
langfun/core/structured/parsing.py +33 -21
langfun/core/structured/parsing_test.py +53 -9
langfun/core/structured/querying.py +746 -0
langfun/core/structured/{prompting_test.py → querying_test.py} +469 -51
langfun/core/structured/schema.py +204 -97
langfun/core/structured/schema_generation.py +1 -1
langfun/core/structured/schema_test.py +130 -29
langfun/core/structured/scoring.py +125 -19
langfun/core/structured/scoring_test.py +30 -0
langfun/core/structured/tokenization.py +64 -0
langfun/core/structured/tokenization_test.py +48 -0
langfun/core/template.py +115 -1
langfun/core/template_test.py +71 -1
langfun/core/templates/conversation.py +9 -0
langfun/core/templates/conversation_test.py +4 -3
langfun/core/templates/selfplay_test.py +10 -2
langfun-0.1.2.dev202501150804.dist-info/METADATA +225 -0
langfun-0.1.2.dev202501150804.dist-info/RECORD +153 -0
{langfun-0.0.2.dev20240429.dist-info → langfun-0.1.2.dev202501150804.dist-info}/WHEEL +1 -1
langfun/core/coding/python/errors.py +0 -108
langfun/core/coding/python/errors_test.py +0 -99
langfun/core/coding/python/permissions.py +0 -90
langfun/core/coding/python/permissions_test.py +0 -86
langfun/core/structured/prompting.py +0 -238
langfun/core/text_formatting.py +0 -162
langfun/core/text_formatting_test.py +0 -47
langfun-0.0.2.dev20240429.dist-info/METADATA +0 -100
langfun-0.0.2.dev20240429.dist-info/RECORD +0 -108
{langfun-0.0.2.dev20240429.dist-info → langfun-0.1.2.dev202501150804.dist-info}/LICENSE +0 -0
{langfun-0.0.2.dev20240429.dist-info → langfun-0.1.2.dev202501150804.dist-info}/top_level.txt +0 -0

langfun/core/structured/schema_test.py CHANGED Viewed

@@ -18,6 +18,7 @@ import inspect
 import typing
 import unittest
+import langfun.core as lf
 from langfun.core.llms import fake
 from langfun.core.structured import schema as schema_lib
 import pyglove as pg
@@ -260,15 +261,18 @@ class ClassDependenciesTest(unittest.TestCase):
     class A(pg.Object):
       foo: tuple[Foo, int]
+    class B(pg.Object):
+      pass
     class X(pg.Object):
-      k: int
+      k: dict[str, B]
-    class B(A):
+    class C(A):
       bar: Bar
       foo2: Foo | X
     a = A(foo=(Foo(1), 0))
-    self.assertEqual(schema_lib.class_dependencies(a), [Foo, A, Bar, X, B])
+    self.assertEqual(schema_lib.class_dependencies(a), [Foo, A, Bar, B, X, C])
     self.assertEqual(schema_lib.class_dependencies(1), [])
@@ -280,9 +284,10 @@ class SchemaPythonReprTest(unittest.TestCase):
       value_spec: pg.typing.ValueSpec,
       expected_annotation: str,
       strict: bool = False,
+      **kwargs,
   ) -> None:
     self.assertEqual(
-        schema_lib.annotation(value_spec, strict=strict),
+        schema_lib.annotation(value_spec, strict=strict, **kwargs),
         expected_annotation,
     )
@@ -358,11 +363,27 @@ class SchemaPythonReprTest(unittest.TestCase):
     self.assert_annotation(
         pg.typing.Object(Activity).noneable(), 'Activity | None'
     )
+    self.assert_annotation(
+        pg.typing.Object(Activity).noneable(), 'Activity | None',
+        allowed_dependencies=set([Activity]),
+    )
+    self.assert_annotation(
+        pg.typing.Object(Activity).noneable(), 'Any | None',
+        allowed_dependencies=set(),
+    )
     # List.
     self.assert_annotation(
         pg.typing.List(pg.typing.Object(Activity)), 'list[Activity]'
     )
+    self.assert_annotation(
+        pg.typing.List(pg.typing.Object(Activity)), 'list[Activity]',
+        allowed_dependencies=set([Activity]),
+    )
+    self.assert_annotation(
+        pg.typing.List(pg.typing.Object(Activity)), 'list[Any]',
+        allowed_dependencies=set(),
+    )
     self.assert_annotation(
         pg.typing.List(pg.typing.Object(Activity)).noneable(),
         'list[Activity] | None',
@@ -374,16 +395,35 @@ class SchemaPythonReprTest(unittest.TestCase):
     # Tuple.
     self.assert_annotation(
-        pg.typing.Tuple([pg.typing.Int(), pg.typing.Str()]), 'tuple[int, str]'
+        pg.typing.Tuple([Activity, pg.typing.Str()]), 'tuple[Activity, str]'
+    )
+    self.assert_annotation(
+        pg.typing.Tuple([Activity, pg.typing.Str()]), 'tuple[Activity, str]',
+        allowed_dependencies=set([Activity]),
+    )
+    self.assert_annotation(
+        pg.typing.Tuple([Activity, pg.typing.Str()]), 'tuple[Any, str]',
+        allowed_dependencies=set(),
     )
     self.assert_annotation(
-        pg.typing.Tuple([pg.typing.Int(), pg.typing.Str()]).noneable(),
-        'tuple[int, str] | None',
+        pg.typing.Tuple([Activity, pg.typing.Str()]).noneable(),
+        'tuple[Activity, str] | None',
     )
     # Dict.
     self.assert_annotation(
-        pg.typing.Dict({'x': int, 'y': str}), '{\'x\': int, \'y\': str}'
+        pg.typing.Dict({'x': Activity, 'y': str}),
+        '{\'x\': Activity, \'y\': str}'
+    )
+    self.assert_annotation(
+        pg.typing.Dict({'x': Activity, 'y': str}),
+        '{\'x\': Activity, \'y\': str}',
+        allowed_dependencies=set([Activity]),
+    )
+    self.assert_annotation(
+        pg.typing.Dict({'x': Activity, 'y': str}),
+        '{\'x\': Any, \'y\': str}',
+        allowed_dependencies=set(),
     )
     self.assert_annotation(
         pg.typing.Dict({'x': int, 'y': str}),
@@ -395,6 +435,15 @@ class SchemaPythonReprTest(unittest.TestCase):
         'dict[str, Any]',
         strict=False,
     )
+    class DictValue(pg.Object):
+      pass
+    self.assert_annotation(
+        pg.typing.Dict([(pg.typing.StrKey(), DictValue)]),
+        'dict[str, DictValue]',
+        strict=False,
+    )
     self.assert_annotation(
         pg.typing.Dict(),
         'dict[str, Any]',
@@ -408,6 +457,13 @@ class SchemaPythonReprTest(unittest.TestCase):
         ).noneable(),
         'Union[Activity, Itinerary, None]',
     )
+    self.assert_annotation(
+        pg.typing.Union(
+            [pg.typing.Object(Activity), pg.typing.Object(Itinerary)]
+        ).noneable(),
+        'Union[Activity, Any, None]',
+        allowed_dependencies=set([Activity]),
+    )
     # Any.
     self.assert_annotation(pg.typing.Any(), 'Any')
@@ -415,13 +471,13 @@ class SchemaPythonReprTest(unittest.TestCase):
   def test_class_definition(self):
     self.assertEqual(
-        schema_lib.class_definition(Activity),
+        schema_lib.class_definition(Activity, allowed_dependencies=set()),
         'class Activity:\n  description: str\n',
     )
     self.assertEqual(
         schema_lib.class_definition(Itinerary),
         inspect.cleandoc("""
-            class Itinerary:
+            class Itinerary(Object):
               \"\"\"A travel itinerary for a day.\"\"\"
               day: int(min=1)
               type: Literal['daytime', 'nighttime']
@@ -431,7 +487,9 @@ class SchemaPythonReprTest(unittest.TestCase):
             """) + '\n',
     )
     self.assertEqual(
-        schema_lib.class_definition(PlaceOfInterest),
+        schema_lib.class_definition(
+            PlaceOfInterest, allowed_dependencies=set()
+        ),
         inspect.cleandoc("""
             class PlaceOfInterest:
               \"\"\"The name of a place of interest.
@@ -447,11 +505,11 @@ class SchemaPythonReprTest(unittest.TestCase):
       pass
     self.assertEqual(
-        schema_lib.class_definition(A),
+        schema_lib.class_definition(A, allowed_dependencies=set()),
         'class A:\n  pass\n',
     )
     self.assertEqual(
-        schema_lib.class_definition(A, include_pg_object_as_base=True),
+        schema_lib.class_definition(A),
         'class A(Object):\n  pass\n',
     )
@@ -459,7 +517,27 @@ class SchemaPythonReprTest(unittest.TestCase):
       x: str
       __kwargs__: typing.Any
-    self.assertEqual(schema_lib.class_definition(C), 'class C:\n  x: str\n')
+    self.assertEqual(
+        schema_lib.class_definition(C), 'class C(Object):\n  x: str\n'
+    )
+    class D(pg.Object):
+      x: str
+      @schema_lib.include_method_in_prompt
+      def __call__(self, y: int) -> int:
+        return len(self.x) + y
+    self.assertEqual(
+        schema_lib.class_definition(D),
+        inspect.cleandoc(
+            """
+            class D(Object):
+              x: str
+              def __call__(self, y: int) -> int:
+                return len(self.x) + y
+            """) + '\n'
+    )
   def test_repr(self):
     class Foo(pg.Object):
@@ -477,10 +555,21 @@ class SchemaPythonReprTest(unittest.TestCase):
     class A(pg.Object):
       foo: Foo
+      @schema_lib.include_method_in_prompt
+      def foo_value(self) -> int:
+        return self.foo.x
+      def baz_value(self) -> str:
+        return 'baz'
     class B(A):
       bar: Bar
       foo2: Foo
+      @schema_lib.include_method_in_prompt
+      def bar_value(self) -> str:
+        return self.bar.y
     schema = schema_lib.Schema([B])
     self.assertEqual(
         schema_lib.SchemaPythonRepr().class_definitions(schema),
@@ -488,9 +577,6 @@ class SchemaPythonReprTest(unittest.TestCase):
             class Foo:
               x: int
-            class A:
-              foo: Foo
             class Bar:
               """Class Bar."""
               y: str
@@ -499,10 +585,16 @@ class SchemaPythonReprTest(unittest.TestCase):
               """Baz(y: str)"""
               y: str
-            class B(A):
+            class B:
               foo: Foo
               bar: Bar
               foo2: Foo
+              def bar_value(self) -> str:
+                return self.bar.y
+              def foo_value(self) -> int:
+                return self.foo.x
             ''') + '\n',
     )
@@ -519,9 +611,6 @@ class SchemaPythonReprTest(unittest.TestCase):
             class Foo:
               x: int
-            class A:
-              foo: Foo
             class Bar:
               """Class Bar."""
               y: str
@@ -530,10 +619,16 @@ class SchemaPythonReprTest(unittest.TestCase):
               """Baz(y: str)"""
               y: str
-            class B(A):
+            class B:
               foo: Foo
               bar: Bar
               foo2: Foo
+              def bar_value(self) -> str:
+                return self.bar.y
+              def foo_value(self) -> int:
+                return self.foo.x
             ```
             '''),
     )
@@ -541,16 +636,12 @@ class SchemaPythonReprTest(unittest.TestCase):
         schema_lib.SchemaPythonRepr().repr(
             schema,
             include_result_definition=False,
-            include_pg_object_as_base=True,
             markdown=False,
         ),
         inspect.cleandoc('''
-            class Foo(Object):
+            class Foo:
               x: int
-            class A(Object):
-              foo: Foo
             class Bar:
               """Class Bar."""
               y: str
@@ -559,10 +650,16 @@ class SchemaPythonReprTest(unittest.TestCase):
               """Baz(y: str)"""
               y: str
-            class B(A):
+            class B:
               foo: Foo
               bar: Bar
               foo2: Foo
+              def bar_value(self) -> str:
+                return self.bar.y
+              def foo_value(self) -> int:
+                return self.foo.x
             '''),
     )
@@ -598,6 +695,10 @@ class ValuePythonReprTest(unittest.TestCase):
         schema_lib.ValuePythonRepr().repr(1, schema_lib.Schema(int)),
         '```python\n1\n```'
     )
+    self.assertEqual(
+        schema_lib.ValuePythonRepr().repr(lf.Template('hi, {{a}}', a='foo')),
+        'hi, foo'
+    )
     self.assertEqual(
         schema_lib.ValuePythonRepr().repr(
             A([Foo(1), Foo(2)], 'bar'), schema_lib.Schema(A), markdown=False,
@@ -610,7 +711,7 @@ class ValuePythonReprTest(unittest.TestCase):
             ```python
             class Foo(Object):
               x: int
             class A(Object):
               foo: list[Foo]
               y: str | None

langfun/core/structured/scoring.py CHANGED Viewed

@@ -17,13 +17,13 @@ from typing import Any, Type, Union
 import langfun.core as lf
 from langfun.core.structured import mapping
-from langfun.core.structured import prompting
+from langfun.core.structured import querying
 from langfun.core.structured import schema as schema_lib
 import pyglove as pg
 def score(
-    prompt: Union[str, pg.Symbolic],
+    prompt: Union[str, pg.Symbolic] | list[str | pg.Symbolic],
     completions: list[str | pg.Symbolic],
     schema: Union[
         schema_lib.Schema, Type[Any], list[Type[Any]], dict[str, Any], None
@@ -32,9 +32,58 @@ def score(
     lm: lf.LanguageModel | None = None,
     examples: list[mapping.MappingExample] | None = None,
     protocol: schema_lib.SchemaProtocol = 'python',
+    return_scoring_results: bool = False,
     **kwargs,
-) -> list[float]:
-  """Scores the outputs based on the prompt."""
+) -> list[float] | list[lf.LMScoringResult]:
+  """Scores the outputs based on the prompt.
+  Examples:
+    ```
+    # Example 1: Scoring text output based on the user prompt.
+    scores = lf.score('{{x}} + {{y}} =', ['1', '2', '3'], lm=lm, x=1, y=2)
+    assert len(scores) == 3
+    # Example 2: Scoring int output based on the formulated OOP prompt.
+    scores = lf.score('1 + 1 =', [1, 2, 3], lm=lm)
+    assert len(scores) == 3
+    class Answer(pg.Object):
+      result: int
+    # Example 3: Scoring object output based on the formulated OOP prompt.
+    scores = lf.score('1 + 1 =', [Answer(1), Answer(2), Answer(3)], lm=lm)
+    assert len(scores) == 3
+    # Example 4: Scoring object field value based on the formulated OOP prompt
+    # and the generated tokens before the first `pg.oneof`.
+    scores = lf.score('1 + 1 =', [Answer(pg.oneof([1, 2, 3]))], lm=lm)
+    assert len(scores) == 3
+    # Example 5: Scoring multiple prompt/completion pairs.
+    scores = lf.score(
+        ['1 + 1=', '2 + 3='],
+        ['2', '4'],
+        lm=lm
+    )
+    assert len(scores) == 2
+    ```
+  Args:
+    prompt: The prompt(s) based on which each completion will be scored.
+    completions: A list of strings or symbolic objects as the output.
+    schema: The schema as the output type. If None, it will be inferred from
+      the completions.
+    lm: The language model used for scoring.
+    examples: Fewshot exemplars used together with the prompt in getting the
+      completions.
+    protocol: The protocol for formulating the prompt based on objects.
+    return_scoring_results: If True, returns a list of `lf.LMScoringResult`,
+      otherwise returns a list of floats as the scores of each completion.
+    **kwargs: Keyword arguments that are referred by the prompt.
+  Returns:
+    A list of floats or `lf.LMScoringResult` as the score of each completion.
+  """
   if not completions:
     raise ValueError('`completions` must not be empty.')
@@ -48,28 +97,85 @@ def score(
             f'{[type(c) for c in completions]}.'
         )
-  input_message = prompting.query(
-      prompt,
-      schema,
-      examples=examples,
-      protocol=protocol,
-      skip_lm=True,
-      returns_message=True,
-      **kwargs,
-  )
+  if isinstance(prompt, list):
+    prompts = []
+    for p in prompt:
+      prompts.append(
+          querying.query_prompt(
+              p,
+              schema,
+              examples=examples,
+              protocol=protocol,
+              **kwargs,
+          )
+      )
+    input_message = prompts
+  else:
+    input_message = querying.query_prompt(
+        prompt,
+        schema,
+        examples=examples,
+        protocol=protocol,
+        **kwargs,
+    )
   if lm is None:
     lm_override = lf.get_contextual_override('lm')
     if lm_override is None:
       raise ValueError('`lm` must be specified or provided from `lf.context`.')
     lm = lm_override.value
+  completion_reprs = []
+  for c in completions:
+    if isinstance(c, mapping.MappingError):
+      completion_reprs.append(c.lm_response)
+    else:
+      rep = mapping.MappingExample.value_repr(
+          c, protocol=protocol, compact=False, verbose=False
+      )
+      # NOTE(daiyip): supporting scenario of scoring object field with
+      # `pg.oneof`.
+      oneof_pos = rep.find('OneOf(')
+      if oneof_pos == -1:
+        completion_reprs.append(rep)
+      else:
+        assert protocol == 'python', protocol
+        if isinstance(input_message, list):
+          raise ValueError(
+              'Scoring on object fields using `pg.oneof` must share the '
+              f'same prompt. Encountered: {prompt}'
+          )
+        input_message.text += '\n' + rep[:oneof_pos]
+        oneof = _get_first_oneof(c)
+        for v in oneof.candidates:
+          completion_reprs.append(
+              pg.format(
+                  v,
+                  python_format=True,
+                  compact=False,
+                  verbose=False,
+                  root_indent=oneof.sym_path.depth
+              )
+          )
   results = lm.score(
       input_message,
-      [
-          mapping.MappingExample.value_repr(
-              c, protocol=protocol, compact=False, verbose=False
-          )
-          for c in completions
-      ],
+      completion_reprs,
   )
+  if return_scoring_results:
+    return results
   return [r.score for r in results]
+def _get_first_oneof(value: Any) -> pg.hyper.OneOf:
+  """Gets the first pg.oneof from a symbolic object."""
+  oneofs = []
+  def select_oneofs(k, v, p):
+    del k, p
+    if isinstance(v, pg.hyper.OneOf):
+      oneofs.append(v)
+      return pg.TraverseAction.CONTINUE
+    return pg.TraverseAction.ENTER
+  pg.traverse(value, select_oneofs)
+  assert oneofs
+  return oneofs[0]

langfun/core/structured/scoring_test.py CHANGED Viewed

@@ -16,6 +16,11 @@ import unittest
 import langfun.core as lf
 from langfun.core.llms import fake
 from langfun.core.structured import scoring
+import pyglove as pg
+class Answer(pg.Object):
+  result: int
 class ScoringTest(unittest.TestCase):
@@ -32,9 +37,34 @@ class ScoringTest(unittest.TestCase):
     with self.assertRaisesRegex(ValueError, '`lm` must be specified'):
       scoring.score('hi', [1, 2])
+    with self.assertRaisesRegex(
+        ValueError,
+        'Scoring on object fields using `pg.oneof` must share the same prompt',
+    ):
+      scoring.score(
+          ['1 + 1=', '2 + 3='],
+          [Answer(pg.oneof([1, 2, 3]))],
+          lm=fake.Echo(),
+      )
   def test_score(self):
     self.assertEqual(scoring.score('hi', [1, 2], lm=fake.Echo()), [0.0, -1.0])
+  def test_score_on_field_values(self):
+    self.assertEqual(
+        scoring.score(
+            '1 + 1=',
+            [Answer(pg.oneof([1, 2, 3]))], lm=fake.Echo()
+        ),
+        [0.0, -1.0, -2.0]
+    )
+  def test_score_returning_scoring_results(self):
+    self.assertEqual(scoring.score(
+        'hi', [1, 2], lm=fake.Echo(), return_scoring_results=True),
+                     [lf.LMScoringResult(score=0.0, gradients=None),
+                      lf.LMScoringResult(score=-1.0, gradients=None)])
   def test_scope_with_lm_from_the_context(self):
     with lf.context(lm=fake.Echo()):
       self.assertEqual(scoring.score('hi', [1, 2]), [0.0, -1.0])

langfun/core/structured/tokenization.py ADDED Viewed

@@ -0,0 +1,64 @@
+# Copyright 2023 The Langfun Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tokenize the prompt for `lf.query`."""
+from typing import Any, Type, Union
+import langfun.core as lf
+from langfun.core.structured import mapping
+from langfun.core.structured import querying
+from langfun.core.structured import schema as schema_lib
+import pyglove as pg
+def tokenize(
+    prompt: Union[str, pg.Symbolic] | list[str | pg.Symbolic],
+    schema: Union[
+        schema_lib.Schema, Type[Any], list[Type[Any]], dict[str, Any], None
+    ] = None,
+    *,
+    lm: lf.LanguageModel | None = None,
+    examples: list[mapping.MappingExample] | None = None,
+    protocol: schema_lib.SchemaProtocol = 'python',
+    **kwargs,
+) -> list[tuple[str | bytes, int]]:
+  """Tokenize the prompt for `lf.query`.
+  Args:
+    prompt: The prompt(s) based on which each completion will be scored.
+    schema: The schema as the output type. If None, it will be inferred from
+      the completions.
+    lm: The language model used for scoring.
+    examples: Fewshot exemplars used together with the prompt in getting the
+      completions.
+    protocol: The protocol for formulating the prompt based on objects.
+    **kwargs: Keyword arguments that are referred by the prompt.
+  Returns:
+    A list of (text, token_id) tuples.
+  """
+  input_message = querying.query_prompt(
+      prompt,
+      schema,
+      examples=examples,
+      protocol=protocol,
+      **kwargs,
+  )
+  if lm is None:
+    lm_override = lf.get_contextual_override('lm')
+    if lm_override is None:
+      raise ValueError('`lm` must be specified or provided from `lf.context`.')
+    lm = lm_override.value
+  return lm.tokenize(input_message)

langfun/core/structured/tokenization_test.py ADDED Viewed

@@ -0,0 +1,48 @@
+# Copyright 2023 The Langfun Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import unittest
+import langfun.core as lf
+from langfun.core.llms import fake
+from langfun.core.structured import tokenization
+import pyglove as pg
+class Answer(pg.Object):
+  result: int
+class TokenizationTest(unittest.TestCase):
+  def test_bad_call(self):
+    with self.assertRaisesRegex(ValueError, '`lm` must be specified'):
+      tokenization.tokenize('hi')
+  def test_tokenize(self):
+    self.assertEqual(
+        tokenization.tokenize('hi', lm=fake.Echo()),
+        [('hi', 0)]
+    )
+  def test_tokenize_with_lm_from_the_context(self):
+    with lf.context(lm=fake.Echo()):
+      self.assertEqual(
+          tokenization.tokenize('hi'),
+          [('hi', 0)]
+      )
+if __name__ == '__main__':
+  unittest.main()

langfun 0.0.2.dev20240429__py3-none-any.whl → 0.1.2.dev202501150804__py3-none-any.whl

langfun 0.0.2.dev20240429__py3-none-any.whl → 0.1.2.dev202501150804__py3-none-any.whl