PyPI - langfun - Versions diffs - 0.0.2.dev20240425__tar.gz → 0.0.2.dev20240428__tar.gz - Mend

langfun 0.0.2.dev20240425__tar.gz → 0.0.2.dev20240428__tar.gz

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (114) hide show

{langfun-0.0.2.dev20240425 → langfun-0.0.2.dev20240428}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: langfun
-Version: 0.0.2.dev20240425
+Version: 0.0.2.dev20240428
 Summary: Langfun: Language as Functions.
 Home-page: https://github.com/google/langfun
 Author: Langfun Authors

{langfun-0.0.2.dev20240425 → langfun-0.0.2.dev20240428}/langfun/core/eval/base.py RENAMED Viewed

@@ -540,7 +540,7 @@ class Evaluable(lf.Component):
           f'<div style="color: {text_color}; white-space: pre-wrap;'
           'padding: 10px; border: 1px solid; margin-top: 10px">'
       )
-      s.write(m.text)
+      s.write(m.get('formatted_text', m.text))
       if m.result is not None:
         s.write(
             '<div style="color: magenta; white-space: pre-wrap;'

{langfun-0.0.2.dev20240425 → langfun-0.0.2.dev20240428}/langfun/core/langfunc.py RENAMED Viewed

@@ -261,7 +261,6 @@ class LangFunc(
         if lm_input is None:
           lm_input = self.render(**kwargs)
-        lm_input.tag(message_lib.Message.TAG_LM_INPUT)
         if skip_lm:
           return lm_input
@@ -270,10 +269,6 @@ class LangFunc(
         # Send rendered text to LM.
         lm_output = self.lm(lm_input, cache_seed=cache_seed)
-        # Track the input as the source of the output.
-        lm_output.source = lm_input
-        lm_output.tag(message_lib.Message.TAG_LM_RESPONSE)
         # Transform the output message.
         lm_output = self.transform_output(lm_output)
         lm_output.tag(message_lib.Message.TAG_LM_OUTPUT)

{langfun-0.0.2.dev20240425 → langfun-0.0.2.dev20240428}/langfun/core/language_model.py RENAMED Viewed

@@ -346,9 +346,42 @@ class LanguageModel(component.Component):
     with component.context(override_attrs=True, **kwargs):
       if self.cache is None:
-        return self._sample(prompts)
+        results = self._sample(prompts)
       else:
-        return self._sample_with_cache_lookup(prompts, cache_seed)
+        results = self._sample_with_cache_lookup(prompts, cache_seed)
+      for prompt, result in zip(prompts, results):
+        # Tag LM input.
+        prompt.tag(message_lib.Message.TAG_LM_INPUT)
+        for sample in result.samples:
+          # Update metadata for response message.
+          response = sample.response
+          response.metadata.score = sample.score
+          response.metadata.logprobs = sample.logprobs
+          # NOTE(daiyip): Current usage is computed at per-result level,
+          # which is accurate when n=1. For n > 1, we average the usage across
+          # multiple samples.
+          usage = result.usage
+          if len(result.samples) == 1 or usage is None:
+            response.metadata.usage = usage
+          else:
+            n = len(result.samples)
+            response.metadata.usage = LMSamplingUsage(
+                prompt_tokens=usage.prompt_tokens // n,
+                completion_tokens=usage.completion_tokens // n,
+                total_tokens=usage.total_tokens // n,
+            )
+          # Track the prompt for corresponding response.
+          response.source = prompt
+          # Tag LM response.
+          response.tag(message_lib.Message.TAG_LM_RESPONSE)
+      return results
   def _sample_with_cache_lookup(
       self, prompts: list[str | message_lib.Message], cache_seed: int
@@ -436,13 +469,8 @@ class LanguageModel(component.Component):
       result = self.sample(
           [prompt], sampling_options=sampling_options, cache_seed=cache_seed
       )[0]
-      response = result.samples[0].response
-      logprobs = result.samples[0].logprobs
-      response.set('score', result.samples[0].score)
-      response.metadata.logprobs = logprobs
-      response.metadata.usage = result.usage
       elapse = time.time() - request_start
+      response = result.samples[0].response
       self._debug(prompt, response, call_counter, result.usage, elapse)
       return response
@@ -494,7 +522,9 @@ class LanguageModel(component.Component):
       title_suffix = console.colored(f' ({usage.prompt_tokens} tokens)', 'red')
     console.write(
-        prompt,
+        # We use metadata 'formatted_text' for scenarios where the prompt text
+        # is formatted by the LM.
+        prompt.get('formatted_text', prompt.text),
         title=f'\n[{call_counter}] PROMPT SENT TO LM{title_suffix}:',
         color='green',
     )

{langfun-0.0.2.dev20240425 → langfun-0.0.2.dev20240428}/langfun/core/language_model_test.py RENAMED Viewed

@@ -111,11 +111,35 @@ class LanguageModelTest(unittest.TestCase):
         lm.sample(prompts=['foo', 'bar']),
         [
             lm_lib.LMSamplingResult(
-                [lm_lib.LMSample('foo', score=-1.0)],
+                [
+                    lm_lib.LMSample(
+                        message_lib.AIMessage(
+                            'foo',
+                            score=-1.0,
+                            logprobs=None,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            tags=[message_lib.Message.TAG_LM_RESPONSE],
+                        ),
+                        score=-1.0,
+                        logprobs=None,
+                    )
+                ],
                 usage=lm_lib.LMSamplingUsage(100, 100, 200),
             ),
             lm_lib.LMSamplingResult(
-                [lm_lib.LMSample('bar', score=-1.0)],
+                [
+                    lm_lib.LMSample(
+                        message_lib.AIMessage(
+                            'bar',
+                            score=-1.0,
+                            logprobs=None,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            tags=[message_lib.Message.TAG_LM_RESPONSE],
+                        ),
+                        score=-1.0,
+                        logprobs=None,
+                    )
+                ],
                 usage=lm_lib.LMSamplingUsage(100, 100, 200),
             ),
         ],
@@ -128,41 +152,119 @@ class LanguageModelTest(unittest.TestCase):
         ),
         [
             lm_lib.LMSamplingResult(
-                [lm_lib.LMSample('foo' * 2, score=0.5)],
+                [
+                    lm_lib.LMSample(
+                        message_lib.AIMessage(
+                            'foo' * 2,
+                            score=0.5,
+                            logprobs=None,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            tags=[message_lib.Message.TAG_LM_RESPONSE],
+                        ),
+                        score=0.5,
+                        logprobs=None,
+                    ),
+                ],
                 usage=lm_lib.LMSamplingUsage(100, 100, 200),
             ),
             lm_lib.LMSamplingResult(
-                [lm_lib.LMSample('bar' * 2, score=0.5)],
-                usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                [
+                    lm_lib.LMSample(
+                        message_lib.AIMessage(
+                            'bar' * 2,
+                            score=0.5,
+                            logprobs=None,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            tags=[message_lib.Message.TAG_LM_RESPONSE],
+                        ),
+                        score=0.5,
+                        logprobs=None,
+                    ),
+                ],
+                usage=lm_lib.LMSamplingUsage(
+                    prompt_tokens=100, completion_tokens=100, total_tokens=200
+                ),
             ),
-        ],
+        ]
     )
     # Test override individual flags within sampling_options.
     self.assertEqual(
         lm.sample(prompts=['foo', 'bar'], temperature=1.0),
         [
             lm_lib.LMSamplingResult(
-                [lm_lib.LMSample('foo', score=1.0)],
+                [
+                    lm_lib.LMSample(
+                        message_lib.AIMessage(
+                            'foo',
+                            score=1.0,
+                            logprobs=None,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            tags=[message_lib.Message.TAG_LM_RESPONSE],
+                        ),
+                        score=1.0,
+                        logprobs=None,
+                    ),
+                ],
                 usage=lm_lib.LMSamplingUsage(100, 100, 200),
             ),
             lm_lib.LMSamplingResult(
-                [lm_lib.LMSample('bar', score=1.0)],
-                usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                [
+                    lm_lib.LMSample(
+                        message_lib.AIMessage(
+                            'bar',
+                            score=1.0,
+                            logprobs=None,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            tags=[message_lib.Message.TAG_LM_RESPONSE],
+                        ),
+                        score=1.0,
+                        logprobs=None,
+                    ),
+                ],
+                usage=lm_lib.LMSamplingUsage(
+                    prompt_tokens=100, completion_tokens=100, total_tokens=200
+                ),
             ),
-        ],
+        ]
     )
     self.assertEqual(
         lm.sample(prompts=['foo', 'bar'], top_k=2, temperature=0.7),
         [
             lm_lib.LMSamplingResult(
-                [lm_lib.LMSample('foo' * 2, score=0.7)],
+                [
+                    lm_lib.LMSample(
+                        message_lib.AIMessage(
+                            'foo' * 2,
+                            score=0.7,
+                            logprobs=None,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            tags=[message_lib.Message.TAG_LM_RESPONSE],
+                        ),
+                        score=0.7,
+                        logprobs=None,
+                    ),
+                ],
                 usage=lm_lib.LMSamplingUsage(100, 100, 200),
             ),
             lm_lib.LMSamplingResult(
-                [lm_lib.LMSample('bar' * 2, score=0.7)],
-                usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                [
+                    lm_lib.LMSample(
+                        message_lib.AIMessage(
+                            'bar' * 2,
+                            score=0.7,
+                            logprobs=None,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            tags=[message_lib.Message.TAG_LM_RESPONSE],
+                        ),
+                        score=0.7,
+                        logprobs=None,
+                    ),
+                ],
+                usage=lm_lib.LMSamplingUsage(
+                    prompt_tokens=100, completion_tokens=100, total_tokens=200
+                ),
             ),
-        ],
+        ]
     )
   def test_call(self):
@@ -189,7 +291,16 @@ class LanguageModelTest(unittest.TestCase):
             lm_lib.LMSamplingResult(
                 [
                     lm_lib.LMSample(
-                        message_lib.AIMessage('foo', cache_seed=0), score=-1.0
+                        message_lib.AIMessage(
+                            'foo',
+                            cache_seed=0,
+                            score=-1.0,
+                            logprobs=None,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            tags=[message_lib.Message.TAG_LM_RESPONSE],
+                        ),
+                        score=-1.0,
+                        logprobs=None,
                     )
                 ],
                 usage=lm_lib.LMSamplingUsage(100, 100, 200),
@@ -197,7 +308,16 @@ class LanguageModelTest(unittest.TestCase):
             lm_lib.LMSamplingResult(
                 [
                     lm_lib.LMSample(
-                        message_lib.AIMessage('bar', cache_seed=0), score=-1.0
+                        message_lib.AIMessage(
+                            'bar',
+                            cache_seed=0,
+                            score=-1.0,
+                            logprobs=None,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            tags=[message_lib.Message.TAG_LM_RESPONSE],
+                        ),
+                        score=-1.0,
+                        logprobs=None,
                     )
                 ],
                 usage=lm_lib.LMSamplingUsage(100, 100, 200),
@@ -225,7 +345,16 @@ class LanguageModelTest(unittest.TestCase):
             lm_lib.LMSamplingResult(
                 [
                     lm_lib.LMSample(
-                        message_lib.AIMessage('foo', cache_seed=0), score=1.0
+                        message_lib.AIMessage(
+                            'foo',
+                            cache_seed=0,
+                            score=1.0,
+                            logprobs=None,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            tags=[message_lib.Message.TAG_LM_RESPONSE],
+                        ),
+                        score=1.0,
+                        logprobs=None,
                     )
                 ],
                 usage=lm_lib.LMSamplingUsage(100, 100, 200),
@@ -233,7 +362,16 @@ class LanguageModelTest(unittest.TestCase):
             lm_lib.LMSamplingResult(
                 [
                     lm_lib.LMSample(
-                        message_lib.AIMessage('baz', cache_seed=0), score=1.0
+                        message_lib.AIMessage(
+                            'baz',
+                            cache_seed=0,
+                            score=1.0,
+                            logprobs=None,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            tags=[message_lib.Message.TAG_LM_RESPONSE],
+                        ),
+                        score=1.0,
+                        logprobs=None,
                     )
                 ],
                 usage=lm_lib.LMSamplingUsage(100, 100, 200),

{langfun-0.0.2.dev20240425 → langfun-0.0.2.dev20240428}/langfun/core/llms/fake_test.py RENAMED Viewed

@@ -28,7 +28,19 @@ class EchoTest(unittest.TestCase):
         lm.sample(['hi']),
         [
             lf.LMSamplingResult(
-                [lf.LMSample('hi', 1.0)],
+                [
+                    lf.LMSample(
+                        lf.AIMessage(
+                            'hi',
+                            score=1.0,
+                            logprobs=None,
+                            usage=lf.LMSamplingUsage(2, 2, 4),
+                            tags=[lf.Message.TAG_LM_RESPONSE],
+                        ),
+                        score=1.0,
+                        logprobs=None,
+                    )
+                ],
                 lf.LMSamplingUsage(2, 2, 4))
         ]
     )
@@ -60,7 +72,19 @@ class StaticResponseTest(unittest.TestCase):
         lm.sample(['hi']),
         [
             lf.LMSamplingResult(
-                [lf.LMSample(canned_response, 1.0)],
+                [
+                    lf.LMSample(
+                        lf.AIMessage(
+                            canned_response,
+                            score=1.0,
+                            logprobs=None,
+                            usage=lf.LMSamplingUsage(2, 38, 40),
+                            tags=[lf.Message.TAG_LM_RESPONSE],
+                        ),
+                        score=1.0,
+                        logprobs=None,
+                    )
+                ],
                 usage=lf.LMSamplingUsage(2, 38, 40)
             )
         ],
@@ -69,7 +93,19 @@ class StaticResponseTest(unittest.TestCase):
         lm.sample(['Tell me a joke.']),
         [
             lf.LMSamplingResult(
-                [lf.LMSample(canned_response, 1.0)],
+                [
+                    lf.LMSample(
+                        lf.AIMessage(
+                            canned_response,
+                            score=1.0,
+                            logprobs=None,
+                            usage=lf.LMSamplingUsage(15, 38, 53),
+                            tags=[lf.Message.TAG_LM_RESPONSE],
+                        ),
+                        score=1.0,
+                        logprobs=None,
+                    )
+                ],
                 usage=lf.LMSamplingUsage(15, 38, 53)
             )
         ],
@@ -101,11 +137,35 @@ class StaticMappingTest(unittest.TestCase):
         lm.sample(['Hi', 'How are you?']),
         [
             lf.LMSamplingResult(
-                [lf.LMSample('Hello', 1.0)],
+                [
+                    lf.LMSample(
+                        lf.AIMessage(
+                            'Hello',
+                            score=1.0,
+                            logprobs=None,
+                            usage=lf.LMSamplingUsage(2, 5, 7),
+                            tags=[lf.Message.TAG_LM_RESPONSE],
+                        ),
+                        score=1.0,
+                        logprobs=None,
+                    )
+                ],
                 usage=lf.LMSamplingUsage(2, 5, 7)
             ),
             lf.LMSamplingResult(
-                [lf.LMSample('I am fine, how about you?', 1.0)],
+                [
+                    lf.LMSample(
+                        lf.AIMessage(
+                            'I am fine, how about you?',
+                            score=1.0,
+                            logprobs=None,
+                            usage=lf.LMSamplingUsage(12, 25, 37),
+                            tags=[lf.Message.TAG_LM_RESPONSE],
+                        ),
+                        score=1.0,
+                        logprobs=None,
+                    )
+                ],
                 usage=lf.LMSamplingUsage(12, 25, 37)
             )
         ]
@@ -126,11 +186,35 @@ class StaticSequenceTest(unittest.TestCase):
         lm.sample(['Hi', 'How are you?']),
         [
             lf.LMSamplingResult(
-                [lf.LMSample('Hello', 1.0)],
+                [
+                    lf.LMSample(
+                        lf.AIMessage(
+                            'Hello',
+                            score=1.0,
+                            logprobs=None,
+                            usage=lf.LMSamplingUsage(2, 5, 7),
+                            tags=[lf.Message.TAG_LM_RESPONSE],
+                        ),
+                        score=1.0,
+                        logprobs=None,
+                    )
+                ],
                 usage=lf.LMSamplingUsage(2, 5, 7)
             ),
             lf.LMSamplingResult(
-                [lf.LMSample('I am fine, how about you?', 1.0)],
+                [
+                    lf.LMSample(
+                        lf.AIMessage(
+                            'I am fine, how about you?',
+                            score=1.0,
+                            logprobs=None,
+                            usage=lf.LMSamplingUsage(12, 25, 37),
+                            tags=[lf.Message.TAG_LM_RESPONSE],
+                        ),
+                        score=1.0,
+                        logprobs=None,
+                    )
+                ],
                 usage=lf.LMSamplingUsage(12, 25, 37)
             )
         ]

langfun 0.0.2.dev20240425__tar.gz → 0.0.2.dev20240428__tar.gz

langfun 0.0.2.dev20240425__tar.gz → 0.0.2.dev20240428__tar.gz