langfun 0.0.2.dev20240319__py3-none-any.whl → 0.0.2.dev20240429__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langfun/__init__.py +2 -0
- langfun/core/__init__.py +1 -0
- langfun/core/coding/python/correction.py +0 -7
- langfun/core/component.py +6 -0
- langfun/core/component_test.py +1 -0
- langfun/core/eval/__init__.py +2 -0
- langfun/core/eval/base.py +240 -37
- langfun/core/eval/base_test.py +52 -18
- langfun/core/eval/matching.py +26 -9
- langfun/core/eval/matching_test.py +3 -4
- langfun/core/eval/scoring.py +15 -6
- langfun/core/eval/scoring_test.py +2 -2
- langfun/core/langfunc.py +0 -5
- langfun/core/langfunc_test.py +6 -4
- langfun/core/language_model.py +124 -24
- langfun/core/language_model_test.py +249 -26
- langfun/core/llms/__init__.py +24 -5
- langfun/core/llms/anthropic.py +263 -0
- langfun/core/llms/anthropic_test.py +167 -0
- langfun/core/llms/cache/in_memory_test.py +37 -28
- langfun/core/llms/fake.py +31 -22
- langfun/core/llms/fake_test.py +122 -11
- langfun/core/llms/{gemini.py → google_genai.py} +117 -15
- langfun/core/llms/{gemini_test.py → google_genai_test.py} +83 -15
- langfun/core/llms/groq.py +260 -0
- langfun/core/llms/groq_test.py +170 -0
- langfun/core/llms/llama_cpp.py +3 -1
- langfun/core/llms/openai.py +97 -79
- langfun/core/llms/openai_test.py +285 -59
- langfun/core/modalities/video.py +5 -2
- langfun/core/structured/__init__.py +3 -0
- langfun/core/structured/completion_test.py +2 -2
- langfun/core/structured/function_generation.py +245 -0
- langfun/core/structured/function_generation_test.py +329 -0
- langfun/core/structured/mapping.py +59 -3
- langfun/core/structured/mapping_test.py +17 -0
- langfun/core/structured/parsing.py +2 -1
- langfun/core/structured/parsing_test.py +18 -13
- langfun/core/structured/prompting.py +27 -6
- langfun/core/structured/prompting_test.py +79 -12
- langfun/core/structured/schema.py +25 -22
- langfun/core/structured/schema_generation.py +2 -3
- langfun/core/structured/schema_generation_test.py +2 -2
- langfun/core/structured/schema_test.py +42 -27
- langfun/core/template.py +125 -10
- langfun/core/template_test.py +75 -0
- langfun/core/templates/selfplay_test.py +6 -2
- {langfun-0.0.2.dev20240319.dist-info → langfun-0.0.2.dev20240429.dist-info}/METADATA +3 -2
- {langfun-0.0.2.dev20240319.dist-info → langfun-0.0.2.dev20240429.dist-info}/RECORD +52 -46
- {langfun-0.0.2.dev20240319.dist-info → langfun-0.0.2.dev20240429.dist-info}/LICENSE +0 -0
- {langfun-0.0.2.dev20240319.dist-info → langfun-0.0.2.dev20240429.dist-info}/WHEEL +0 -0
- {langfun-0.0.2.dev20240319.dist-info → langfun-0.0.2.dev20240429.dist-info}/top_level.txt +0 -0
langfun/core/language_model.py
CHANGED
@@ -24,6 +24,9 @@ from langfun.core import console
 from langfun.core import message as message_lib
 import pyglove as pg
 
+TOKENS_PER_REQUEST = 250  # Estimated num tokens for a single request
+DEFAULT_MAX_CONCURRENCY = 1  # Use this as max concurrency if no RPM or TPM data
+
 
 class LMSample(pg.Object):
   """Response candidate."""
@@ -47,6 +50,14 @@ class LMSample(pg.Object):
   ] = None
 
 
+class LMSamplingUsage(pg.Object):
+  """Usage information per completion."""
+
+  prompt_tokens: int
+  completion_tokens: int
+  total_tokens: int
+
+
 class LMSamplingResult(pg.Object):
   """Language model response."""
 
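Note: LMSamplingUsage is a plain pyglove value type, so token counts can be constructed and aggregated directly. A minimal sketch (the helper below is illustrative, not part of this diff):

from langfun.core.language_model import LMSamplingUsage

def total_usage(usages: list[LMSamplingUsage]) -> LMSamplingUsage:
  # Sum token counts across several sampling results for reporting.
  return LMSamplingUsage(
      prompt_tokens=sum(u.prompt_tokens for u in usages),
      completion_tokens=sum(u.completion_tokens for u in usages),
      total_tokens=sum(u.total_tokens for u in usages),
  )

u = LMSamplingUsage(prompt_tokens=100, completion_tokens=50, total_tokens=150)
assert total_usage([u, u]).total_tokens == 300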
@@ -58,19 +69,34 @@ class LMSamplingResult(pg.Object):
       ),
   ] = []
 
+  usage: Annotated[
+      LMSamplingUsage | None,
+      'Usage information. Currently only OpenAI models are supported.',
+  ] = None
+
 
 class LMSamplingOptions(component.Component):
   """Language model sampling options."""
 
   temperature: Annotated[
-      float,
+      float | None,
       (
           'Model temperature, which is usually between 0 and 1.0. '
-          'OpenAI models have temperature range from 0.0 to 2.0.'
+          'OpenAI models have temperature range from 0.0 to 2.0. '
+          'If None (default), honor the model\'s default behavior. '
       )
-  ] =
-
+  ] = None
+
+  max_tokens: Annotated[
+      int | None,
+      (
+          'Per example max tokens to generate. '
+          'If None, use the model default.'
+      )
+  ] = None
+
   n: Annotated[int | None, 'Max number of samples to return.'] = 1
+
   top_k: Annotated[
       int | None,
       (
@@ -78,6 +104,7 @@ class LMSamplingOptions(component.Component):
           'Not applicable to OpenAI models.'
       )
   ] = 40
+
   top_p: Annotated[
       float | None,
       (
@@ -86,6 +113,7 @@ class LMSamplingOptions(component.Component):
           '`top_p` but not both.'
       ),
   ] = None
+
   stop: Annotated[
       list[str] | None,
       (
@@ -95,9 +123,11 @@ class LMSamplingOptions(component.Component):
           '`Model:` is reached.'
       ),
   ] = None
+
   random_seed: Annotated[
       int | None, 'A fixed random seed used during model inference.'
   ] = None
+
   logprobs: Annotated[
       bool,
       (
@@ -106,6 +136,7 @@ class LMSamplingOptions(component.Component):
           'in the content of message.'
       ),
   ] = False
+
   top_logprobs: Annotated[
       int | None,
       (
@@ -315,9 +346,42 @@ class LanguageModel(component.Component):
 
     with component.context(override_attrs=True, **kwargs):
       if self.cache is None:
-
+        results = self._sample(prompts)
       else:
-
+        results = self._sample_with_cache_lookup(prompts, cache_seed)
+
+      for prompt, result in zip(prompts, results):
+
+        # Tag LM input.
+        prompt.tag(message_lib.Message.TAG_LM_INPUT)
+
+        for sample in result.samples:
+          # Update metadata for response message.
+
+          response = sample.response
+          response.metadata.score = sample.score
+          response.metadata.logprobs = sample.logprobs
+
+          # NOTE(daiyip): Current usage is computed at per-result level,
+          # which is accurate when n=1. For n > 1, we average the usage across
+          # multiple samples.
+          usage = result.usage
+          if len(result.samples) == 1 or usage is None:
+            response.metadata.usage = usage
+          else:
+            n = len(result.samples)
+            response.metadata.usage = LMSamplingUsage(
+                prompt_tokens=usage.prompt_tokens // n,
+                completion_tokens=usage.completion_tokens // n,
+                total_tokens=usage.total_tokens // n,
+            )
+
+          # Track the prompt for corresponding response.
+          response.source = prompt
+
+          # Tag LM response.
+          response.tag(message_lib.Message.TAG_LM_RESPONSE)
+      return results
 
   def _sample_with_cache_lookup(
       self, prompts: list[str | message_lib.Message], cache_seed: int
@@ -405,12 +469,9 @@ class LanguageModel(component.Component):
       result = self.sample(
           [prompt], sampling_options=sampling_options, cache_seed=cache_seed
       )[0]
-      response = result.samples[0].response
-      logprobs = result.samples[0].logprobs
-      response.set('score', result.samples[0].score)
-      response.metadata.logprobs = logprobs
       elapse = time.time() - request_start
-
+      response = result.samples[0].response
+      self._debug(prompt, response, call_counter, result.usage, elapse)
       return response
 
   def _debug(
@@ -418,35 +479,53 @@ class LanguageModel(component.Component):
       prompt: message_lib.Message,
       response: message_lib.Message,
       call_counter: int,
+      usage: LMSamplingUsage | None,
       elapse: float,
-  ):
+  ) -> None:
     """Outputs debugging information."""
     debug = self.debug
     if isinstance(debug, bool):
       debug = LMDebugMode.ALL if debug else LMDebugMode.NONE
 
     if debug & LMDebugMode.INFO:
-      self._debug_model_info(call_counter)
+      self._debug_model_info(call_counter, usage)
 
     if debug & LMDebugMode.PROMPT:
-      self._debug_prompt(prompt, call_counter)
+      self._debug_prompt(prompt, call_counter, usage)
 
     if debug & LMDebugMode.RESPONSE:
-      self._debug_response(response, call_counter, elapse)
+      self._debug_response(response, call_counter, usage, elapse)
 
-  def _debug_model_info(
+  def _debug_model_info(
+      self, call_counter: int, usage: LMSamplingUsage | None) -> None:
     """Outputs debugging information about the model."""
+    title_suffix = ''
+    if usage and usage.total_tokens != 0:
+      title_suffix = console.colored(
+          f' (total {usage.total_tokens} tokens)', 'red')
+
     console.write(
         self.format(compact=True, use_inferred=True),
-        title=f'[{call_counter}] LM INFO:',
+        title=f'[{call_counter}] LM INFO{title_suffix}:',
        color='magenta',
     )
 
-  def _debug_prompt(
+  def _debug_prompt(
+      self,
+      prompt: message_lib.Message,
+      call_counter: int,
+      usage: LMSamplingUsage | None,
+  ) -> None:
     """Outputs debugging information about the prompt."""
+    title_suffix = ''
+    if usage and usage.prompt_tokens != 0:
+      title_suffix = console.colored(f' ({usage.prompt_tokens} tokens)', 'red')
+
     console.write(
-        prompt
-
+        # We use metadata 'formatted_text' for scenarios where the prompt text
+        # is formatted by the LM.
+        prompt.get('formatted_text', prompt.text),
+        title=f'\n[{call_counter}] PROMPT SENT TO LM{title_suffix}:',
        color='green',
     )
     referred_modalities = prompt.referred_modalities()
@@ -460,12 +539,22 @@ class LanguageModel(component.Component):
     )
 
   def _debug_response(
-      self,
-
+      self,
+      response: message_lib.Message,
+      call_counter: int,
+      usage: LMSamplingUsage | None,
+      elapse: float
+  ) -> None:
     """Outputs debugging information about the response."""
+    title_suffix = ' ('
+    if usage and usage.completion_tokens != 0:
+      title_suffix += f'{usage.completion_tokens} tokens '
+    title_suffix += f'in {elapse:.2f} seconds)'
+    title_suffix = console.colored(title_suffix, 'red')
+
     console.write(
         str(response) + '\n',
-        title=f'\n[{call_counter}] LM RESPONSE
+        title=f'\n[{call_counter}] LM RESPONSE{title_suffix}:',
        color='blue',
     )
 
@@ -512,7 +601,7 @@ class LanguageModel(component.Component):
       debug = LMDebugMode.ALL if debug else LMDebugMode.NONE
 
     if debug & LMDebugMode.INFO:
-      self._debug_model_info(call_counter)
+      self._debug_model_info(call_counter, None)
 
     if debug & LMDebugMode.PROMPT:
       console.write(
@@ -548,3 +637,14 @@ class LanguageModel(component.Component):
           f'score: {r.score}',
           color='blue',
       )
+
+  def rate_to_max_concurrency(
+      self, requests_per_min: float = 0, tokens_per_min: float = 0
+  ) -> int:
+    """Converts a rate to a max concurrency."""
+    if tokens_per_min > 0:
+      return max(int(tokens_per_min / TOKENS_PER_REQUEST / 60), 1)
+    elif requests_per_min > 0:
+      return max(int(requests_per_min / 60), 1)  # Max concurrency can't be zero
+    else:
+      return DEFAULT_MAX_CONCURRENCY  # Default of 1
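Note: rate_to_max_concurrency prefers the token budget when one is given (tokens_per_min / TOKENS_PER_REQUEST / 60), otherwise falls back to requests_per_min / 60, and never returns less than 1. A quick check of the arithmetic against the constants above:

# 1e7 tokens/min at 250 tokens/request over 60 s -> 666 concurrent requests.
assert int(1e7 / 250 / 60) == 666
# 10,000 requests/min over 60 s -> 166 concurrent requests.
assert int(10_000 / 60) == 166
# Tiny or unspecified rates fall back to a concurrency of 1.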
langfun/core/language_model_test.py
CHANGED
@@ -38,9 +38,19 @@ class MockModel(lm_lib.LanguageModel):
     def fake_sample(prompts):
       if context.attempt >= self.failures_before_attempt:
         return [
-            lm_lib.LMSamplingResult(
-
-
+            lm_lib.LMSamplingResult(
+                [
+                    lm_lib.LMSample(  # pylint: disable=g-complex-comprehension
+                        response=prompt.text * self.sampling_options.top_k,
+                        score=self.sampling_options.temperature or -1.0,
+                    )
+                ],
+                usage=lm_lib.LMSamplingUsage(
+                    prompt_tokens=100,
+                    completion_tokens=100,
+                    total_tokens=200,
+                ),
+            )
             for prompt in prompts
         ]
       context.attempt += 1
@@ -73,13 +83,13 @@ class LMSamplingOptionsTest(unittest.TestCase):
   def test_cache_key(self):
     options = lm_lib.LMSamplingOptions()
     key1 = options.cache_key()
-    self.assertEqual(key1, (
+    self.assertEqual(key1, (None, None, 1, 40, None, None))
     with options.override(temperature=1.0, max_tokens=256):
       key2 = options.cache_key()
       self.assertEqual(key2, (1.0, 256, 1, 40, None, None))
 
     # Make sure key1 does not change upon override.
-    self.assertEqual(key1, (
+    self.assertEqual(key1, (None, None, 1, 40, None, None))
 
 
 class LanguageModelTest(unittest.TestCase):
@@ -100,8 +110,38 @@ class LanguageModelTest(unittest.TestCase):
     self.assertEqual(
         lm.sample(prompts=['foo', 'bar']),
         [
-            lm_lib.LMSamplingResult(
-
+            lm_lib.LMSamplingResult(
+                [
+                    lm_lib.LMSample(
+                        message_lib.AIMessage(
+                            'foo',
+                            score=-1.0,
+                            logprobs=None,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            tags=[message_lib.Message.TAG_LM_RESPONSE],
+                        ),
+                        score=-1.0,
+                        logprobs=None,
+                    )
+                ],
+                usage=lm_lib.LMSamplingUsage(100, 100, 200),
+            ),
+            lm_lib.LMSamplingResult(
+                [
+                    lm_lib.LMSample(
+                        message_lib.AIMessage(
+                            'bar',
+                            score=-1.0,
+                            logprobs=None,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            tags=[message_lib.Message.TAG_LM_RESPONSE],
+                        ),
+                        score=-1.0,
+                        logprobs=None,
+                    )
+                ],
+                usage=lm_lib.LMSamplingUsage(100, 100, 200),
+            ),
         ],
     )
     # Test override sampling_options.
@@ -112,38 +152,128 @@ class LanguageModelTest(unittest.TestCase):
         ),
         [
             lm_lib.LMSamplingResult(
-                [
+                [
+                    lm_lib.LMSample(
+                        message_lib.AIMessage(
+                            'foo' * 2,
+                            score=0.5,
+                            logprobs=None,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            tags=[message_lib.Message.TAG_LM_RESPONSE],
+                        ),
+                        score=0.5,
+                        logprobs=None,
+                    ),
+                ],
+                usage=lm_lib.LMSamplingUsage(100, 100, 200),
             ),
             lm_lib.LMSamplingResult(
-                [
+                [
+                    lm_lib.LMSample(
+                        message_lib.AIMessage(
+                            'bar' * 2,
+                            score=0.5,
+                            logprobs=None,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            tags=[message_lib.Message.TAG_LM_RESPONSE],
+                        ),
+                        score=0.5,
+                        logprobs=None,
+                    ),
+                ],
+                usage=lm_lib.LMSamplingUsage(
+                    prompt_tokens=100, completion_tokens=100, total_tokens=200
+                ),
             ),
-        ]
+        ]
     )
     # Test override individual flags within sampling_options.
     self.assertEqual(
         lm.sample(prompts=['foo', 'bar'], temperature=1.0),
         [
-            lm_lib.LMSamplingResult(
-
-
+            lm_lib.LMSamplingResult(
+                [
+                    lm_lib.LMSample(
+                        message_lib.AIMessage(
+                            'foo',
+                            score=1.0,
+                            logprobs=None,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            tags=[message_lib.Message.TAG_LM_RESPONSE],
+                        ),
+                        score=1.0,
+                        logprobs=None,
+                    ),
+                ],
+                usage=lm_lib.LMSamplingUsage(100, 100, 200),
+            ),
+            lm_lib.LMSamplingResult(
+                [
+                    lm_lib.LMSample(
+                        message_lib.AIMessage(
+                            'bar',
+                            score=1.0,
+                            logprobs=None,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            tags=[message_lib.Message.TAG_LM_RESPONSE],
+                        ),
+                        score=1.0,
+                        logprobs=None,
+                    ),
+                ],
+                usage=lm_lib.LMSamplingUsage(
+                    prompt_tokens=100, completion_tokens=100, total_tokens=200
+                ),
+            ),
+        ]
     )
     self.assertEqual(
         lm.sample(prompts=['foo', 'bar'], top_k=2, temperature=0.7),
         [
             lm_lib.LMSamplingResult(
-                [
+                [
+                    lm_lib.LMSample(
+                        message_lib.AIMessage(
+                            'foo' * 2,
+                            score=0.7,
+                            logprobs=None,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            tags=[message_lib.Message.TAG_LM_RESPONSE],
+                        ),
+                        score=0.7,
+                        logprobs=None,
+                    ),
+                ],
+                usage=lm_lib.LMSamplingUsage(100, 100, 200),
            ),
            lm_lib.LMSamplingResult(
-                [
+                [
+                    lm_lib.LMSample(
+                        message_lib.AIMessage(
+                            'bar' * 2,
+                            score=0.7,
+                            logprobs=None,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            tags=[message_lib.Message.TAG_LM_RESPONSE],
+                        ),
+                        score=0.7,
+                        logprobs=None,
+                    ),
+                ],
+                usage=lm_lib.LMSamplingUsage(
+                    prompt_tokens=100, completion_tokens=100, total_tokens=200
+                ),
            ),
-        ]
+        ]
     )
 
   def test_call(self):
     lm = MockModel(sampling_options=lm_lib.LMSamplingOptions(top_k=1))
     response = lm(prompt='foo')
     self.assertEqual(response.text, 'foo')
-    self.assertEqual(response.score,
+    self.assertEqual(response.score, -1.0)
+    self.assertIsNone(response.logprobs)
+    self.assertEqual(response.usage, lm_lib.LMSamplingUsage(100, 100, 200))
 
     # Test override sampling_options.
     self.assertEqual(
@@ -158,11 +288,42 @@ class LanguageModelTest(unittest.TestCase):
     self.assertEqual(
         lm.sample(prompts=['foo', 'bar']),
         [
-            lm_lib.LMSamplingResult(
-
-
-
-
+            lm_lib.LMSamplingResult(
+                [
+                    lm_lib.LMSample(
+                        message_lib.AIMessage(
+                            'foo',
+                            cache_seed=0,
+                            score=-1.0,
+                            logprobs=None,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            tags=[message_lib.Message.TAG_LM_RESPONSE],
+                        ),
+                        score=-1.0,
+                        logprobs=None,
+                    )
+                ],
+                usage=lm_lib.LMSamplingUsage(100, 100, 200),
+            ),
+            lm_lib.LMSamplingResult(
+                [
+                    lm_lib.LMSample(
+                        message_lib.AIMessage(
+                            'bar',
+                            cache_seed=0,
+                            score=-1.0,
+                            logprobs=None,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            tags=[message_lib.Message.TAG_LM_RESPONSE],
+                        ),
+                        score=-1.0,
+                        logprobs=None,
+                    )
+                ],
+                usage=lm_lib.LMSamplingUsage(100, 100, 200),
+            ),
+        ],
+    )
     self.assertEqual(cache.stats.num_queries, 2)
     self.assertEqual(cache.stats.num_hits, 0)
     self.assertEqual(cache.stats.num_updates, 2)
@@ -181,10 +342,40 @@ class LanguageModelTest(unittest.TestCase):
     self.assertEqual(
         lm.sample(prompts=['foo', 'baz'], temperature=1.0),
         [
-            lm_lib.LMSamplingResult(
-
-
-
+            lm_lib.LMSamplingResult(
+                [
+                    lm_lib.LMSample(
+                        message_lib.AIMessage(
+                            'foo',
+                            cache_seed=0,
+                            score=1.0,
+                            logprobs=None,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            tags=[message_lib.Message.TAG_LM_RESPONSE],
+                        ),
+                        score=1.0,
+                        logprobs=None,
+                    )
+                ],
+                usage=lm_lib.LMSamplingUsage(100, 100, 200),
+            ),
+            lm_lib.LMSamplingResult(
+                [
+                    lm_lib.LMSample(
+                        message_lib.AIMessage(
+                            'baz',
+                            cache_seed=0,
+                            score=1.0,
+                            logprobs=None,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            tags=[message_lib.Message.TAG_LM_RESPONSE],
+                        ),
+                        score=1.0,
+                        logprobs=None,
+                    )
+                ],
+                usage=lm_lib.LMSamplingUsage(100, 100, 200),
+            ),
         ],
     )
     self.assertEqual(cache.stats.num_queries, 6)
@@ -341,6 +532,38 @@ class LanguageModelTest(unittest.TestCase):
     with self.assertRaises(NotImplementedError):
       MockModel().score('hi', ['1', '2'])
 
+  def test_rate_to_max_concurrency_no_rpm_no_tpm(self) -> None:
+    lm = MockModel()
+    self.assertEqual(
+        lm_lib.DEFAULT_MAX_CONCURRENCY,
+        lm.rate_to_max_concurrency(requests_per_min=0, tokens_per_min=0),
+    )
+    self.assertEqual(
+        lm_lib.DEFAULT_MAX_CONCURRENCY,
+        lm.rate_to_max_concurrency(requests_per_min=-1, tokens_per_min=-1),
+    )
+
+  def test_rate_to_max_concurrency_only_rpm_specified_uses_rpm(self) -> None:
+    lm = MockModel()
+    test_rpm = 1e4
+    self.assertEqual(
+        lm.rate_to_max_concurrency(requests_per_min=test_rpm),
+        int(test_rpm / 60)
+    )
+
+  def test_rate_to_max_concurrency_tpm_specified_uses_tpm(self) -> None:
+    lm = MockModel()
+    test_tpm = 1e7
+    self.assertEqual(
+        lm.rate_to_max_concurrency(requests_per_min=1, tokens_per_min=test_tpm),
+        int(test_tpm / lm_lib.TOKENS_PER_REQUEST / 60),
+    )
+
+  def test_rate_to_max_concurrency_small_rate_returns_one(self) -> None:
+    lm = MockModel()
+    self.assertEqual(lm.rate_to_max_concurrency(requests_per_min=1), 1)
+    self.assertEqual(lm.rate_to_max_concurrency(tokens_per_min=1), 1)
+
 
 if __name__ == '__main__':
   unittest.main()
langfun/core/llms/__init__.py
CHANGED
@@ -25,16 +25,22 @@ from langfun.core.llms.fake import StaticResponse
 from langfun.core.llms.fake import StaticSequence
 
 # Gemini models.
-from langfun.core.llms.
-from langfun.core.llms.
-from langfun.core.llms.
+from langfun.core.llms.google_genai import GenAI
+from langfun.core.llms.google_genai import GeminiPro
+from langfun.core.llms.google_genai import GeminiProVision
+from langfun.core.llms.google_genai import Palm2
+from langfun.core.llms.google_genai import Palm2_IT
 
 # OpenAI models.
 from langfun.core.llms.openai import OpenAI
 
 from langfun.core.llms.openai import Gpt4Turbo
-from langfun.core.llms.openai import
-from langfun.core.llms.openai import
+from langfun.core.llms.openai import Gpt4Turbo_20240409
+from langfun.core.llms.openai import Gpt4TurboPreview
+from langfun.core.llms.openai import Gpt4TurboPreview_0125
+from langfun.core.llms.openai import Gpt4TurboPreview_1106
+from langfun.core.llms.openai import Gpt4VisionPreview
+from langfun.core.llms.openai import Gpt4VisionPreview_1106
 from langfun.core.llms.openai import Gpt4
 from langfun.core.llms.openai import Gpt4_0613
 from langfun.core.llms.openai import Gpt4_32K
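Note: following the gemini.py -> google_genai.py rename, Gemini/PaLM wrappers are imported from the new module path (or via the classes re-exported above). A minimal sketch, assuming the constructor still accepts api_key:

from langfun.core.llms.google_genai import GeminiPro

lm = GeminiPro(api_key='<your-key>')  # placeholder key
print(lm('Say hi in one word.'))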
@@ -55,6 +61,19 @@ from langfun.core.llms.openai import Gpt3Curie
 from langfun.core.llms.openai import Gpt3Babbage
 from langfun.core.llms.openai import Gpt3Ada
 
+from langfun.core.llms.anthropic import Anthropic
+from langfun.core.llms.anthropic import Claude3Opus
+from langfun.core.llms.anthropic import Claude3Sonnet
+from langfun.core.llms.anthropic import Claude3Haiku
+
+from langfun.core.llms.groq import Groq
+from langfun.core.llms.groq import GroqLlama3_70B
+from langfun.core.llms.groq import GroqLlama3_8B
+from langfun.core.llms.groq import GroqLlama2_70B
+from langfun.core.llms.groq import GroqMistral_8x7B
+from langfun.core.llms.groq import GroqGemma7B_IT
+
+
 # LLaMA C++ models.
 from langfun.core.llms.llama_cpp import LlamaCppRemote
 
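Note: the newly exported Anthropic and Groq wrappers follow the same calling convention as the other LanguageModel subclasses. A minimal sketch (keys are placeholders; check anthropic.py and groq.py for the exact constructor fields):

from langfun.core.llms import Claude3Opus, GroqLlama3_70B

claude = Claude3Opus(api_key='<anthropic-key>')
llama = GroqLlama3_70B(api_key='<groq-key>')
print(claude('Say hi in one word.'))
print(llama('Say hi in one word.'))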