langfun 0.1.2.dev202502110804__py3-none-any.whl → 0.1.2.dev202502120804__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
langfun/core/llms/groq.py CHANGED
@@ -1,4 +1,4 @@
-# Copyright 2024 The Langfun Authors
+# Copyright 2025 The Langfun Authors
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,84 +13,249 @@
 # limitations under the License.
 """Language models from Groq."""
 
+import datetime
+import functools
 import os
-from typing import Annotated, Any
+from typing import Annotated, Any, Final
 
 import langfun.core as lf
 from langfun.core.llms import openai_compatible
 import pyglove as pg
 
 
-SUPPORTED_MODELS_AND_SETTINGS = {
-    # Refer https://console.groq.com/docs/models
-    # Price in US dollars at https://groq.com/pricing/ as of 2024-10-10.
-    'llama-3.2-3b-preview': pg.Dict(
-        max_tokens=8192,
-        max_concurrency=64,
-        cost_per_1k_input_tokens=0.00006,
-        cost_per_1k_output_tokens=0.00006,
+class GroqModelInfo(lf.ModelInfo):
+  """Groq model info."""
+
+  LINKS = dict(
+      models='https://console.groq.com/docs/models',
+      pricing='https://groq.com/pricing/',
+      rate_limits='https://console.groq.com/docs/rate-limits',
+      error_codes='https://console.groq.com/docs/errors',
+  )
+
+  provider: Final[str] = 'Groq'  # pylint: disable=invalid-name
+
+
+SUPPORTED_MODELS = [
+    #
+    # Llama models.
+    #
+    GroqModelInfo(
+        model_id='llama-3.3-70b-versatile',
+        in_service=True,
+        model_type='instruction-tuned',
+        description='Llama 3.3 70B model on Groq (Production)',
+        url='https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct',
+        release_date=datetime.datetime(2024, 12, 6),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=128_000,
+            max_output_tokens=8_192,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_input_tokens=0.59,
+            cost_per_1m_output_tokens=0.79,
+        ),
+        rate_limits=lf.ModelInfo.RateLimits(
+            # Developer tier.
+            max_requests_per_minute=1_000,
+            max_tokens_per_minute=120_000,
+        ),
     ),
-    'llama-3.2-1b-preview': pg.Dict(
-        max_tokens=8192,
-        max_concurrency=64,
-        cost_per_1k_input_tokens=0.00004,
-        cost_per_1k_output_tokens=0.00004,
+    GroqModelInfo(
+        model_id='llama-3.3-70b-specdec',
+        in_service=True,
+        model_type='instruction-tuned',
+        description='Llama 3.3 70B model on Groq (Production)',
+        url='https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct',
+        release_date=datetime.datetime(2024, 12, 6),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=8_192,
+            max_output_tokens=None,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_input_tokens=0.59,
+            cost_per_1m_output_tokens=0.99,
+        ),
+        rate_limits=lf.ModelInfo.RateLimits(
+            # Developer tier.
+            max_requests_per_minute=100,
+            max_tokens_per_minute=30_000,
+        ),
     ),
-    'llama-3.1-70b-versatile': pg.Dict(
-        max_tokens=8192,
-        max_concurrency=16,
-        cost_per_1k_input_tokens=0.00059,
-        cost_per_1k_output_tokens=0.00079,
+    GroqModelInfo(
+        model_id='llama-3.2-1b-preview',
+        in_service=True,
+        model_type='instruction-tuned',
+        description='Llama 3.2 1B model on Groq (Preview)',
+        url='https://huggingface.co/meta-llama/Llama-3.2-1B',
+        release_date=datetime.datetime(2024, 12, 6),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=128_000,
+            max_output_tokens=None,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_input_tokens=0.04,
+            cost_per_1m_output_tokens=0.04,
+        ),
+        rate_limits=lf.ModelInfo.RateLimits(
+            # Developer tier.
+            max_requests_per_minute=100,
+            max_tokens_per_minute=30_000,
+        ),
     ),
-    'llama-3.1-8b-instant': pg.Dict(
-        max_tokens=8192,
-        max_concurrency=32,
-        cost_per_1k_input_tokens=0.00005,
-        cost_per_1k_output_tokens=0.00008,
+    GroqModelInfo(
+        model_id='llama-3.2-3b-preview',
+        in_service=True,
+        model_type='instruction-tuned',
+        description='Llama 3.2 3B model on Groq (Preview)',
+        url='https://huggingface.co/meta-llama/Llama-3.2-3B',
+        release_date=datetime.datetime(2024, 12, 6),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=128_000,
+            max_output_tokens=None,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_input_tokens=0.06,
+            cost_per_1m_output_tokens=0.06,
+        ),
+        rate_limits=lf.ModelInfo.RateLimits(
+            # Developer tier.
+            max_requests_per_minute=100,
+            max_tokens_per_minute=30_000,
+        ),
     ),
-    'llama3-70b-8192': pg.Dict(
-        max_tokens=8192,
-        max_concurrency=16,
-        cost_per_1k_input_tokens=0.00059,
-        cost_per_1k_output_tokens=0.00079,
+    GroqModelInfo(
+        model_id='llama-3.2-11b-vision-preview',
+        in_service=True,
+        model_type='instruction-tuned',
+        description='Llama 3.2 11B vision model on Groq (Preview)',
+        url='https://huggingface.co/meta-llama/Llama-3.2-11B-Vision',
+        release_date=datetime.datetime(2024, 12, 6),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=128_000,
+            max_output_tokens=None,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_input_tokens=0.18,
+            cost_per_1m_output_tokens=0.18,
+        ),
+        rate_limits=lf.ModelInfo.RateLimits(
+            # Developer tier.
+            max_requests_per_minute=100,
+            max_tokens_per_minute=30_000,
+        ),
     ),
-    'llama3-8b-8192': pg.Dict(
-        max_tokens=8192,
-        max_concurrency=32,
-        cost_per_1k_input_tokens=0.00005,
-        cost_per_1k_output_tokens=0.00008,
+    GroqModelInfo(
+        model_id='llama-3.2-90b-vision-preview',
+        in_service=True,
+        model_type='instruction-tuned',
+        description='Llama 3.2 90B vision model on Groq (Preview)',
+        url='https://huggingface.co/meta-llama/Llama-3.2-90B-Vision',
+        release_date=datetime.datetime(2024, 12, 6),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=128_000,
+            max_output_tokens=None,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_input_tokens=0.9,
+            cost_per_1m_output_tokens=0.9,
+        ),
+        rate_limits=lf.ModelInfo.RateLimits(
+            # Developer tier.
+            max_requests_per_minute=100,
+            max_tokens_per_minute=30_000,
+        ),
     ),
-    'llama2-70b-4096': pg.Dict(
-        max_tokens=4096,
-        max_concurrency=16,
+    #
+    # DeepSeek models
+    #
+    GroqModelInfo(
+        model_id='deepseek-r1-distill-llama-70b',
+        in_service=True,
+        model_type='thinking',
+        description='DeepSeek R1 distilled from Llama 70B (Preview)',
+        url='https://console.groq.com/docs/model/deepseek-r1-distill-llama-70b',
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=128_000,
+            max_output_tokens=16_384,
+        ),
+        # TODO(daiyip): Pricing needs to be computed based on the number of
+        # input/output tokens.
+        pricing=None,
+        rate_limits=lf.ModelInfo.RateLimits(
+            # Developer tier.
+            max_requests_per_minute=1_000,
+            max_tokens_per_minute=120_000,
+        ),
     ),
-    'mixtral-8x7b-32768': pg.Dict(
-        max_tokens=32768,
-        max_concurrency=16,
-        cost_per_1k_input_tokens=0.00024,
-        cost_per_1k_output_tokens=0.00024,
+    GroqModelInfo(
+        model_id='deepseek-r1-distill-llama-70b-specdec',
+        in_service=True,
+        model_type='thinking',
+        description='DeepSeek R1 distilled from Llama 70B (Preview)',
+        url='https://console.groq.com/docs/model/deepseek-r1-distill-llama-70b',
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=128_000,
+            max_output_tokens=16_384,
+        ),
+        # TODO(daiyip): Pricing needs to be computed based on the number of
+        # input/output tokens.
+        pricing=None,
+        rate_limits=lf.ModelInfo.RateLimits(
+            # Developer tier.
+            max_requests_per_minute=100,
+            max_tokens_per_minute=60_000,
+        ),
     ),
-    'gemma2-9b-it': pg.Dict(
-        max_tokens=8192,
-        max_concurrency=32,
-        cost_per_1k_input_tokens=0.0002,
-        cost_per_1k_output_tokens=0.0002,
+    #
+    # Gemma models.
+    #
+    GroqModelInfo(
+        model_id='gemma2-9b-it',
+        in_service=True,
+        model_type='instruction-tuned',
+        description='Google Gemma 2 9B model on Groq.',
+        url='https://huggingface.co/google/gemma-2-9b-it',
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=8_192,
+            max_output_tokens=None,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_input_tokens=0.2,
+            cost_per_1m_output_tokens=0.2,
+        ),
+        rate_limits=lf.ModelInfo.RateLimits(
+            # Developer tier.
+            max_requests_per_minute=200,
+            max_tokens_per_minute=30_000,
+        ),
     ),
-    'gemma-7b-it': pg.Dict(
-        max_tokens=8192,
-        max_concurrency=32,
-        cost_per_1k_input_tokens=0.00007,
-        cost_per_1k_output_tokens=0.00007,
+    #
+    # Mixtral models.
+    #
+    GroqModelInfo(
+        model_id='mixtral-8x7b-32768',
+        in_service=True,
+        model_type='instruction-tuned',
+        description='Mixtral 8x7B model on Groq (Production)',
+        url='https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1',
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=32_768,
+            max_output_tokens=None,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_input_tokens=0.24,
+            cost_per_1m_output_tokens=0.24,
+        ),
+        rate_limits=lf.ModelInfo.RateLimits(
+            # Developer tier.
+            max_requests_per_minute=100,
+            max_tokens_per_minute=25_000,
+        ),
     ),
-    'whisper-large-v3': pg.Dict(
-        max_tokens=8192,
-        max_concurrency=16,
-    ),
-    'whisper-large-v3-turbo': pg.Dict(
-        max_tokens=8192,
-        max_concurrency=16,
-    )
-}
+]
+
+_SUPPORTED_MODELS_BY_ID = {m.model_id: m for m in SUPPORTED_MODELS}
 
 
 @lf.use_init_args(['model'])
@@ -102,7 +267,7 @@ class Groq(openai_compatible.OpenAICompatible):
 
   model: pg.typing.Annotated[
       pg.typing.Enum(
-          pg.MISSING_VALUE, list(SUPPORTED_MODELS_AND_SETTINGS.keys())
+          pg.MISSING_VALUE, [m.model_id for m in SUPPORTED_MODELS]
       ),
       'The name of the model to use.',
   ]
@@ -117,6 +282,10 @@ class Groq(openai_compatible.OpenAICompatible):
 
   api_endpoint: str = 'https://api.groq.com/openai/v1/chat/completions'
 
+  @functools.cached_property
+  def model_info(self) -> lf.ModelInfo:
+    return _SUPPORTED_MODELS_BY_ID[self.model]
+
   @property
   def headers(self) -> dict[str, Any]:
     api_key = self.api_key or os.environ.get('GROQ_API_KEY', None)
@@ -131,34 +300,6 @@ class Groq(openai_compatible.OpenAICompatible):
     })
     return headers
 
-  @property
-  def model_id(self) -> str:
-    """Returns a string to identify the model."""
-    return self.model
-
-  @property
-  def max_concurrency(self) -> int:
-    return SUPPORTED_MODELS_AND_SETTINGS[self.model].max_concurrency
-
-  def estimate_cost(
-      self,
-      num_input_tokens: int,
-      num_output_tokens: int
-  ) -> float | None:
-    """Estimate the cost based on usage."""
-    cost_per_1k_input_tokens = SUPPORTED_MODELS_AND_SETTINGS[self.model].get(
-        'cost_per_1k_input_tokens', None
-    )
-    cost_per_1k_output_tokens = SUPPORTED_MODELS_AND_SETTINGS[self.model].get(
-        'cost_per_1k_output_tokens', None
-    )
-    if cost_per_1k_input_tokens is None or cost_per_1k_output_tokens is None:
-      return None
-    return (
-        cost_per_1k_input_tokens * num_input_tokens
-        + cost_per_1k_output_tokens * num_output_tokens
-    ) / 1000
-
   def _request_args(self, options: lf.LMSamplingOptions) -> dict[str, Any]:
     """Returns a dict as request arguments."""
     # `logprobs` and `top_logprobs` flags are not supported on Groq yet.
@@ -168,109 +309,69 @@ class Groq(openai_compatible.OpenAICompatible):
     return args
 
 
-class GroqLlama3_2_3B(Groq):  # pylint: disable=invalid-name
-  """Llama3.2-3B with 8K context window.
+class GroqLlama33_70B_Versatile(Groq):  # pylint: disable=invalid-name
+  """Llama3.3-70B with 128K context window."""
+  model = 'llama-3.3-70b-versatile'
 
-  See: https://huggingface.co/meta-llama/Llama-3.2-3B
-  """
 
-  model = 'llama-3.2-3b-preview'
+class GroqLlama33_70B_SpecDec(Groq):  # pylint: disable=invalid-name
+  """Llama3.3-70B with 8K context window."""
+  model = 'llama-3.3-70b-specdec'
 
 
-class GroqLlama3_2_1B(Groq):  # pylint: disable=invalid-name
-  """Llama3.2-1B with 8K context window.
+class GroqLlama32_1B(Groq):  # pylint: disable=invalid-name
+  """Llama3.2-1B."""
+  model = 'llama-3.2-1b-preview'
 
-  See: https://huggingface.co/meta-llama/Llama-3.2-1B
-  """
 
+class GroqLlama32_3B(Groq):  # pylint: disable=invalid-name
+  """Llama3.2-3B."""
   model = 'llama-3.2-3b-preview'
 
 
-class GroqLlama3_8B(Groq):  # pylint: disable=invalid-name
-  """Llama3-8B with 8K context window.
-
-  See: https://huggingface.co/meta-llama/Meta-Llama-3-8B
-  """
-
-  model = 'llama3-8b-8192'
-
+class GroqLlama32_11B_Vision(Groq):  # pylint: disable=invalid-name
+  """Llama3.2-11B vision."""
+  model = 'llama-3.2-11b-vision-preview'
 
-class GroqLlama3_1_70B(Groq):  # pylint: disable=invalid-name
-  """Llama3.1-70B with 8K context window.
 
-  See: https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/MODEL_CARD.md  # pylint: disable=line-too-long
-  """
-
-  model = 'llama-3.1-70b-versatile'
+class GroqLlama32_90B_Vision(Groq):  # pylint: disable=invalid-name
  """Llama3.2-90B vision."""
+  model = 'llama-3.2-90b-vision-preview'
 
 
-class GroqLlama3_1_8B(Groq):  # pylint: disable=invalid-name
-  """Llama3.1-8B with 8K context window.
+class GroqDeepSeekR1_DistillLlama_70B(Groq):  # pylint: disable=invalid-name
+  """DeepSeek R1 distilled from Llama 70B."""
+  model = 'deepseek-r1-distill-llama-70b'
 
-  See: https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/MODEL_CARD.md  # pylint: disable=line-too-long
-  """
-
-  model = 'llama-3.1-8b-instant'
-
-
-class GroqLlama3_70B(Groq):  # pylint: disable=invalid-name
-  """Llama3-70B with 8K context window.
-
-  See: https://huggingface.co/meta-llama/Meta-Llama-3-70B
-  """
 
-  model = 'llama3-70b-8192'
-
-
-class GroqLlama2_70B(Groq):  # pylint: disable=invalid-name
-  """Llama2-70B with 4K context window.
-
-  See: https://huggingface.co/meta-llama/Llama-2-70b
-  """
-
-  model = 'llama2-70b-4096'
+class GroqDeepSeekR1_DistillLlama_70B_SpecDec(Groq):  # pylint: disable=invalid-name
+  """DeepSeek R1 distilled from Llama 70B (SpecDec)."""
+  model = 'deepseek-r1-distill-llama-70b-specdec'
 
 
 class GroqMistral_8x7B(Groq):  # pylint: disable=invalid-name
-  """Mixtral 8x7B with 32K context window.
-
-  See: https://huggingface.co/meta-llama/Llama-2-70b
-  """
-
+  """Mixtral 8x7B."""
   model = 'mixtral-8x7b-32768'
 
 
 class GroqGemma2_9B_IT(Groq):  # pylint: disable=invalid-name
-  """Gemma2 9B with 8K context window.
-
-  See: https://huggingface.co/google/gemma-2-9b-it
-  """
-
+  """Gemma2 9B."""
   model = 'gemma2-9b-it'
 
 
-class GroqGemma_7B_IT(Groq):  # pylint: disable=invalid-name
-  """Gemma 7B with 8K context window.
-
-  See: https://huggingface.co/google/gemma-1.1-7b-it
-  """
-
-  model = 'gemma-7b-it'
-
-
-class GroqWhisper_Large_v3(Groq):  # pylint: disable=invalid-name
-  """Whisper Large V3 with 8K context window.
-
-  See: https://huggingface.co/openai/whisper-large-v3
-  """
+#
+# Register Groq models so they can be retrieved with LanguageModel.get().
+#
 
-  model = 'whisper-large-v3'
 
+def _groq_model(model: str, *args, **kwargs):
+  model = model.removeprefix('groq://')
+  return Groq(model, *args, **kwargs)
 
-class GroqWhisper_Large_v3Turbo(Groq):  # pylint: disable=invalid-name
-  """Whisper Large V3 Turbo with 8K context window.
 
-  See: https://huggingface.co/openai/whisper-large-v3-turbo
-  """
+def _register_groq_models():
+  """Registers Groq models."""
+  for m in SUPPORTED_MODELS:
+    lf.LanguageModel.register('groq://' + m.model_id, _groq_model)
 
-  model = 'whisper-large-v3-turbo'
+_register_groq_models()
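
In sum, groq.py replaces the flat SUPPORTED_MODELS_AND_SETTINGS dict with structured GroqModelInfo entries and registers each model under a groq:// URI. A minimal sketch of the new surface, inferred from the diff above (attribute-style access on the pg-backed ModelInfo fields is assumed; issuing real requests would also need GROQ_API_KEY set):

    import langfun.core as lf
    from langfun.core.llms import groq

    # Models registered by _register_groq_models() resolve by URI.
    lm = lf.LanguageModel.get('groq://llama-3.3-70b-versatile')
    assert isinstance(lm, groq.Groq)

    # Per-model metadata now lives on the cached model_info property.
    info = lm.model_info
    print(info.model_id)                          # 'llama-3.3-70b-versatile'
    print(info.context_length.max_input_tokens)   # 128000
    print(info.pricing.cost_per_1m_input_tokens)  # 0.59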
langfun/core/llms/groq_test.py CHANGED
@@ -17,12 +17,13 @@ import langfun.core as lf
 from langfun.core.llms import groq
 
 
-class AuthropicTest(unittest.TestCase):
+class GroqTest(unittest.TestCase):
 
   def test_basics(self):
     self.assertEqual(groq.GroqMistral_8x7B().model_id, 'mixtral-8x7b-32768')
-    self.assertEqual(groq.GroqMistral_8x7B().max_concurrency, 16)
-    self.assertEqual(groq.GroqMistral_8x7B().estimate_cost(100, 100), 4.8e-5)
+    self.assertEqual(
+        groq.GroqMistral_8x7B().resource_id, 'groq://mixtral-8x7b-32768'
+    )
 
   def test_request_args(self):
     args = groq.GroqMistral_8x7B()._request_args(
@@ -59,6 +60,11 @@ class AuthropicTest(unittest.TestCase):
     )
     del os.environ['GROQ_API_KEY']
 
+  def test_lm_get(self):
+    self.assertIsInstance(
+        lf.LanguageModel.get('groq://gemma2-9b-it'),
+        groq.Groq,
+    )
 
 if __name__ == '__main__':
   unittest.main()
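
The removed estimate_cost assertion doubles as a sanity check on the pricing migration: the old table priced mixtral-8x7b-32768 at $0.00024 per 1K tokens, and the new GroqModelInfo entry prices it at $0.24 per 1M tokens, which is the same rate. A standalone arithmetic check (not part of the package):

    # Old per-1K formula, as removed from Groq.estimate_cost():
    old_cost = (0.00024 * 100 + 0.00024 * 100) / 1000  # 4.8e-05, as asserted
    # New per-1M rate from the GroqModelInfo pricing entry:
    new_cost = (0.24 * 100 + 0.24 * 100) / 1_000_000   # 4.8e-05
    assert abs(old_cost - new_cost) < 1e-12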