langfun 0.0.2.dev20240429__py3-none-any.whl → 0.1.2.dev202501140804__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (144)
  1. langfun/__init__.py +20 -2
  2. langfun/core/__init__.py +16 -5
  3. langfun/core/agentic/__init__.py +30 -0
  4. langfun/core/agentic/action.py +854 -0
  5. langfun/core/agentic/action_eval.py +150 -0
  6. langfun/core/agentic/action_eval_test.py +109 -0
  7. langfun/core/agentic/action_test.py +136 -0
  8. langfun/core/coding/python/__init__.py +5 -11
  9. langfun/core/coding/python/correction.py +37 -21
  10. langfun/core/coding/python/correction_test.py +29 -3
  11. langfun/core/coding/python/execution.py +40 -216
  12. langfun/core/coding/python/execution_test.py +29 -89
  13. langfun/core/coding/python/generation.py +21 -11
  14. langfun/core/coding/python/generation_test.py +2 -2
  15. langfun/core/coding/python/parsing.py +108 -193
  16. langfun/core/coding/python/parsing_test.py +2 -105
  17. langfun/core/component.py +63 -2
  18. langfun/core/component_test.py +53 -0
  19. langfun/core/concurrent.py +414 -117
  20. langfun/core/concurrent_test.py +111 -24
  21. langfun/core/console.py +18 -5
  22. langfun/core/console_test.py +17 -0
  23. langfun/core/eval/__init__.py +16 -1
  24. langfun/core/eval/base.py +622 -174
  25. langfun/core/eval/base_test.py +200 -54
  26. langfun/core/eval/matching.py +63 -76
  27. langfun/core/eval/matching_test.py +17 -8
  28. langfun/core/eval/patching.py +130 -0
  29. langfun/core/eval/patching_test.py +170 -0
  30. langfun/core/eval/scoring.py +26 -26
  31. langfun/core/eval/scoring_test.py +19 -2
  32. langfun/core/eval/v2/__init__.py +42 -0
  33. langfun/core/eval/v2/checkpointing.py +380 -0
  34. langfun/core/eval/v2/checkpointing_test.py +228 -0
  35. langfun/core/eval/v2/eval_test_helper.py +136 -0
  36. langfun/core/eval/v2/evaluation.py +725 -0
  37. langfun/core/eval/v2/evaluation_test.py +180 -0
  38. langfun/core/eval/v2/example.py +305 -0
  39. langfun/core/eval/v2/example_test.py +128 -0
  40. langfun/core/eval/v2/experiment.py +1048 -0
  41. langfun/core/eval/v2/experiment_test.py +433 -0
  42. langfun/core/eval/v2/metric_values.py +156 -0
  43. langfun/core/eval/v2/metric_values_test.py +80 -0
  44. langfun/core/eval/v2/metrics.py +357 -0
  45. langfun/core/eval/v2/metrics_test.py +203 -0
  46. langfun/core/eval/v2/progress.py +348 -0
  47. langfun/core/eval/v2/progress_test.py +82 -0
  48. langfun/core/eval/v2/progress_tracking.py +210 -0
  49. langfun/core/eval/v2/progress_tracking_test.py +66 -0
  50. langfun/core/eval/v2/reporting.py +270 -0
  51. langfun/core/eval/v2/reporting_test.py +158 -0
  52. langfun/core/eval/v2/runners.py +488 -0
  53. langfun/core/eval/v2/runners_test.py +334 -0
  54. langfun/core/langfunc.py +4 -17
  55. langfun/core/langfunc_test.py +22 -6
  56. langfun/core/language_model.py +577 -39
  57. langfun/core/language_model_test.py +470 -56
  58. langfun/core/llms/__init__.py +87 -16
  59. langfun/core/llms/anthropic.py +312 -87
  60. langfun/core/llms/anthropic_test.py +71 -3
  61. langfun/core/llms/cache/base.py +21 -2
  62. langfun/core/llms/cache/in_memory.py +13 -0
  63. langfun/core/llms/cache/in_memory_test.py +53 -2
  64. langfun/core/llms/compositional.py +101 -0
  65. langfun/core/llms/compositional_test.py +73 -0
  66. langfun/core/llms/deepseek.py +117 -0
  67. langfun/core/llms/deepseek_test.py +61 -0
  68. langfun/core/llms/fake.py +11 -7
  69. langfun/core/llms/fake_test.py +14 -0
  70. langfun/core/llms/gemini.py +507 -0
  71. langfun/core/llms/gemini_test.py +195 -0
  72. langfun/core/llms/google_genai.py +62 -218
  73. langfun/core/llms/google_genai_test.py +9 -202
  74. langfun/core/llms/groq.py +160 -144
  75. langfun/core/llms/groq_test.py +31 -137
  76. langfun/core/llms/llama_cpp.py +15 -42
  77. langfun/core/llms/llama_cpp_test.py +4 -30
  78. langfun/core/llms/openai.py +395 -203
  79. langfun/core/llms/openai_compatible.py +179 -0
  80. langfun/core/llms/openai_compatible_test.py +495 -0
  81. langfun/core/llms/openai_test.py +30 -395
  82. langfun/core/llms/rest.py +113 -0
  83. langfun/core/llms/rest_test.py +111 -0
  84. langfun/core/llms/vertexai.py +192 -0
  85. langfun/core/llms/vertexai_test.py +52 -0
  86. langfun/core/logging.py +284 -0
  87. langfun/core/logging_test.py +125 -0
  88. langfun/core/message.py +319 -9
  89. langfun/core/message_test.py +190 -13
  90. langfun/core/modalities/__init__.py +6 -2
  91. langfun/core/modalities/audio.py +30 -0
  92. langfun/core/modalities/audio_test.py +63 -0
  93. langfun/core/modalities/image.py +39 -20
  94. langfun/core/modalities/image_test.py +52 -9
  95. langfun/core/modalities/mime.py +206 -29
  96. langfun/core/modalities/mime_test.py +90 -9
  97. langfun/core/modalities/ms_office.py +117 -0
  98. langfun/core/modalities/ms_office_test.py +389 -0
  99. langfun/core/modalities/pdf.py +22 -0
  100. langfun/core/modalities/pdf_test.py +57 -0
  101. langfun/core/modalities/video.py +9 -26
  102. langfun/core/modalities/video_test.py +3 -3
  103. langfun/core/modality.py +26 -3
  104. langfun/core/modality_test.py +2 -2
  105. langfun/core/sampling.py +11 -11
  106. langfun/core/structured/__init__.py +12 -16
  107. langfun/core/structured/completion.py +32 -5
  108. langfun/core/structured/completion_test.py +7 -6
  109. langfun/core/structured/description.py +2 -2
  110. langfun/core/structured/description_test.py +3 -3
  111. langfun/core/structured/function_generation.py +60 -27
  112. langfun/core/structured/function_generation_test.py +72 -2
  113. langfun/core/structured/mapping.py +97 -47
  114. langfun/core/structured/mapping_test.py +90 -2
  115. langfun/core/structured/parsing.py +33 -21
  116. langfun/core/structured/parsing_test.py +53 -9
  117. langfun/core/structured/querying.py +746 -0
  118. langfun/core/structured/{prompting_test.py → querying_test.py} +469 -51
  119. langfun/core/structured/schema.py +204 -97
  120. langfun/core/structured/schema_generation.py +1 -1
  121. langfun/core/structured/schema_test.py +130 -29
  122. langfun/core/structured/scoring.py +125 -19
  123. langfun/core/structured/scoring_test.py +30 -0
  124. langfun/core/structured/tokenization.py +64 -0
  125. langfun/core/structured/tokenization_test.py +48 -0
  126. langfun/core/template.py +115 -1
  127. langfun/core/template_test.py +71 -1
  128. langfun/core/templates/conversation.py +9 -0
  129. langfun/core/templates/conversation_test.py +4 -3
  130. langfun/core/templates/selfplay_test.py +10 -2
  131. langfun-0.1.2.dev202501140804.dist-info/METADATA +225 -0
  132. langfun-0.1.2.dev202501140804.dist-info/RECORD +153 -0
  133. {langfun-0.0.2.dev20240429.dist-info → langfun-0.1.2.dev202501140804.dist-info}/WHEEL +1 -1
  134. langfun/core/coding/python/errors.py +0 -108
  135. langfun/core/coding/python/errors_test.py +0 -99
  136. langfun/core/coding/python/permissions.py +0 -90
  137. langfun/core/coding/python/permissions_test.py +0 -86
  138. langfun/core/structured/prompting.py +0 -238
  139. langfun/core/text_formatting.py +0 -162
  140. langfun/core/text_formatting_test.py +0 -47
  141. langfun-0.0.2.dev20240429.dist-info/METADATA +0 -100
  142. langfun-0.0.2.dev20240429.dist-info/RECORD +0 -108
  143. {langfun-0.0.2.dev20240429.dist-info → langfun-0.1.2.dev202501140804.dist-info}/LICENSE +0 -0
  144. {langfun-0.0.2.dev20240429.dist-info → langfun-0.1.2.dev202501140804.dist-info}/top_level.txt +0 -0
langfun/core/llms/google_genai_test.py CHANGED
@@ -11,221 +11,28 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
- """Tests for Gemini models."""
+ """Tests for Google GenAI models."""

  import os
  import unittest
- from unittest import mock
-
- from google import generativeai as genai
- import langfun.core as lf
- from langfun.core import modalities as lf_modalities
  from langfun.core.llms import google_genai
- import pyglove as pg
-
-
- example_image = (
-     b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x18\x00\x00\x00\x18\x04'
-     b'\x03\x00\x00\x00\x12Y \xcb\x00\x00\x00\x18PLTE\x00\x00'
-     b'\x00fff_chaag_cg_ch^ci_ciC\xedb\x94\x00\x00\x00\x08tRNS'
-     b'\x00\n\x9f*\xd4\xff_\xf4\xe4\x8b\xf3a\x00\x00\x00>IDATx'
-     b'\x01c \x05\x08)"\xd8\xcc\xae!\x06pNz\x88k\x19\\Q\xa8"\x10'
-     b'\xc1\x14\x95\x01%\xc1\n\xa143Ta\xa8"D-\x84\x03QM\x98\xc3'
-     b'\x1a\x1a\x1a@5\x0e\x04\xa0q\x88\x05\x00\x07\xf8\x18\xf9'
-     b'\xdao\xd0|\x00\x00\x00\x00IEND\xaeB`\x82'
- )
-
-
- def mock_get_model(model_name, *args, **kwargs):
-   del args, kwargs
-   if 'gemini' in model_name:
-     method = 'generateContent'
-   elif 'chat' in model_name:
-     method = 'generateMessage'
-   else:
-     method = 'generateText'
-   return pg.Dict(supported_generation_methods=[method])
-
-
- def mock_generate_text(*, model, prompt, **kwargs):
-   return pg.Dict(
-       candidates=[pg.Dict(output=f'{prompt} to {model} with {kwargs}')]
-   )
-
-
- def mock_chat(*, model, messages, **kwargs):
-   return pg.Dict(
-       candidates=[pg.Dict(content=f'{messages} to {model} with {kwargs}')]
-   )
-
-
- def mock_generate_content(content, generation_config, **kwargs):
-   del kwargs
-   c = generation_config
-   return genai.types.GenerateContentResponse(
-       done=True,
-       iterator=None,
-       chunks=[],
-       result=pg.Dict(
-           prompt_feedback=pg.Dict(block_reason=None),
-           candidates=[
-               pg.Dict(
-                   content=pg.Dict(
-                       parts=[
-                           pg.Dict(
-                               text=(
-                                   f'This is a response to {content[0]} with '
-                                   f'n={c.candidate_count}, '
-                                   f'temperature={c.temperature}, '
-                                   f'top_p={c.top_p}, '
-                                   f'top_k={c.top_k}, '
-                                   f'max_tokens={c.max_output_tokens}, '
-                                   f'stop={c.stop_sequences}.'
-                               )
-                           )
-                       ]
-                   ),
-               ),
-           ],
-       ),
-   )


  class GenAITest(unittest.TestCase):
-   """Tests for Google GenAI model."""
-
-   def test_content_from_message_text_only(self):
-     text = 'This is a beautiful day'
-     model = google_genai.GeminiPro()
-     chunks = model._content_from_message(lf.UserMessage(text))
-     self.assertEqual(chunks, [text])
-
-   def test_content_from_message_mm(self):
-     message = lf.UserMessage(
-         'This is an {{image}}, what is it?',
-         image=lf_modalities.Image.from_bytes(example_image),
-     )
+   """Tests for GenAI model."""

-     # Non-multimodal model.
-     with self.assertRaisesRegex(ValueError, 'Unsupported modality'):
-       google_genai.GeminiPro()._content_from_message(message)
-
-     model = google_genai.GeminiProVision()
-     chunks = model._content_from_message(message)
-     self.maxDiff = None
-     self.assertEqual(
-         chunks,
-         [
-             'This is an',
-             genai.types.BlobDict(mime_type='image/png', data=example_image),
-             ', what is it?',
-         ],
-     )
-
-   def test_response_to_result_text_only(self):
-     response = genai.types.GenerateContentResponse(
-         done=True,
-         iterator=None,
-         chunks=[],
-         result=pg.Dict(
-             prompt_feedback=pg.Dict(block_reason=None),
-             candidates=[
-                 pg.Dict(
-                     content=pg.Dict(
-                         parts=[pg.Dict(text='This is response 1.')]
-                     ),
-                 ),
-                 pg.Dict(
-                     content=pg.Dict(parts=[pg.Dict(text='This is response 2.')])
-                 ),
-             ],
-         ),
-     )
-     model = google_genai.GeminiProVision()
-     result = model._response_to_result(response)
-     self.assertEqual(
-         result,
-         lf.LMSamplingResult([
-             lf.LMSample(lf.AIMessage('This is response 1.'), score=0.0),
-             lf.LMSample(lf.AIMessage('This is response 2.'), score=0.0),
-         ]),
-     )
-
-   def test_model_hub(self):
-     orig_get_model = genai.get_model
-     genai.get_model = mock_get_model
-
-     model = google_genai._GOOGLE_GENAI_MODEL_HUB.get('gemini-pro')
-     self.assertIsNotNone(model)
-     self.assertIs(google_genai._GOOGLE_GENAI_MODEL_HUB.get('gemini-pro'), model)
-
-     genai.get_model = orig_get_model
-
-   def test_api_key_check(self):
+   def test_basics(self):
      with self.assertRaisesRegex(ValueError, 'Please specify `api_key`'):
-       _ = google_genai.GeminiPro()._api_initialized
+       _ = google_genai.GeminiPro1_5().api_endpoint
+
+     self.assertIsNotNone(google_genai.GeminiPro1_5(api_key='abc').api_endpoint)

-     self.assertTrue(google_genai.GeminiPro(api_key='abc')._api_initialized)
      os.environ['GOOGLE_API_KEY'] = 'abc'
-     self.assertTrue(google_genai.GeminiPro()._api_initialized)
+     lm = google_genai.GeminiPro1_5()
+     self.assertIsNotNone(lm.api_endpoint)
+     self.assertTrue(lm.model_id.startswith('GenAI('))
      del os.environ['GOOGLE_API_KEY']

-   def test_call(self):
-     with mock.patch(
-         'google.generativeai.GenerativeModel.generate_content',
-     ) as mock_generate:
-       orig_get_model = genai.get_model
-       genai.get_model = mock_get_model
-       mock_generate.side_effect = mock_generate_content
-
-       lm = google_genai.GeminiPro(api_key='test_key')
-       self.maxDiff = None
-       self.assertEqual(
-           lm('hello', temperature=2.0, top_k=20, max_tokens=1024).text,
-           (
-               'This is a response to hello with n=1, temperature=2.0, '
-               'top_p=None, top_k=20, max_tokens=1024, stop=None.'
-           ),
-       )
-       genai.get_model = orig_get_model
-
-   def test_call_with_legacy_completion_model(self):
-     orig_get_model = genai.get_model
-     genai.get_model = mock_get_model
-     orig_generate_text = genai.generate_text
-     genai.generate_text = mock_generate_text
-
-     lm = google_genai.Palm2(api_key='test_key')
-     self.maxDiff = None
-     self.assertEqual(
-         lm('hello', temperature=2.0, top_k=20).text,
-         (
-             "hello to models/text-bison-001 with {'temperature': 2.0, "
-             "'top_k': 20, 'top_p': None, 'candidate_count': 1, "
-             "'max_output_tokens': None, 'stop_sequences': None}"
-         ),
-     )
-     genai.get_model = orig_get_model
-     genai.generate_text = orig_generate_text
-
-   def test_call_with_legacy_chat_model(self):
-     orig_get_model = genai.get_model
-     genai.get_model = mock_get_model
-     orig_chat = genai.chat
-     genai.chat = mock_chat
-
-     lm = google_genai.Palm2_IT(api_key='test_key')
-     self.maxDiff = None
-     self.assertEqual(
-         lm('hello', temperature=2.0, top_k=20).text,
-         (
-             "hello to models/chat-bison-001 with {'temperature': 2.0, "
-             "'top_k': 20, 'top_p': None, 'candidate_count': 1}"
-         ),
-     )
-     genai.get_model = orig_get_model
-     genai.chat = orig_chat
-

  if __name__ == '__main__':
    unittest.main()
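The 221-to-28 line shrink tracks the reimplementation of google_genai on top of the shared REST-based Gemini stack (langfun/core/llms/gemini.py, new in this release): the mocks for the google.generativeai SDK, the model hub, and the legacy PaLM 2 text/chat paths are gone, and the surface is now exercised through `api_endpoint` and `model_id`. A minimal sketch of that surface as `test_basics` uses it, assuming this wheel is installed (the key is a dummy value; nothing below issues a request):

import os

from langfun.core.llms import google_genai

# Without `api_key` or GOOGLE_API_KEY set, resolving the REST endpoint
# raises ValueError, as asserted in `test_basics` above.
os.environ['GOOGLE_API_KEY'] = 'abc'  # dummy value, mirrors the test
lm = google_genai.GeminiPro1_5()
assert lm.api_endpoint is not None       # REST endpoint; no SDK involved
assert lm.model_id.startswith('GenAI(')
del os.environ['GOOGLE_API_KEY']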
langfun/core/llms/groq.py CHANGED
@@ -13,43 +13,88 @@
  # limitations under the License.
  """Language models from Groq."""

- import functools
  import os
  from typing import Annotated, Any

  import langfun.core as lf
- from langfun.core import modalities as lf_modalities
+ from langfun.core.llms import openai_compatible
  import pyglove as pg
- import requests


  SUPPORTED_MODELS_AND_SETTINGS = {
      # Refer https://console.groq.com/docs/models
-     'llama3-8b-8192': pg.Dict(max_tokens=8192, max_concurrency=16),
-     'llama3-70b-8192': pg.Dict(max_tokens=8192, max_concurrency=16),
-     'llama2-70b-4096': pg.Dict(max_tokens=4096, max_concurrency=16),
-     'mixtral-8x7b-32768': pg.Dict(max_tokens=32768, max_concurrency=16),
-     'gemma-7b-it': pg.Dict(max_tokens=8192, max_concurrency=16),
+     # Price in US dollars at https://groq.com/pricing/ as of 2024-10-10.
+     'llama-3.2-3b-preview': pg.Dict(
+         max_tokens=8192,
+         max_concurrency=64,
+         cost_per_1k_input_tokens=0.00006,
+         cost_per_1k_output_tokens=0.00006,
+     ),
+     'llama-3.2-1b-preview': pg.Dict(
+         max_tokens=8192,
+         max_concurrency=64,
+         cost_per_1k_input_tokens=0.00004,
+         cost_per_1k_output_tokens=0.00004,
+     ),
+     'llama-3.1-70b-versatile': pg.Dict(
+         max_tokens=8192,
+         max_concurrency=16,
+         cost_per_1k_input_tokens=0.00059,
+         cost_per_1k_output_tokens=0.00079,
+     ),
+     'llama-3.1-8b-instant': pg.Dict(
+         max_tokens=8192,
+         max_concurrency=32,
+         cost_per_1k_input_tokens=0.00005,
+         cost_per_1k_output_tokens=0.00008,
+     ),
+     'llama3-70b-8192': pg.Dict(
+         max_tokens=8192,
+         max_concurrency=16,
+         cost_per_1k_input_tokens=0.00059,
+         cost_per_1k_output_tokens=0.00079,
+     ),
+     'llama3-8b-8192': pg.Dict(
+         max_tokens=8192,
+         max_concurrency=32,
+         cost_per_1k_input_tokens=0.00005,
+         cost_per_1k_output_tokens=0.00008,
+     ),
+     'llama2-70b-4096': pg.Dict(
+         max_tokens=4096,
+         max_concurrency=16,
+     ),
+     'mixtral-8x7b-32768': pg.Dict(
+         max_tokens=32768,
+         max_concurrency=16,
+         cost_per_1k_input_tokens=0.00024,
+         cost_per_1k_output_tokens=0.00024,
+     ),
+     'gemma2-9b-it': pg.Dict(
+         max_tokens=8192,
+         max_concurrency=32,
+         cost_per_1k_input_tokens=0.0002,
+         cost_per_1k_output_tokens=0.0002,
+     ),
+     'gemma-7b-it': pg.Dict(
+         max_tokens=8192,
+         max_concurrency=32,
+         cost_per_1k_input_tokens=0.00007,
+         cost_per_1k_output_tokens=0.00007,
+     ),
+     'whisper-large-v3': pg.Dict(
+         max_tokens=8192,
+         max_concurrency=16,
+     ),
+     'whisper-large-v3-turbo': pg.Dict(
+         max_tokens=8192,
+         max_concurrency=16,
+     )
  }


- class GroqError(Exception):  # pylint: disable=g-bad-exception-name
-   """Base class for Groq errors."""
-
-
- class RateLimitError(GroqError):
-   """Error for rate limit reached."""
-
-
- class OverloadedError(GroqError):
-   """Groq's server is temporarily overloaded."""
-
-
- _CHAT_COMPLETE_API_ENDPOINT = 'https://api.groq.com/openai/v1/chat/completions'
-
-
  @lf.use_init_args(['model'])
- class Groq(lf.LanguageModel):
+ class Groq(openai_compatible.OpenAICompatible):
    """Groq LLMs through REST APIs (OpenAI compatible).

    See https://platform.openai.com/docs/api-reference/chat
@@ -62,10 +107,6 @@ class Groq(lf.LanguageModel):
        'The name of the model to use.',
    ]

-   multimodal: Annotated[bool, 'Whether this model has multimodal support.'] = (
-       False
-   )
-
    api_key: Annotated[
        str | None,
        (
@@ -74,32 +115,21 @@
        ),
    ] = None

-   def _on_bound(self):
-     super()._on_bound()
-     self._api_key = None
-     self.__dict__.pop('_api_initialized', None)
-     self.__dict__.pop('_session', None)
+   api_endpoint: str = 'https://api.groq.com/openai/v1/chat/completions'

-   @functools.cached_property
-   def _api_initialized(self):
+   @property
+   def headers(self) -> dict[str, Any]:
      api_key = self.api_key or os.environ.get('GROQ_API_KEY', None)
      if not api_key:
        raise ValueError(
            'Please specify `api_key` during `__init__` or set environment '
            'variable `GROQ_API_KEY` with your Groq API key.'
        )
-     self._api_key = api_key
-     return True
-
-   @functools.cached_property
-   def _session(self) -> requests.Session:
-     assert self._api_initialized
-     s = requests.Session()
-     s.headers.update({
-         'Authorization': f'Bearer {self._api_key}',
-         'Content-Type': 'application/json',
+     headers = super().headers
+     headers.update({
+         'Authorization': f'Bearer {api_key}',
      })
-     return s
+     return headers

    @property
    def model_id(self) -> str:
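With the move to `openai_compatible.OpenAICompatible`, request assembly, response parsing, and retry handling all live in the base classes; this file now contributes only the endpoint and the bearer token. A standalone sketch of what the `headers` override yields; the JSON content type coming from the base class is an assumption here, while the Authorization entry comes from the code above:

import os


def groq_headers(api_key: str | None = None) -> dict[str, str]:
  # Standalone illustration, not the library's code.
  key = api_key or os.environ.get('GROQ_API_KEY', None)
  if not key:
    raise ValueError('Please specify `api_key` or set GROQ_API_KEY.')
  headers = {'Content-Type': 'application/json'}  # assumed base headers
  headers.update({'Authorization': f'Bearer {key}'})
  return headers


print(groq_headers('fake-key'))
# {'Content-Type': 'application/json', 'Authorization': 'Bearer fake-key'}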
@@ -110,109 +140,50 @@
    def max_concurrency(self) -> int:
      return SUPPORTED_MODELS_AND_SETTINGS[self.model].max_concurrency

-   def _get_request_args(self, options: lf.LMSamplingOptions) -> dict[str, Any]:
+   def estimate_cost(
+       self,
+       num_input_tokens: int,
+       num_output_tokens: int
+   ) -> float | None:
+     """Estimate the cost based on usage."""
+     cost_per_1k_input_tokens = SUPPORTED_MODELS_AND_SETTINGS[self.model].get(
+         'cost_per_1k_input_tokens', None
+     )
+     cost_per_1k_output_tokens = SUPPORTED_MODELS_AND_SETTINGS[self.model].get(
+         'cost_per_1k_output_tokens', None
+     )
+     if cost_per_1k_input_tokens is None or cost_per_1k_output_tokens is None:
+       return None
+     return (
+         cost_per_1k_input_tokens * num_input_tokens
+         + cost_per_1k_output_tokens * num_output_tokens
+     ) / 1000
+
+   def _request_args(self, options: lf.LMSamplingOptions) -> dict[str, Any]:
      """Returns a dict as request arguments."""
      # `logprobs` and `top_logprobs` flags are not supported on Groq yet.
-     args = dict(
-         model=self.model,
-         n=options.n,
-         stream=False,
-     )
-
-     if options.temperature is not None:
-       args['temperature'] = options.temperature
-     if options.max_tokens is not None:
-       args['max_tokens'] = options.max_tokens
-     if options.top_p is not None:
-       args['top_p'] = options.top_p
-     if options.stop:
-       args['stop'] = options.stop
+     args = super()._request_args(options)
+     args.pop('logprobs', None)
+     args.pop('top_logprobs', None)
      return args

-   def _content_from_message(self, prompt: lf.Message) -> list[dict[str, Any]]:
-     """Converts a message to Groq's content protocol (list of dicts)."""
-     # Refer: https://platform.openai.com/docs/api-reference/chat/create
-     content = []
-     for chunk in prompt.chunk():
-       if isinstance(chunk, str):
-         item = dict(type='text', text=chunk)
-       elif (
-           self.multimodal
-           and isinstance(chunk, lf_modalities.Image)
-           and chunk.uri
-       ):
-         # NOTE(daiyip): Groq only supports image URL.
-         item = dict(type='image_url', image_url=chunk.uri)
-       else:
-         raise ValueError(f'Unsupported modality object: {chunk!r}.')
-       content.append(item)
-     return content
-
-   def _message_from_choice(self, choice: dict[str, Any]) -> lf.Message:
-     """Converts Groq's content protocol to message."""
-     # Refer: https://platform.openai.com/docs/api-reference/chat/create
-     content = choice['message']['content']
-     if isinstance(content, str):
-       return lf.AIMessage(content)
-     return lf.AIMessage.from_chunks(
-         [x['text'] for x in content if x['type'] == 'text']
-     )
-
-   def _parse_response(self, response: requests.Response) -> lf.LMSamplingResult:
-     """Parses Groq's response."""
-     # Refer: https://platform.openai.com/docs/api-reference/chat/object
-     if response.status_code == 200:
-       output = response.json()
-       samples = [
-           lf.LMSample(self._message_from_choice(choice), score=0.0)
-           for choice in output['choices']
-       ]
-       usage = output['usage']
-       return lf.LMSamplingResult(
-           samples,
-           usage=lf.LMSamplingUsage(
-               prompt_tokens=usage['prompt_tokens'],
-               completion_tokens=usage['completion_tokens'],
-               total_tokens=usage['total_tokens'],
-           ),
-       )
-     else:
-       # https://platform.openai.com/docs/guides/error-codes/api-errors
-       if response.status_code == 429:
-         error_cls = RateLimitError
-       elif response.status_code in (500, 502, 503):
-         error_cls = OverloadedError
-       else:
-         error_cls = GroqError
-       raise error_cls(f'{response.status_code}: {response.content}')
-
-   def _sample(self, prompts: list[lf.Message]) -> list[lf.LMSamplingResult]:
-     assert self._api_initialized
-     return self._parallel_execute_with_currency_control(
-         self._sample_single,
-         prompts,
-         retry_on_errors=(RateLimitError, OverloadedError),
-     )
-
-   def _sample_single(self, prompt: lf.Message) -> lf.LMSamplingResult:
-     request = dict()
-     request.update(self._get_request_args(self.sampling_options))
-     request.update(
-         dict(
-             messages=[
-                 dict(role='user', content=self._content_from_message(prompt))
-             ]
-         )
-     )
-     try:
-       response = self._session.post(
-           _CHAT_COMPLETE_API_ENDPOINT,
-           json=request,
-           timeout=self.timeout,
-       )
-       return self._parse_response(response)
-     except ConnectionError as e:
-       raise OverloadedError(str(e)) from e
+ class GroqLlama3_2_3B(Groq):  # pylint: disable=invalid-name
+   """Llama3.2-3B with 8K context window.
+
+   See: https://huggingface.co/meta-llama/Llama-3.2-3B
+   """
+
+   model = 'llama-3.2-3b-preview'
+
+
+ class GroqLlama3_2_1B(Groq):  # pylint: disable=invalid-name
+   """Llama3.2-1B with 8K context window.
+
+   See: https://huggingface.co/meta-llama/Llama-3.2-1B
+   """
+
+   model = 'llama-3.2-3b-preview'


  class GroqLlama3_8B(Groq):  # pylint: disable=invalid-name
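The `estimate_cost` method in the hunk above is plain per-1K pricing over the table at the top of the file. A worked example at the `llama-3.1-70b-versatile` rates (the token counts are invented for illustration):

# Worked example of the estimate_cost arithmetic; token counts are invented.
cost_per_1k_input = 0.00059   # llama-3.1-70b-versatile, $ per 1K input tokens
cost_per_1k_output = 0.00079  # $ per 1K output tokens
num_input_tokens, num_output_tokens = 2000, 500

cost = (
    cost_per_1k_input * num_input_tokens
    + cost_per_1k_output * num_output_tokens
) / 1000
assert abs(cost - 0.001575) < 1e-9  # about $0.0016 for the call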
@@ -224,6 +195,24 @@ class GroqLlama3_8B(Groq):  # pylint: disable=invalid-name
    model = 'llama3-8b-8192'


+ class GroqLlama3_1_70B(Groq):  # pylint: disable=invalid-name
+   """Llama3.1-70B with 8K context window.
+
+   See: https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/MODEL_CARD.md  # pylint: disable=line-too-long
+   """
+
+   model = 'llama-3.1-70b-versatile'
+
+
+ class GroqLlama3_1_8B(Groq):  # pylint: disable=invalid-name
+   """Llama3.1-8B with 8K context window.
+
+   See: https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/MODEL_CARD.md  # pylint: disable=line-too-long
+   """
+
+   model = 'llama-3.1-8b-instant'
+
+
  class GroqLlama3_70B(Groq):  # pylint: disable=invalid-name
    """Llama3-70B with 8K context window.

@@ -251,10 +240,37 @@ class GroqMistral_8x7B(Groq):  # pylint: disable=invalid-name
    model = 'mixtral-8x7b-32768'


- class GroqGemma7B_IT(Groq):  # pylint: disable=invalid-name
+ class GroqGemma2_9B_IT(Groq):  # pylint: disable=invalid-name
+   """Gemma2 9B with 8K context window.
+
+   See: https://huggingface.co/google/gemma-2-9b-it
+   """
+
+   model = 'gemma2-9b-it'
+
+
+ class GroqGemma_7B_IT(Groq):  # pylint: disable=invalid-name
    """Gemma 7B with 8K context window.

    See: https://huggingface.co/google/gemma-1.1-7b-it
    """

    model = 'gemma-7b-it'
+
+
+ class GroqWhisper_Large_v3(Groq):  # pylint: disable=invalid-name
+   """Whisper Large V3 with 8K context window.
+
+   See: https://huggingface.co/openai/whisper-large-v3
+   """
+
+   model = 'whisper-large-v3'
+
+
+ class GroqWhisper_Large_v3Turbo(Groq):  # pylint: disable=invalid-name
+   """Whisper Large V3 Turbo with 8K context window.
+
+   See: https://huggingface.co/openai/whisper-large-v3-turbo
+   """
+
+   model = 'whisper-large-v3-turbo'
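After this change each subclass is just a model-name binding on the shared OpenAI-compatible plumbing. A usage sketch, assuming this wheel is installed; the key is a placeholder and nothing below sends a request:

import os

from langfun.core.llms import groq

os.environ['GROQ_API_KEY'] = 'placeholder'  # headers need a key to build
lm = groq.GroqLlama3_1_8B()                 # binds model='llama-3.1-8b-instant'
print(lm.api_endpoint)     # https://api.groq.com/openai/v1/chat/completions
print(lm.max_concurrency)  # 32, from SUPPORTED_MODELS_AND_SETTINGS
print(lm.estimate_cost(num_input_tokens=2000, num_output_tokens=500))
# ~0.00014 at the 2024-10-10 rates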