langfun 0.1.2.dev202410100804__py3-none-any.whl → 0.1.2.dev202410120803__py3-none-any.whl

This diff compares the contents of two publicly released versions of the langfun package, as published to their public registry. It is provided for informational purposes only.
Files changed (42)
  1. langfun/core/__init__.py +1 -0
  2. langfun/core/eval/base_test.py +1 -0
  3. langfun/core/langfunc_test.py +2 -2
  4. langfun/core/language_model.py +140 -24
  5. langfun/core/language_model_test.py +166 -36
  6. langfun/core/llms/__init__.py +8 -1
  7. langfun/core/llms/anthropic.py +72 -7
  8. langfun/core/llms/cache/in_memory_test.py +3 -2
  9. langfun/core/llms/fake_test.py +7 -0
  10. langfun/core/llms/groq.py +154 -6
  11. langfun/core/llms/openai.py +300 -42
  12. langfun/core/llms/openai_test.py +35 -8
  13. langfun/core/llms/vertexai.py +121 -16
  14. langfun/core/logging.py +150 -43
  15. langfun/core/logging_test.py +33 -0
  16. langfun/core/message.py +249 -70
  17. langfun/core/message_test.py +70 -45
  18. langfun/core/modalities/audio.py +1 -1
  19. langfun/core/modalities/audio_test.py +1 -1
  20. langfun/core/modalities/image.py +1 -1
  21. langfun/core/modalities/image_test.py +9 -3
  22. langfun/core/modalities/mime.py +39 -3
  23. langfun/core/modalities/mime_test.py +39 -0
  24. langfun/core/modalities/ms_office.py +2 -5
  25. langfun/core/modalities/ms_office_test.py +1 -1
  26. langfun/core/modalities/pdf_test.py +1 -1
  27. langfun/core/modalities/video.py +1 -1
  28. langfun/core/modalities/video_test.py +2 -2
  29. langfun/core/structured/completion_test.py +1 -0
  30. langfun/core/structured/mapping.py +38 -0
  31. langfun/core/structured/mapping_test.py +55 -0
  32. langfun/core/structured/parsing_test.py +2 -1
  33. langfun/core/structured/prompting_test.py +1 -0
  34. langfun/core/structured/schema.py +34 -0
  35. langfun/core/template.py +110 -1
  36. langfun/core/template_test.py +37 -0
  37. langfun/core/templates/selfplay_test.py +4 -2
  38. {langfun-0.1.2.dev202410100804.dist-info → langfun-0.1.2.dev202410120803.dist-info}/METADATA +1 -1
  39. {langfun-0.1.2.dev202410100804.dist-info → langfun-0.1.2.dev202410120803.dist-info}/RECORD +42 -42
  40. {langfun-0.1.2.dev202410100804.dist-info → langfun-0.1.2.dev202410120803.dist-info}/LICENSE +0 -0
  41. {langfun-0.1.2.dev202410100804.dist-info → langfun-0.1.2.dev202410120803.dist-info}/WHEEL +0 -0
  42. {langfun-0.1.2.dev202410100804.dist-info → langfun-0.1.2.dev202410120803.dist-info}/top_level.txt +0 -0
langfun/core/llms/__init__.py CHANGED
@@ -95,11 +95,18 @@ from langfun.core.llms.anthropic import Claude3Sonnet
 from langfun.core.llms.anthropic import Claude3Haiku
 
 from langfun.core.llms.groq import Groq
+from langfun.core.llms.groq import GroqLlama3_2_3B
+from langfun.core.llms.groq import GroqLlama3_2_1B
+from langfun.core.llms.groq import GroqLlama3_1_70B
+from langfun.core.llms.groq import GroqLlama3_1_8B
 from langfun.core.llms.groq import GroqLlama3_70B
 from langfun.core.llms.groq import GroqLlama3_8B
 from langfun.core.llms.groq import GroqLlama2_70B
 from langfun.core.llms.groq import GroqMistral_8x7B
-from langfun.core.llms.groq import GroqGemma7B_IT
+from langfun.core.llms.groq import GroqGemma2_9B_IT
+from langfun.core.llms.groq import GroqGemma_7B_IT
+from langfun.core.llms.groq import GroqWhisper_Large_v3
+from langfun.core.llms.groq import GroqWhisper_Large_v3Turbo
 
 from langfun.core.llms.vertexai import VertexAI
 from langfun.core.llms.vertexai import VertexAIGemini1_5
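The new classes become importable directly from `langfun.core.llms` (exposed as `lf.llms` at the package top level). A minimal sketch of using one of the new exports; a `GROQ_API_KEY` environment variable (or an explicit `api_key` argument) is assumed, per the existing Groq wrapper:

```python
# Sketch: instantiating one of the newly exported Groq model classes.
# Assumes GROQ_API_KEY is set in the environment (or pass api_key=...).
import langfun as lf

lm = lf.llms.GroqLlama3_1_8B()  # wraps model='llama-3.1-8b-instant'
print(lm.model)
```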
langfun/core/llms/anthropic.py CHANGED
@@ -28,15 +28,57 @@ SUPPORTED_MODELS_AND_SETTINGS = {
     # Rate limits from https://docs.anthropic.com/claude/reference/rate-limits
     # RPM/TPM for Claude-2.1, Claude-2.0, and Claude-Instant-1.2 estimated
     # as RPM/TPM of the largest-available model (Claude-3-Opus).
+    # Price in US dollars at https://www.anthropic.com/pricing
+    # as of 2024-10-10.
     'claude-3-5-sonnet-20240620': pg.Dict(
-        max_tokens=4096, rpm=4000, tpm=400000
+        max_tokens=4096,
+        rpm=4000,
+        tpm=400000,
+        cost_per_1k_input_tokens=0.003,
+        cost_per_1k_output_tokens=0.015,
+    ),
+    'claude-3-opus-20240229': pg.Dict(
+        max_tokens=4096,
+        rpm=4000,
+        tpm=400000,
+        cost_per_1k_input_tokens=0.015,
+        cost_per_1k_output_tokens=0.075,
+    ),
+    'claude-3-sonnet-20240229': pg.Dict(
+        max_tokens=4096,
+        rpm=4000,
+        tpm=400000,
+        cost_per_1k_input_tokens=0.003,
+        cost_per_1k_output_tokens=0.015,
+    ),
+    'claude-3-haiku-20240307': pg.Dict(
+        max_tokens=4096,
+        rpm=4000,
+        tpm=400000,
+        cost_per_1k_input_tokens=0.00025,
+        cost_per_1k_output_tokens=0.00125,
+    ),
+    'claude-2.1': pg.Dict(
+        max_tokens=4096,
+        rpm=4000,
+        tpm=400000,
+        cost_per_1k_input_tokens=0.008,
+        cost_per_1k_output_tokens=0.024,
+    ),
+    'claude-2.0': pg.Dict(
+        max_tokens=4096,
+        rpm=4000,
+        tpm=400000,
+        cost_per_1k_input_tokens=0.008,
+        cost_per_1k_output_tokens=0.024,
+    ),
+    'claude-instant-1.2': pg.Dict(
+        max_tokens=4096,
+        rpm=4000,
+        tpm=400000,
+        cost_per_1k_input_tokens=0.0008,
+        cost_per_1k_output_tokens=0.0024,
     ),
-    'claude-3-opus-20240229': pg.Dict(max_tokens=4096, rpm=4000, tpm=400000),
-    'claude-3-sonnet-20240229': pg.Dict(max_tokens=4096, rpm=4000, tpm=400000),
-    'claude-3-haiku-20240307': pg.Dict(max_tokens=4096, rpm=4000, tpm=400000),
-    'claude-2.1': pg.Dict(max_tokens=4096, rpm=4000, tpm=400000),
-    'claude-2.0': pg.Dict(max_tokens=4096, rpm=4000, tpm=400000),
-    'claude-instant-1.2': pg.Dict(max_tokens=4096, rpm=4000, tpm=400000),
 }
 
 
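As a sanity check on the table above, a call's cost can be computed by hand; for example, a hypothetical call consuming 1,000 input tokens and producing 2,000 output tokens on `claude-3-5-sonnet-20240620`:

```python
# Worked example (hypothetical token counts) of the per-1k pricing above.
input_cost = 0.003 * 1000 / 1000    # $0.0030 for 1,000 input tokens
output_cost = 0.015 * 2000 / 1000   # $0.0300 for 2,000 output tokens
total_cost = input_cost + output_cost
assert round(total_cost, 4) == 0.033
```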
@@ -107,6 +149,25 @@ class Anthropic(rest.REST):
         requests_per_min=rpm, tokens_per_min=tpm
     )
 
+  def estimate_cost(
+      self,
+      num_input_tokens: int,
+      num_output_tokens: int
+  ) -> float | None:
+    """Estimate the cost based on usage."""
+    cost_per_1k_input_tokens = SUPPORTED_MODELS_AND_SETTINGS[self.model].get(
+        'cost_per_1k_input_tokens', None
+    )
+    cost_per_1k_output_tokens = SUPPORTED_MODELS_AND_SETTINGS[self.model].get(
+        'cost_per_1k_output_tokens', None
+    )
+    if cost_per_1k_output_tokens is None or cost_per_1k_input_tokens is None:
+      return None
+    return (
+        cost_per_1k_input_tokens * num_input_tokens
+        + cost_per_1k_output_tokens * num_output_tokens
+    ) / 1000
+
   def request(
       self,
       prompt: lf.Message,
@@ -181,6 +242,10 @@ class Anthropic(rest.REST):
             prompt_tokens=input_tokens,
             completion_tokens=output_tokens,
             total_tokens=input_tokens + output_tokens,
+            estimated_cost=self.estimate_cost(
+                num_input_tokens=input_tokens,
+                num_output_tokens=output_tokens,
+            ),
         ),
     )
 
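With `estimated_cost` now populated on `LMSamplingUsage`, callers can read a dollar estimate straight off a response's usage metadata. A hedged sketch (assumes `ANTHROPIC_API_KEY` is set; the field is `None` for models without pricing entries):

```python
# Sketch: reading the new estimated_cost field from a live response.
# Assumes ANTHROPIC_API_KEY is set in the environment.
import langfun as lf

lm = lf.llms.Claude3Haiku()
r = lm('Say hi in five words.')
print(r.usage.prompt_tokens, r.usage.completion_tokens)
print(r.usage.estimated_cost)  # In US dollars; None if pricing is unknown.
```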
langfun/core/llms/cache/in_memory_test.py CHANGED
@@ -66,14 +66,15 @@ class InMemoryLMCacheTest(unittest.TestCase):
           [
               lf.LMSample(
                   lf.AIMessage(response_text, cache_seed=cache_seed),
-                  score=1.0
+                  score=1.0,
               )
           ],
           usage=lf.LMSamplingUsage(
               1,
               len(response_text),
               len(response_text) + 1,
-          )
+          ),
+          is_cached=True,
       )
   )
 
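The test now also asserts the new `is_cached` flag on `LMSamplingResult`, which marks results replayed from a cache. A hedged sketch of the behavior under test, using the deterministic fake `Echo` model so no API key is needed:

```python
# Sketch: results replayed from an LM cache are marked is_cached=True.
import langfun as lf
from langfun.core.llms.cache import InMemory

lm = lf.llms.Echo(cache=InMemory())
first = lm.sample(['hello'])[0]    # computed, then written to the cache
second = lm.sample(['hello'])[0]   # replayed from the cache
print(first.is_cached, second.is_cached)  # Expected: False True
```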
langfun/core/llms/fake_test.py CHANGED
@@ -34,6 +34,7 @@ class EchoTest(unittest.TestCase):
             'hi',
             score=1.0,
             logprobs=None,
+            is_cached=False,
             usage=lf.LMSamplingUsage(2, 2, 4),
             tags=[lf.Message.TAG_LM_RESPONSE],
         ),
@@ -85,6 +86,7 @@ class StaticResponseTest(unittest.TestCase):
             canned_response,
             score=1.0,
             logprobs=None,
+            is_cached=False,
             usage=lf.LMSamplingUsage(2, 38, 40),
             tags=[lf.Message.TAG_LM_RESPONSE],
         ),
@@ -106,6 +108,7 @@ class StaticResponseTest(unittest.TestCase):
             canned_response,
             score=1.0,
             logprobs=None,
+            is_cached=False,
             usage=lf.LMSamplingUsage(15, 38, 53),
             tags=[lf.Message.TAG_LM_RESPONSE],
         ),
@@ -150,6 +153,7 @@ class StaticMappingTest(unittest.TestCase):
             'Hello',
             score=1.0,
             logprobs=None,
+            is_cached=False,
             usage=lf.LMSamplingUsage(2, 5, 7),
             tags=[lf.Message.TAG_LM_RESPONSE],
         ),
@@ -166,6 +170,7 @@ class StaticMappingTest(unittest.TestCase):
             'I am fine, how about you?',
             score=1.0,
             logprobs=None,
+            is_cached=False,
             usage=lf.LMSamplingUsage(12, 25, 37),
             tags=[lf.Message.TAG_LM_RESPONSE],
         ),
@@ -199,6 +204,7 @@ class StaticSequenceTest(unittest.TestCase):
             'Hello',
             score=1.0,
             logprobs=None,
+            is_cached=False,
             usage=lf.LMSamplingUsage(2, 5, 7),
             tags=[lf.Message.TAG_LM_RESPONSE],
         ),
@@ -215,6 +221,7 @@ class StaticSequenceTest(unittest.TestCase):
             'I am fine, how about you?',
             score=1.0,
             logprobs=None,
+            is_cached=False,
             usage=lf.LMSamplingUsage(12, 25, 37),
             tags=[lf.Message.TAG_LM_RESPONSE],
         ),
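These hunks capture the complementary expectation: fresh, non-cache-served samples now report `is_cached=False`. A compact sketch of what the updated assertions exercise:

```python
# Sketch: a fresh sample from the fake Echo model is not cache-served.
import langfun as lf

result = lf.llms.Echo().sample(['hi'])[0]
assert result.is_cached is False
assert result.usage.total_tokens == 4  # 2 prompt + 2 completion, as above
```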
langfun/core/llms/groq.py CHANGED
@@ -24,11 +24,73 @@ import pyglove as pg
 
 SUPPORTED_MODELS_AND_SETTINGS = {
     # Refer https://console.groq.com/docs/models
-    'llama3-8b-8192': pg.Dict(max_tokens=8192, max_concurrency=16),
-    'llama3-70b-8192': pg.Dict(max_tokens=8192, max_concurrency=16),
-    'llama2-70b-4096': pg.Dict(max_tokens=4096, max_concurrency=16),
-    'mixtral-8x7b-32768': pg.Dict(max_tokens=32768, max_concurrency=16),
-    'gemma-7b-it': pg.Dict(max_tokens=8192, max_concurrency=16),
+    # Price in US dollars at https://groq.com/pricing/ as of 2024-10-10.
+    'llama-3.2-3b-preview': pg.Dict(
+        max_tokens=8192,
+        max_concurrency=64,
+        cost_per_1k_input_tokens=0.00006,
+        cost_per_1k_output_tokens=0.00006,
+    ),
+    'llama-3.2-1b-preview': pg.Dict(
+        max_tokens=8192,
+        max_concurrency=64,
+        cost_per_1k_input_tokens=0.00004,
+        cost_per_1k_output_tokens=0.00004,
+    ),
+    'llama-3.1-70b-versatile': pg.Dict(
+        max_tokens=8192,
+        max_concurrency=16,
+        cost_per_1k_input_tokens=0.00059,
+        cost_per_1k_output_tokens=0.00079,
+    ),
+    'llama-3.1-8b-instant': pg.Dict(
+        max_tokens=8192,
+        max_concurrency=32,
+        cost_per_1k_input_tokens=0.00005,
+        cost_per_1k_output_tokens=0.00008,
+    ),
+    'llama3-70b-8192': pg.Dict(
+        max_tokens=8192,
+        max_concurrency=16,
+        cost_per_1k_input_tokens=0.00059,
+        cost_per_1k_output_tokens=0.00079,
+    ),
+    'llama3-8b-8192': pg.Dict(
+        max_tokens=8192,
+        max_concurrency=32,
+        cost_per_1k_input_tokens=0.00005,
+        cost_per_1k_output_tokens=0.00008,
+    ),
+    'llama2-70b-4096': pg.Dict(
+        max_tokens=4096,
+        max_concurrency=16,
+    ),
+    'mixtral-8x7b-32768': pg.Dict(
+        max_tokens=32768,
+        max_concurrency=16,
+        cost_per_1k_input_tokens=0.00024,
+        cost_per_1k_output_tokens=0.00024,
+    ),
+    'gemma2-9b-it': pg.Dict(
+        max_tokens=8192,
+        max_concurrency=32,
+        cost_per_1k_input_tokens=0.0002,
+        cost_per_1k_output_tokens=0.0002,
+    ),
+    'gemma-7b-it': pg.Dict(
+        max_tokens=8192,
+        max_concurrency=32,
+        cost_per_1k_input_tokens=0.00007,
+        cost_per_1k_output_tokens=0.00007,
+    ),
+    'whisper-large-v3': pg.Dict(
+        max_tokens=8192,
+        max_concurrency=16,
+    ),
+    'whisper-large-v3-turbo': pg.Dict(
+        max_tokens=8192,
+        max_concurrency=16,
+    )
 }
 
 
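Since the settings are plain `pg.Dict` values, the table can be queried programmatically; for instance, to find the cheapest Groq model with published pricing (a small sketch, not part of the package):

```python
# Sketch: querying the settings table; pg.Dict supports .get() like a dict.
from langfun.core.llms import groq

priced = {
    name: settings.cost_per_1k_input_tokens
    for name, settings in groq.SUPPORTED_MODELS_AND_SETTINGS.items()
    if settings.get('cost_per_1k_input_tokens') is not None
}
print(min(priced, key=priced.get))  # 'llama-3.2-1b-preview' ($0.00004/1k)
```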
@@ -89,6 +151,25 @@ class Groq(rest.REST):
   def max_concurrency(self) -> int:
     return SUPPORTED_MODELS_AND_SETTINGS[self.model].max_concurrency
 
+  def estimate_cost(
+      self,
+      num_input_tokens: int,
+      num_output_tokens: int
+  ) -> float | None:
+    """Estimate the cost based on usage."""
+    cost_per_1k_input_tokens = SUPPORTED_MODELS_AND_SETTINGS[self.model].get(
+        'cost_per_1k_input_tokens', None
+    )
+    cost_per_1k_output_tokens = SUPPORTED_MODELS_AND_SETTINGS[self.model].get(
+        'cost_per_1k_output_tokens', None
+    )
+    if cost_per_1k_input_tokens is None or cost_per_1k_output_tokens is None:
+      return None
+    return (
+        cost_per_1k_input_tokens * num_input_tokens
+        + cost_per_1k_output_tokens * num_output_tokens
+    ) / 1000
+
   def request(
       self,
       prompt: lf.Message,
@@ -156,6 +237,10 @@ class Groq(rest.REST):
             prompt_tokens=usage['prompt_tokens'],
             completion_tokens=usage['completion_tokens'],
             total_tokens=usage['total_tokens'],
+            estimated_cost=self.estimate_cost(
+                num_input_tokens=usage['prompt_tokens'],
+                num_output_tokens=usage['completion_tokens'],
+            ),
         ),
     )
 
@@ -170,6 +255,24 @@ class Groq(rest.REST):
     )
 
 
+class GroqLlama3_2_3B(Groq):  # pylint: disable=invalid-name
+  """Llama3.2-3B with 8K context window.
+
+  See: https://huggingface.co/meta-llama/Llama-3.2-3B
+  """
+
+  model = 'llama-3.2-3b-preview'
+
+
+class GroqLlama3_2_1B(Groq):  # pylint: disable=invalid-name
+  """Llama3.2-1B with 8K context window.
+
+  See: https://huggingface.co/meta-llama/Llama-3.2-1B
+  """
+
+  model = 'llama-3.2-1b-preview'
+
+
 class GroqLlama3_8B(Groq):  # pylint: disable=invalid-name
   """Llama3-8B with 8K context window.
 
@@ -179,6 +282,24 @@ class GroqLlama3_8B(Groq):  # pylint: disable=invalid-name
   model = 'llama3-8b-8192'
 
 
+class GroqLlama3_1_70B(Groq):  # pylint: disable=invalid-name
+  """Llama3.1-70B with 8K context window.
+
+  See: https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/MODEL_CARD.md  # pylint: disable=line-too-long
+  """
+
+  model = 'llama-3.1-70b-versatile'
+
+
+class GroqLlama3_1_8B(Groq):  # pylint: disable=invalid-name
+  """Llama3.1-8B with 8K context window.
+
+  See: https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/MODEL_CARD.md  # pylint: disable=line-too-long
+  """
+
+  model = 'llama-3.1-8b-instant'
+
+
 class GroqLlama3_70B(Groq):  # pylint: disable=invalid-name
   """Llama3-70B with 8K context window.
 
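Each new subclass only pins the `model` field; concurrency limits and cost estimation come from the `Groq` base class and the settings table. A hedged usage sketch (no request is actually sent here):

```python
# Sketch: the subclasses inherit everything except the pinned model id.
import langfun as lf

lm = lf.llms.GroqLlama3_1_70B()   # model='llama-3.1-70b-versatile'
print(lm.max_concurrency)         # 16, from SUPPORTED_MODELS_AND_SETTINGS
print(lm.estimate_cost(num_input_tokens=1000, num_output_tokens=1000))
# (0.00059 * 1000 + 0.00079 * 1000) / 1000 = $0.00138
```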
@@ -206,10 +327,37 @@ class GroqMistral_8x7B(Groq):  # pylint: disable=invalid-name
   model = 'mixtral-8x7b-32768'
 
 
-class GroqGemma7B_IT(Groq):  # pylint: disable=invalid-name
+class GroqGemma2_9B_IT(Groq):  # pylint: disable=invalid-name
+  """Gemma2 9B with 8K context window.
+
+  See: https://huggingface.co/google/gemma-2-9b-it
+  """
+
+  model = 'gemma2-9b-it'
+
+
+class GroqGemma_7B_IT(Groq):  # pylint: disable=invalid-name
   """Gemma 7B with 8K context window.
 
   See: https://huggingface.co/google/gemma-1.1-7b-it
   """
 
   model = 'gemma-7b-it'
+
+
+class GroqWhisper_Large_v3(Groq):  # pylint: disable=invalid-name
+  """Whisper Large V3 with 8K context window.
+
+  See: https://huggingface.co/openai/whisper-large-v3
+  """
+
+  model = 'whisper-large-v3'
+
+
+class GroqWhisper_Large_v3Turbo(Groq):  # pylint: disable=invalid-name
+  """Whisper Large V3 Turbo with 8K context window.
+
+  See: https://huggingface.co/openai/whisper-large-v3-turbo
+  """
+
+  model = 'whisper-large-v3-turbo'
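The Whisper wrappers follow the same subclass pattern, though Whisper is a speech-to-text model: the diff adds no pricing entries for it, so `estimate_cost` returns `None`, and whether Groq's chat-completions endpoint (which this wrapper targets) accepts these model ids is not established by this diff. A minimal sketch:

```python
# Sketch: Whisper wrappers have no pricing entry, so cost estimation is None.
import langfun as lf

lm = lf.llms.GroqWhisper_Large_v3Turbo()  # model='whisper-large-v3-turbo'
print(lm.estimate_cost(num_input_tokens=1000, num_output_tokens=1000))  # None
```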