langfun 0.0.2.dev20240330__py3-none-any.whl → 0.0.2.dev20240511__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of langfun might be problematic.

Files changed (59)
  1. langfun/__init__.py +7 -0
  2. langfun/core/__init__.py +1 -0
  3. langfun/core/coding/python/correction.py +0 -7
  4. langfun/core/component.py +6 -0
  5. langfun/core/component_test.py +1 -0
  6. langfun/core/eval/__init__.py +15 -0
  7. langfun/core/eval/base.py +665 -95
  8. langfun/core/eval/base_test.py +224 -53
  9. langfun/core/eval/matching.py +48 -30
  10. langfun/core/eval/matching_test.py +25 -3
  11. langfun/core/eval/patching.py +130 -0
  12. langfun/core/eval/patching_test.py +170 -0
  13. langfun/core/eval/scoring.py +19 -10
  14. langfun/core/eval/scoring_test.py +21 -3
  15. langfun/core/langfunc.py +1 -22
  16. langfun/core/langfunc_test.py +10 -4
  17. langfun/core/language_model.py +130 -24
  18. langfun/core/language_model_test.py +249 -26
  19. langfun/core/llms/__init__.py +27 -2
  20. langfun/core/llms/anthropic.py +263 -0
  21. langfun/core/llms/anthropic_test.py +167 -0
  22. langfun/core/llms/cache/in_memory_test.py +37 -28
  23. langfun/core/llms/fake.py +34 -25
  24. langfun/core/llms/fake_test.py +122 -11
  25. langfun/core/llms/google_genai.py +8 -0
  26. langfun/core/llms/google_genai_test.py +8 -3
  27. langfun/core/llms/groq.py +260 -0
  28. langfun/core/llms/groq_test.py +170 -0
  29. langfun/core/llms/llama_cpp.py +3 -1
  30. langfun/core/llms/openai.py +100 -81
  31. langfun/core/llms/openai_test.py +287 -60
  32. langfun/core/llms/vertexai.py +291 -0
  33. langfun/core/llms/vertexai_test.py +233 -0
  34. langfun/core/modalities/image.py +1 -3
  35. langfun/core/modalities/mime.py +6 -0
  36. langfun/core/modalities/video.py +6 -5
  37. langfun/core/structured/__init__.py +5 -0
  38. langfun/core/structured/completion_test.py +2 -2
  39. langfun/core/structured/function_generation.py +245 -0
  40. langfun/core/structured/function_generation_test.py +329 -0
  41. langfun/core/structured/mapping.py +61 -3
  42. langfun/core/structured/mapping_test.py +17 -0
  43. langfun/core/structured/parsing_test.py +18 -13
  44. langfun/core/structured/prompting.py +61 -12
  45. langfun/core/structured/prompting_test.py +122 -12
  46. langfun/core/structured/schema.py +38 -6
  47. langfun/core/structured/schema_generation_test.py +2 -2
  48. langfun/core/structured/schema_test.py +36 -7
  49. langfun/core/structured/scoring.py +4 -1
  50. langfun/core/structured/scoring_test.py +6 -0
  51. langfun/core/template.py +147 -11
  52. langfun/core/template_test.py +75 -0
  53. langfun/core/templates/selfplay_test.py +6 -2
  54. {langfun-0.0.2.dev20240330.dist-info → langfun-0.0.2.dev20240511.dist-info}/METADATA +3 -2
  55. langfun-0.0.2.dev20240511.dist-info/RECORD +112 -0
  56. langfun-0.0.2.dev20240330.dist-info/RECORD +0 -102
  57. {langfun-0.0.2.dev20240330.dist-info → langfun-0.0.2.dev20240511.dist-info}/LICENSE +0 -0
  58. {langfun-0.0.2.dev20240330.dist-info → langfun-0.0.2.dev20240511.dist-info}/WHEEL +0 -0
  59. {langfun-0.0.2.dev20240330.dist-info → langfun-0.0.2.dev20240511.dist-info}/top_level.txt +0 -0
langfun/core/llms/openai.py
@@ -26,65 +26,55 @@ from openai import openai_object
 import pyglove as pg
 
 
-class Usage(pg.Object):
-  """Usage information per completion."""
-
-  prompt_tokens: int
-  completion_tokens: int
-  total_tokens: int
-
-
-class LMSamplingResult(lf.LMSamplingResult):
-  """LMSamplingResult with usage information."""
-
-  usage: Usage | None = None
-
-
-SUPPORTED_MODELS_AND_SETTINGS = [
-    # Model name, max concurrent requests.
-    # The concurrent requests is estimated by TPM/RPM from
-    # https://platform.openai.com/account/limits
-    # GPT-4 Turbo models.
-    ('gpt-4-turbo-preview', 1),  # GPT-4 Turbo.
-    ('gpt-4-0125-preview', 1),  # GPT-4 Turbo
-    ('gpt-4-1106-preview', 1),  # GPT-4 Turbo
-    ('gpt-4-vision-preview', 1),  # GPT-4 Turbo with Vision.
-    # GPT-4 models.
-    ('gpt-4', 4),
-    ('gpt-4-0613', 4),
-    ('gpt-4-0314', 4),
-    ('gpt-4-32k', 4),
-    ('gpt-4-32k-0613', 4),
-    ('gpt-4-32k-0314', 4),
-    # GPT-3.5 Turbo models.
-    ('gpt-3.5-turbo', 16),
-    ('gpt-3.5-turbo-0125', 16),
-    ('gpt-3.5-turbo-1106', 16),
-    ('gpt-3.5-turbo-0613', 16),
-    ('gpt-3.5-turbo-0301', 16),
-    ('gpt-3.5-turbo-16k', 16),
-    ('gpt-3.5-turbo-16k-0613', 16),
-    ('gpt-3.5-turbo-16k-0301', 16),
-    # GPT-3.5 models.
-    ('text-davinci-003', 8),  # GPT-3.5, trained with RHLF.
-    ('text-davinci-002', 4),  # Trained with SFT but no RHLF.
-    ('code-davinci-002', 4),
-    # GPT-3 instruction-tuned models.
-    ('text-curie-001', 4),
-    ('text-babbage-001', 4),
-    ('text-ada-001', 4),
-    ('davinci', 4),
-    ('curie', 4),
-    ('babbage', 4),
-    ('ada', 4),
-    # GPT-3 base models without instruction tuning.
-    ('babbage-002', 4),
-    ('davinci-002', 4),
-]
-
-
-# Model concurreny setting.
-_MODEL_CONCURRENCY = {m[0]: m[1] for m in SUPPORTED_MODELS_AND_SETTINGS}
+# From https://platform.openai.com/settings/organization/limits
+_DEFAULT_TPM = 250000
+_DEFAULT_RPM = 3000
+
+SUPPORTED_MODELS_AND_SETTINGS = {
+    # Models from https://platform.openai.com/docs/models
+    # RPM is from https://platform.openai.com/docs/guides/rate-limits
+    # GPT-4-Turbo models
+    'gpt-4-turbo': pg.Dict(rpm=10000, tpm=1500000),
+    'gpt-4-turbo-2024-04-09': pg.Dict(rpm=10000, tpm=1500000),
+    'gpt-4-turbo-preview': pg.Dict(rpm=10000, tpm=1500000),
+    'gpt-4-0125-preview': pg.Dict(rpm=10000, tpm=1500000),
+    'gpt-4-1106-preview': pg.Dict(rpm=10000, tpm=1500000),
+    'gpt-4-vision-preview': pg.Dict(rpm=10000, tpm=1500000),
+    'gpt-4-1106-vision-preview': pg.Dict(
+        rpm=10000, tpm=1500000
+    ),
+    # GPT-4 models
+    'gpt-4': pg.Dict(rpm=10000, tpm=300000),
+    'gpt-4-0613': pg.Dict(rpm=10000, tpm=300000),
+    'gpt-4-0314': pg.Dict(rpm=10000, tpm=300000),
+    'gpt-4-32k': pg.Dict(rpm=10000, tpm=300000),
+    'gpt-4-32k-0613': pg.Dict(rpm=10000, tpm=300000),
+    'gpt-4-32k-0314': pg.Dict(rpm=10000, tpm=300000),
+    # GPT-3.5-Turbo models
+    'gpt-3.5-turbo': pg.Dict(rpm=10000, tpm=2000000),
+    'gpt-3.5-turbo-0125': pg.Dict(rpm=10000, tpm=2000000),
+    'gpt-3.5-turbo-1106': pg.Dict(rpm=10000, tpm=2000000),
+    'gpt-3.5-turbo-0613': pg.Dict(rpm=10000, tpm=2000000),
+    'gpt-3.5-turbo-0301': pg.Dict(rpm=10000, tpm=2000000),
+    'gpt-3.5-turbo-16k': pg.Dict(rpm=10000, tpm=2000000),
+    'gpt-3.5-turbo-16k-0613': pg.Dict(rpm=10000, tpm=2000000),
+    'gpt-3.5-turbo-16k-0301': pg.Dict(rpm=10000, tpm=2000000),
+    # GPT-3.5 models
+    'text-davinci-003': pg.Dict(rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM),
+    'text-davinci-002': pg.Dict(rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM),
+    'code-davinci-002': pg.Dict(rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM),
+    # GPT-3 instruction-tuned models
+    'text-curie-001': pg.Dict(rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM),
+    'text-babbage-001': pg.Dict(rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM),
+    'text-ada-001': pg.Dict(rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM),
+    'davinci': pg.Dict(rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM),
+    'curie': pg.Dict(rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM),
+    'babbage': pg.Dict(rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM),
+    'ada': pg.Dict(rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM),
+    # GPT-3 base models
+    'babbage-002': pg.Dict(rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM),
+    'davinci-002': pg.Dict(rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM),
+}
 
 
 @lf.use_init_args(['model'])
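
The hunk above replaces the hard-coded (model, concurrency) pairs with a settings table keyed on model name, where each entry records the published requests-per-minute (rpm) and tokens-per-minute (tpm) limits as a pg.Dict. A minimal sketch of querying such a table (entries copied from the diff; pg.Dict supports both key and attribute access):

    import pyglove as pg

    SUPPORTED_MODELS_AND_SETTINGS = {
        'gpt-4-turbo': pg.Dict(rpm=10000, tpm=1500000),
        'davinci-002': pg.Dict(rpm=3000, tpm=250000),
    }

    settings = SUPPORTED_MODELS_AND_SETTINGS['gpt-4-turbo']
    print(settings.rpm, settings.get('tpm', 0))  # 10000 1500000

Using .get with a default of 0, as max_concurrency does further down, keeps the lookup safe for entries that omit one of the limits.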
@@ -93,7 +83,7 @@ class OpenAI(lf.LanguageModel):
 
   model: pg.typing.Annotated[
       pg.typing.Enum(
-          pg.MISSING_VALUE, [m[0] for m in SUPPORTED_MODELS_AND_SETTINGS]
+          pg.MISSING_VALUE, list(SUPPORTED_MODELS_AND_SETTINGS.keys())
       ),
       'The name of the model to use.',
   ] = 'gpt-3.5-turbo'
@@ -145,7 +135,11 @@ class OpenAI(lf.LanguageModel):
 
   @property
   def max_concurrency(self) -> int:
-    return _MODEL_CONCURRENCY[self.model]
+    rpm = SUPPORTED_MODELS_AND_SETTINGS[self.model].get('rpm', 0)
+    tpm = SUPPORTED_MODELS_AND_SETTINGS[self.model].get('tpm', 0)
+    return self.rate_to_max_concurrency(
+        requests_per_min=rpm, tokens_per_min=tpm
+    )
 
   @classmethod
   def dir(cls):
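
max_concurrency is no longer a hand-tuned constant; it is derived from the model's rate limits via rate_to_max_concurrency, a method added to lf.LanguageModel in this release (see the language_model.py entry in the file list). The exact conversion lives in langfun/core/language_model.py; a plausible sketch, assuming roughly minute-long requests and a nominal per-request token budget, is:

    # A sketch only, not the shipped implementation. Assumes each concurrent
    # slot serves about one request per minute, and a nominal token budget
    # per request so a TPM limit can also be converted into a slot count.
    TOKENS_PER_REQUEST = 250  # Assumed nominal budget.

    def rate_to_max_concurrency(
        requests_per_min: float = 0, tokens_per_min: float = 0
    ) -> int:
      if tokens_per_min > 0:
        return max(int(tokens_per_min / TOKENS_PER_REQUEST / 60), 1)
      if requests_per_min > 0:
        return max(int(requests_per_min / 60), 1)  # Never zero.
      return 1  # Fallback when no limit is known.

Under these assumptions, 'gpt-4-turbo' (tpm=1500000) would map to 1500000 / 250 / 60 = 100 concurrent requests.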
@@ -163,8 +157,6 @@ class OpenAI(lf.LanguageModel):
     # NOTE(daiyip): options.top_k is not applicable.
     args = dict(
         n=options.n,
-        temperature=options.temperature,
-        max_tokens=options.max_tokens,
         stream=False,
         timeout=self.timeout,
         logprobs=options.logprobs,
@@ -173,13 +165,17 @@ class OpenAI(lf.LanguageModel):
     # Completion and ChatCompletion uses different parameter name for model.
     args['model' if self.is_chat_model else 'engine'] = self.model
 
+    if options.temperature is not None:
+      args['temperature'] = options.temperature
+    if options.max_tokens is not None:
+      args['max_tokens'] = options.max_tokens
     if options.top_p is not None:
       args['top_p'] = options.top_p
     if options.stop:
       args['stop'] = options.stop
     return args
 
-  def _sample(self, prompts: list[lf.Message]) -> list[LMSamplingResult]:
+  def _sample(self, prompts: list[lf.Message]) -> list[lf.LMSamplingResult]:
     assert self._api_initialized
     if self.is_chat_model:
       return self._chat_complete_batch(prompts)
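
With this hunk, temperature and max_tokens join top_p and stop as conditionally forwarded arguments: an unset option is omitted from the request so the provider's server-side default applies, instead of an explicit None being sent. The pattern, extracted for illustration with hypothetical option values:

    # Stand-in for lf.LMSamplingOptions; None means "use the provider default".
    options = dict(n=1, temperature=None, max_tokens=None, top_p=0.95, stop=None)

    args = dict(n=options['n'], stream=False)
    for key in ('temperature', 'max_tokens', 'top_p'):
      if options[key] is not None:
        args[key] = options[key]
    if options['stop']:
      args['stop'] = options['stop']

    assert args == dict(n=1, stream=False, top_p=0.95)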
@@ -187,7 +183,8 @@ class OpenAI(lf.LanguageModel):
     return self._complete_batch(prompts)
 
   def _complete_batch(
-      self, prompts: list[lf.Message]) -> list[LMSamplingResult]:
+      self, prompts: list[lf.Message]
+  ) -> list[lf.LMSamplingResult]:
 
     def _open_ai_completion(prompts):
       response = openai.Completion.create(
@@ -202,13 +199,13 @@ class OpenAI(lf.LanguageModel):
             lf.LMSample(choice.text.strip(), score=choice.logprobs or 0.0)
         )
 
-      usage = Usage(
+      usage = lf.LMSamplingUsage(
           prompt_tokens=response.usage.prompt_tokens,
           completion_tokens=response.usage.completion_tokens,
          total_tokens=response.usage.total_tokens,
       )
       return [
-          LMSamplingResult(
+          lf.LMSamplingResult(
               samples_by_index[index], usage=usage if index == 0 else None
           )
           for index in sorted(samples_by_index.keys())
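
The module-local Usage and LMSamplingResult subclasses deleted in the first hunk are replaced here by lf.LMSamplingUsage and lf.LMSamplingResult from the core language_model module, so token accounting is uniform across backends. A small sketch of constructing and reading a result, assuming the field names shown in this hunk:

    import langfun as lf

    usage = lf.LMSamplingUsage(
        prompt_tokens=12, completion_tokens=34, total_tokens=46
    )
    result = lf.LMSamplingResult(samples=[], usage=usage)
    print(result.usage.total_tokens)  # 46

Note that usage is attached only to the first result of a batch (usage if index == 0 else None), since the Completions API reports usage once per response rather than per prompt.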
@@ -220,20 +217,25 @@ class OpenAI(lf.LanguageModel):
         retry_on_errors=(
             openai_error.ServiceUnavailableError,
             openai_error.RateLimitError,
+            # Handling transient OpenAI server error (code 500). Check out
+            # https://platform.openai.com/docs/guides/error-codes/error-codes
+            (openai_error.APIError,
+             '.*The server had an error processing your request'),
         ),
     )[0]
 
   def _chat_complete_batch(
       self, prompts: list[lf.Message]
-  ) -> list[LMSamplingResult]:
+  ) -> list[lf.LMSamplingResult]:
     def _open_ai_chat_completion(prompt: lf.Message):
       if self.multimodal:
         content = []
         for chunk in prompt.chunk():
           if isinstance(chunk, str):
             item = dict(type='text', text=chunk)
-          elif isinstance(chunk, lf_modalities.Image) and chunk.uri:
-            item = dict(type='image_url', image_url=chunk.uri)
+          elif isinstance(chunk, lf_modalities.Image):
+            uri = chunk.uri or chunk.content_uri
+            item = dict(type='image_url', image_url=dict(url=uri))
           else:
             raise ValueError(f'Unsupported modality object: {chunk!r}.')
           content.append(item)
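
Two fixes land in the hunk above. First, transient OpenAI 500 errors are now retried: the added entry pairs an exception class with a message regex, which presumably follows the (error type, pattern) convention of langfun's retry machinery. Second, image chunks are sent in the nested shape the Chat Completions API expects, with chunk.content_uri (presumably a data: URI built from the image bytes) as a fallback when no remote URI is set. The content list the loop now produces, sketched with placeholder data:

    # Chat message content for a text + image prompt (placeholder values).
    content = [
        {'type': 'text', 'text': 'What is in this image?'},
        # Previously a bare URI string; now nested under image_url.url.
        {'type': 'image_url',
         'image_url': {'url': 'data:image/png;base64,iVBORw0KGgo'}},
    ]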
@@ -266,9 +268,9 @@ class OpenAI(lf.LanguageModel):
           )
         )
 
-      return LMSamplingResult(
+      return lf.LMSamplingResult(
           samples=samples,
-          usage=Usage(
+          usage=lf.LMSamplingUsage(
              prompt_tokens=response.usage.prompt_tokens,
              completion_tokens=response.usage.completion_tokens,
              total_tokens=response.usage.total_tokens,
@@ -291,26 +293,43 @@ class Gpt4(OpenAI):
 
 
 class Gpt4Turbo(Gpt4):
-  """GPT-4 Turbo with 128K context window size. Knowledge up to 4-2023."""
-  model = 'gpt-4-turbo-preview'
+  """GPT-4 Turbo with 128K context window. Knowledge up to Dec. 2023."""
+  model = 'gpt-4-turbo'
+  multimodal = True
 
 
-class Gpt4TurboVision(Gpt4Turbo):
-  """GPT-4 Turbo with vision."""
-  model = 'gpt-4-vision-preview'
+class Gpt4Turbo_20240409(Gpt4Turbo):  # pylint:disable=invalid-name
+  """GPT-4 Turbo with 128K context window. Knowledge up to Dec. 2023."""
+  model = 'gpt-4-turbo-2024-04-09'
   multimodal = True
 
 
-class Gpt4Turbo_0125(Gpt4Turbo):  # pylint:disable=invalid-name
-  """GPT-4 Turbo with 128K context window size. Knowledge up to 4-2023."""
+class Gpt4TurboPreview(Gpt4):
+  """GPT-4 Turbo Preview with 128k context window. Knowledge up to Dec. 2023."""
+  model = 'gpt-4-turbo-preview'
+
+
+class Gpt4TurboPreview_0125(Gpt4TurboPreview):  # pylint: disable=invalid-name
+  """GPT-4 Turbo Preview with 128k context window. Knowledge up to Dec. 2023."""
   model = 'gpt-4-0125-preview'
 
 
-class Gpt4Turbo_1106(Gpt4Turbo):  # pylint:disable=invalid-name
-  """GPT-4 Turbo @20231106. 128K context window. Knowledge up to 4-2023."""
+class Gpt4TurboPreview_1106(Gpt4TurboPreview):  # pylint: disable=invalid-name
+  """GPT-4 Turbo Preview with 128k context window. Knowledge up to Apr. 2023."""
   model = 'gpt-4-1106-preview'
 
 
+class Gpt4VisionPreview(Gpt4):
+  """GPT-4 Turbo vision preview. 128k context window. Knowledge to Apr. 2023."""
+  model = 'gpt-4-vision-preview'
+  multimodal = True
+
+
+class Gpt4VisionPreview_1106(Gpt4):  # pylint: disable=invalid-name
+  """GPT-4 Turbo vision preview. 128k context window. Knowledge to Apr. 2023."""
+  model = 'gpt-4-1106-vision-preview'
+
+
 class Gpt4_0613(Gpt4):  # pylint:disable=invalid-name
   """GPT-4 @20230613. 8K context window. Knowledge up to 9-2021."""
   model = 'gpt-4-0613'
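
The class renames above split the preview aliases out from the stable names: Gpt4Turbo now points at the multimodal 'gpt-4-turbo' model rather than 'gpt-4-turbo-preview', with the Gpt4TurboPreview* and Gpt4VisionPreview* classes covering the dated snapshots. A usage sketch with the classes from this diff (per the OpenAI base class, the key can also be supplied via the api_key attribute instead of the environment):

    import langfun as lf

    lm = lf.llms.Gpt4Turbo()   # Reads OPENAI_API_KEY from the environment.
    print(lm.model)            # 'gpt-4-turbo'
    print(lm.multimodal)       # True
    print(lm.max_concurrency)  # Derived from rpm=10000 / tpm=1500000.

    # Pin an exact snapshot when reproducibility matters:
    lm_pinned = lf.llms.Gpt4Turbo_20240409()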