langfun 0.0.2.dev20240330__py3-none-any.whl → 0.1.2.dev202501140804__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their public registry.
Files changed (145)
  1. langfun/__init__.py +22 -2
  2. langfun/core/__init__.py +17 -5
  3. langfun/core/agentic/__init__.py +30 -0
  4. langfun/core/agentic/action.py +854 -0
  5. langfun/core/agentic/action_eval.py +150 -0
  6. langfun/core/agentic/action_eval_test.py +109 -0
  7. langfun/core/agentic/action_test.py +136 -0
  8. langfun/core/coding/python/__init__.py +5 -11
  9. langfun/core/coding/python/correction.py +37 -28
  10. langfun/core/coding/python/correction_test.py +29 -3
  11. langfun/core/coding/python/execution.py +40 -216
  12. langfun/core/coding/python/execution_test.py +29 -89
  13. langfun/core/coding/python/generation.py +21 -11
  14. langfun/core/coding/python/generation_test.py +2 -2
  15. langfun/core/coding/python/parsing.py +108 -193
  16. langfun/core/coding/python/parsing_test.py +2 -105
  17. langfun/core/component.py +69 -2
  18. langfun/core/component_test.py +54 -0
  19. langfun/core/concurrent.py +414 -117
  20. langfun/core/concurrent_test.py +111 -24
  21. langfun/core/console.py +18 -5
  22. langfun/core/console_test.py +17 -0
  23. langfun/core/eval/__init__.py +17 -0
  24. langfun/core/eval/base.py +767 -140
  25. langfun/core/eval/base_test.py +238 -53
  26. langfun/core/eval/matching.py +80 -76
  27. langfun/core/eval/matching_test.py +19 -9
  28. langfun/core/eval/patching.py +130 -0
  29. langfun/core/eval/patching_test.py +170 -0
  30. langfun/core/eval/scoring.py +37 -28
  31. langfun/core/eval/scoring_test.py +21 -3
  32. langfun/core/eval/v2/__init__.py +42 -0
  33. langfun/core/eval/v2/checkpointing.py +380 -0
  34. langfun/core/eval/v2/checkpointing_test.py +228 -0
  35. langfun/core/eval/v2/eval_test_helper.py +136 -0
  36. langfun/core/eval/v2/evaluation.py +725 -0
  37. langfun/core/eval/v2/evaluation_test.py +180 -0
  38. langfun/core/eval/v2/example.py +305 -0
  39. langfun/core/eval/v2/example_test.py +128 -0
  40. langfun/core/eval/v2/experiment.py +1048 -0
  41. langfun/core/eval/v2/experiment_test.py +433 -0
  42. langfun/core/eval/v2/metric_values.py +156 -0
  43. langfun/core/eval/v2/metric_values_test.py +80 -0
  44. langfun/core/eval/v2/metrics.py +357 -0
  45. langfun/core/eval/v2/metrics_test.py +203 -0
  46. langfun/core/eval/v2/progress.py +348 -0
  47. langfun/core/eval/v2/progress_test.py +82 -0
  48. langfun/core/eval/v2/progress_tracking.py +210 -0
  49. langfun/core/eval/v2/progress_tracking_test.py +66 -0
  50. langfun/core/eval/v2/reporting.py +270 -0
  51. langfun/core/eval/v2/reporting_test.py +158 -0
  52. langfun/core/eval/v2/runners.py +488 -0
  53. langfun/core/eval/v2/runners_test.py +334 -0
  54. langfun/core/langfunc.py +3 -21
  55. langfun/core/langfunc_test.py +26 -8
  56. langfun/core/language_model.py +686 -48
  57. langfun/core/language_model_test.py +681 -44
  58. langfun/core/llms/__init__.py +100 -12
  59. langfun/core/llms/anthropic.py +488 -0
  60. langfun/core/llms/anthropic_test.py +235 -0
  61. langfun/core/llms/cache/base.py +21 -2
  62. langfun/core/llms/cache/in_memory.py +13 -0
  63. langfun/core/llms/cache/in_memory_test.py +88 -28
  64. langfun/core/llms/compositional.py +101 -0
  65. langfun/core/llms/compositional_test.py +73 -0
  66. langfun/core/llms/deepseek.py +117 -0
  67. langfun/core/llms/deepseek_test.py +61 -0
  68. langfun/core/llms/fake.py +39 -26
  69. langfun/core/llms/fake_test.py +136 -11
  70. langfun/core/llms/gemini.py +507 -0
  71. langfun/core/llms/gemini_test.py +195 -0
  72. langfun/core/llms/google_genai.py +62 -218
  73. langfun/core/llms/google_genai_test.py +9 -197
  74. langfun/core/llms/groq.py +276 -0
  75. langfun/core/llms/groq_test.py +64 -0
  76. langfun/core/llms/llama_cpp.py +15 -40
  77. langfun/core/llms/llama_cpp_test.py +4 -30
  78. langfun/core/llms/openai.py +436 -226
  79. langfun/core/llms/openai_compatible.py +179 -0
  80. langfun/core/llms/openai_compatible_test.py +495 -0
  81. langfun/core/llms/openai_test.py +35 -174
  82. langfun/core/llms/rest.py +113 -0
  83. langfun/core/llms/rest_test.py +111 -0
  84. langfun/core/llms/vertexai.py +192 -0
  85. langfun/core/llms/vertexai_test.py +52 -0
  86. langfun/core/logging.py +284 -0
  87. langfun/core/logging_test.py +125 -0
  88. langfun/core/message.py +319 -9
  89. langfun/core/message_test.py +190 -13
  90. langfun/core/modalities/__init__.py +6 -2
  91. langfun/core/modalities/audio.py +30 -0
  92. langfun/core/modalities/audio_test.py +63 -0
  93. langfun/core/modalities/image.py +39 -20
  94. langfun/core/modalities/image_test.py +52 -9
  95. langfun/core/modalities/mime.py +206 -29
  96. langfun/core/modalities/mime_test.py +90 -9
  97. langfun/core/modalities/ms_office.py +117 -0
  98. langfun/core/modalities/ms_office_test.py +389 -0
  99. langfun/core/modalities/pdf.py +22 -0
  100. langfun/core/modalities/pdf_test.py +57 -0
  101. langfun/core/modalities/video.py +9 -23
  102. langfun/core/modalities/video_test.py +3 -3
  103. langfun/core/modality.py +26 -3
  104. langfun/core/modality_test.py +2 -2
  105. langfun/core/sampling.py +11 -11
  106. langfun/core/structured/__init__.py +15 -16
  107. langfun/core/structured/completion.py +32 -5
  108. langfun/core/structured/completion_test.py +9 -8
  109. langfun/core/structured/description.py +2 -2
  110. langfun/core/structured/description_test.py +3 -3
  111. langfun/core/structured/function_generation.py +278 -0
  112. langfun/core/structured/function_generation_test.py +399 -0
  113. langfun/core/structured/mapping.py +150 -46
  114. langfun/core/structured/mapping_test.py +105 -0
  115. langfun/core/structured/parsing.py +33 -21
  116. langfun/core/structured/parsing_test.py +71 -22
  117. langfun/core/structured/querying.py +746 -0
  118. langfun/core/structured/{prompting_test.py → querying_test.py} +545 -60
  119. langfun/core/structured/schema.py +208 -99
  120. langfun/core/structured/schema_generation.py +1 -1
  121. langfun/core/structured/schema_generation_test.py +2 -2
  122. langfun/core/structured/schema_test.py +133 -34
  123. langfun/core/structured/scoring.py +125 -19
  124. langfun/core/structured/scoring_test.py +30 -0
  125. langfun/core/structured/tokenization.py +64 -0
  126. langfun/core/structured/tokenization_test.py +48 -0
  127. langfun/core/template.py +240 -11
  128. langfun/core/template_test.py +146 -1
  129. langfun/core/templates/conversation.py +9 -0
  130. langfun/core/templates/conversation_test.py +4 -3
  131. langfun/core/templates/selfplay_test.py +14 -2
  132. langfun-0.1.2.dev202501140804.dist-info/METADATA +225 -0
  133. langfun-0.1.2.dev202501140804.dist-info/RECORD +153 -0
  134. {langfun-0.0.2.dev20240330.dist-info → langfun-0.1.2.dev202501140804.dist-info}/WHEEL +1 -1
  135. langfun/core/coding/python/errors.py +0 -108
  136. langfun/core/coding/python/errors_test.py +0 -99
  137. langfun/core/coding/python/permissions.py +0 -90
  138. langfun/core/coding/python/permissions_test.py +0 -86
  139. langfun/core/structured/prompting.py +0 -217
  140. langfun/core/text_formatting.py +0 -162
  141. langfun/core/text_formatting_test.py +0 -47
  142. langfun-0.0.2.dev20240330.dist-info/METADATA +0 -99
  143. langfun-0.0.2.dev20240330.dist-info/RECORD +0 -102
  144. {langfun-0.0.2.dev20240330.dist-info → langfun-0.1.2.dev202501140804.dist-info}/LICENSE +0 -0
  145. {langfun-0.0.2.dev20240330.dist-info → langfun-0.1.2.dev202501140804.dist-info}/top_level.txt +0 -0
@@ -13,95 +13,302 @@
  # limitations under the License.
  """Language models from OpenAI."""

- import collections
- import functools
  import os
- from typing import Annotated, Any, cast
+ from typing import Annotated, Any

  import langfun.core as lf
- from langfun.core import modalities as lf_modalities
- import openai
- from openai import error as openai_error
- from openai import openai_object
+ from langfun.core.llms import openai_compatible
  import pyglove as pg


- class Usage(pg.Object):
-   """Usage information per completion."""
-
-   prompt_tokens: int
-   completion_tokens: int
-   total_tokens: int
-
-
- class LMSamplingResult(lf.LMSamplingResult):
-   """LMSamplingResult with usage information."""
-
-   usage: Usage | None = None
-
-
- SUPPORTED_MODELS_AND_SETTINGS = [
-     # Model name, max concurrent requests.
-     # The concurrent requests is estimated by TPM/RPM from
-     # https://platform.openai.com/account/limits
-     # GPT-4 Turbo models.
-     ('gpt-4-turbo-preview', 1),  # GPT-4 Turbo.
-     ('gpt-4-0125-preview', 1),  # GPT-4 Turbo
-     ('gpt-4-1106-preview', 1),  # GPT-4 Turbo
-     ('gpt-4-vision-preview', 1),  # GPT-4 Turbo with Vision.
-     # GPT-4 models.
-     ('gpt-4', 4),
-     ('gpt-4-0613', 4),
-     ('gpt-4-0314', 4),
-     ('gpt-4-32k', 4),
-     ('gpt-4-32k-0613', 4),
-     ('gpt-4-32k-0314', 4),
-     # GPT-3.5 Turbo models.
-     ('gpt-3.5-turbo', 16),
-     ('gpt-3.5-turbo-0125', 16),
-     ('gpt-3.5-turbo-1106', 16),
-     ('gpt-3.5-turbo-0613', 16),
-     ('gpt-3.5-turbo-0301', 16),
-     ('gpt-3.5-turbo-16k', 16),
-     ('gpt-3.5-turbo-16k-0613', 16),
-     ('gpt-3.5-turbo-16k-0301', 16),
-     # GPT-3.5 models.
-     ('text-davinci-003', 8),  # GPT-3.5, trained with RHLF.
-     ('text-davinci-002', 4),  # Trained with SFT but no RHLF.
-     ('code-davinci-002', 4),
-     # GPT-3 instruction-tuned models.
-     ('text-curie-001', 4),
-     ('text-babbage-001', 4),
-     ('text-ada-001', 4),
-     ('davinci', 4),
-     ('curie', 4),
-     ('babbage', 4),
-     ('ada', 4),
-     # GPT-3 base models without instruction tuning.
-     ('babbage-002', 4),
-     ('davinci-002', 4),
- ]
-
-
- # Model concurreny setting.
- _MODEL_CONCURRENCY = {m[0]: m[1] for m in SUPPORTED_MODELS_AND_SETTINGS}
+ # From https://platform.openai.com/settings/organization/limits
+ _DEFAULT_TPM = 250000
+ _DEFAULT_RPM = 3000
+
+ SUPPORTED_MODELS_AND_SETTINGS = {
+     # Models from https://platform.openai.com/docs/models
+     # RPM is from https://platform.openai.com/docs/guides/rate-limits
+     # o1 (preview) models.
+     # Pricing in US dollars, from https://openai.com/api/pricing/
+     # as of 2024-10-10.
+     'o1': pg.Dict(
+         in_service=True,
+         rpm=10000,
+         tpm=5000000,
+         cost_per_1k_input_tokens=0.015,
+         cost_per_1k_output_tokens=0.06,
+     ),
+     'o1-preview': pg.Dict(
+         in_service=True,
+         rpm=10000,
+         tpm=5000000,
+         cost_per_1k_input_tokens=0.015,
+         cost_per_1k_output_tokens=0.06,
+     ),
+     'o1-preview-2024-09-12': pg.Dict(
+         in_service=True,
+         rpm=10000,
+         tpm=5000000,
+         cost_per_1k_input_tokens=0.015,
+         cost_per_1k_output_tokens=0.06,
+     ),
+     'o1-mini': pg.Dict(
+         in_service=True,
+         rpm=10000,
+         tpm=5000000,
+         cost_per_1k_input_tokens=0.003,
+         cost_per_1k_output_tokens=0.012,
+     ),
+     'o1-mini-2024-09-12': pg.Dict(
+         in_service=True,
+         rpm=10000,
+         tpm=5000000,
+         cost_per_1k_input_tokens=0.003,
+         cost_per_1k_output_tokens=0.012,
+     ),
+     # GPT-4o models
+     'gpt-4o-mini': pg.Dict(
+         in_service=True,
+         rpm=10000,
+         tpm=5000000,
+         cost_per_1k_input_tokens=0.00015,
+         cost_per_1k_output_tokens=0.0006,
+     ),
+     'gpt-4o-mini-2024-07-18': pg.Dict(
+         in_service=True,
+         rpm=10000,
+         tpm=5000000,
+         cost_per_1k_input_tokens=0.00015,
+         cost_per_1k_output_tokens=0.0006,
+     ),
+     'gpt-4o': pg.Dict(
+         in_service=True,
+         rpm=10000,
+         tpm=5000000,
+         cost_per_1k_input_tokens=0.0025,
+         cost_per_1k_output_tokens=0.01,
+     ),
+     'gpt-4o-2024-11-20': pg.Dict(
+         in_service=True,
+         rpm=10000,
+         tpm=5000000,
+         cost_per_1k_input_tokens=0.0025,
+         cost_per_1k_output_tokens=0.01,
+     ),
+     'gpt-4o-2024-08-06': pg.Dict(
+         in_service=True,
+         rpm=10000,
+         tpm=5000000,
+         cost_per_1k_input_tokens=0.0025,
+         cost_per_1k_output_tokens=0.01,
+     ),
+     'gpt-4o-2024-05-13': pg.Dict(
+         in_service=True,
+         rpm=10000,
+         tpm=5000000,
+         cost_per_1k_input_tokens=0.005,
+         cost_per_1k_output_tokens=0.015,
+     ),
+     # GPT-4-Turbo models
+     'gpt-4-turbo': pg.Dict(
+         in_service=True,
+         rpm=10000,
+         tpm=2000000,
+         cost_per_1k_input_tokens=0.01,
+         cost_per_1k_output_tokens=0.03,
+     ),
+     'gpt-4-turbo-2024-04-09': pg.Dict(
+         in_service=True,
+         rpm=10000,
+         tpm=2000000,
+         cost_per_1k_input_tokens=0.01,
+         cost_per_1k_output_tokens=0.03,
+     ),
+     'gpt-4-turbo-preview': pg.Dict(
+         in_service=True,
+         rpm=10000,
+         tpm=2000000,
+         cost_per_1k_input_tokens=0.01,
+         cost_per_1k_output_tokens=0.03,
+     ),
+     'gpt-4-0125-preview': pg.Dict(
+         in_service=True,
+         rpm=10000,
+         tpm=2000000,
+         cost_per_1k_input_tokens=0.01,
+         cost_per_1k_output_tokens=0.03,
+     ),
+     'gpt-4-1106-preview': pg.Dict(
+         in_service=True,
+         rpm=10000,
+         tpm=2000000,
+         cost_per_1k_input_tokens=0.01,
+         cost_per_1k_output_tokens=0.03,
+     ),
+     'gpt-4-vision-preview': pg.Dict(
+         in_service=True,
+         rpm=10000,
+         tpm=2000000,
+         cost_per_1k_input_tokens=0.01,
+         cost_per_1k_output_tokens=0.03,
+     ),
+     'gpt-4-1106-vision-preview': pg.Dict(
+         in_service=True,
+         rpm=10000,
+         tpm=2000000,
+         cost_per_1k_input_tokens=0.01,
+         cost_per_1k_output_tokens=0.03,
+     ),
+     # GPT-4 models
+     'gpt-4': pg.Dict(
+         in_service=True,
+         rpm=10000,
+         tpm=300000,
+         cost_per_1k_input_tokens=0.03,
+         cost_per_1k_output_tokens=0.06,
+     ),
+     'gpt-4-0613': pg.Dict(
+         in_service=False,
+         rpm=10000,
+         tpm=300000,
+         cost_per_1k_input_tokens=0.03,
+         cost_per_1k_output_tokens=0.06,
+     ),
+     'gpt-4-0314': pg.Dict(
+         in_service=False,
+         rpm=10000,
+         tpm=300000,
+         cost_per_1k_input_tokens=0.03,
+         cost_per_1k_output_tokens=0.06,
+     ),
+     'gpt-4-32k': pg.Dict(
+         in_service=True,
+         rpm=10000,
+         tpm=300000,
+         cost_per_1k_input_tokens=0.06,
+         cost_per_1k_output_tokens=0.12,
+     ),
+     'gpt-4-32k-0613': pg.Dict(
+         in_service=False,
+         rpm=10000,
+         tpm=300000,
+         cost_per_1k_input_tokens=0.06,
+         cost_per_1k_output_tokens=0.12,
+     ),
+     'gpt-4-32k-0314': pg.Dict(
+         in_service=False,
+         rpm=10000,
+         tpm=300000,
+         cost_per_1k_input_tokens=0.06,
+         cost_per_1k_output_tokens=0.12,
+     ),
+     # GPT-3.5-Turbo models
+     'gpt-3.5-turbo': pg.Dict(
+         in_service=True,
+         rpm=10000,
+         tpm=2000000,
+         cost_per_1k_input_tokens=0.0005,
+         cost_per_1k_output_tokens=0.0015,
+     ),
+     'gpt-3.5-turbo-0125': pg.Dict(
+         in_service=True,
+         rpm=10000,
+         tpm=2000000,
+         cost_per_1k_input_tokens=0.0005,
+         cost_per_1k_output_tokens=0.0015,
+     ),
+     'gpt-3.5-turbo-1106': pg.Dict(
+         in_service=True,
+         rpm=10000,
+         tpm=2000000,
+         cost_per_1k_input_tokens=0.001,
+         cost_per_1k_output_tokens=0.002,
+     ),
+     'gpt-3.5-turbo-0613': pg.Dict(
+         in_service=True,
+         rpm=10000,
+         tpm=2000000,
+         cost_per_1k_input_tokens=0.0015,
+         cost_per_1k_output_tokens=0.002,
+     ),
+     'gpt-3.5-turbo-0301': pg.Dict(
+         in_service=True,
+         rpm=10000,
+         tpm=2000000,
+         cost_per_1k_input_tokens=0.0015,
+         cost_per_1k_output_tokens=0.002,
+     ),
+     'gpt-3.5-turbo-16k': pg.Dict(
+         in_service=True,
+         rpm=10000,
+         tpm=2000000,
+         cost_per_1k_input_tokens=0.003,
+         cost_per_1k_output_tokens=0.004,
+     ),
+     'gpt-3.5-turbo-16k-0613': pg.Dict(
+         in_service=True,
+         rpm=10000,
+         tpm=2000000,
+         cost_per_1k_input_tokens=0.003,
+         cost_per_1k_output_tokens=0.004,
+     ),
+     'gpt-3.5-turbo-16k-0301': pg.Dict(
+         in_service=False,
+         rpm=10000,
+         tpm=2000000,
+         cost_per_1k_input_tokens=0.003,
+         cost_per_1k_output_tokens=0.004,
+     ),
+     # GPT-3.5 models
+     'text-davinci-003': pg.Dict(
+         in_service=False, rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM
+     ),
+     'text-davinci-002': pg.Dict(
+         in_service=False, rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM
+     ),
+     'code-davinci-002': pg.Dict(
+         in_service=False, rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM
+     ),
+     # GPT-3 instruction-tuned models (Deprecated)
+     'text-curie-001': pg.Dict(
+         in_service=False, rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM
+     ),
+     'text-babbage-001': pg.Dict(
+         in_service=False,
+         rpm=_DEFAULT_RPM,
+         tpm=_DEFAULT_TPM,
+     ),
+     'text-ada-001': pg.Dict(
+         in_service=False,
+         rpm=_DEFAULT_RPM,
+         tpm=_DEFAULT_TPM,
+     ),
+     'davinci': pg.Dict(
+         in_service=False,
+         rpm=_DEFAULT_RPM,
+         tpm=_DEFAULT_TPM,
+     ),
+     'curie': pg.Dict(in_service=False, rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM),
+     'babbage': pg.Dict(in_service=False, rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM),
+     'ada': pg.Dict(in_service=False, rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM),
+     # GPT-3 base models that are still in service.
+     'babbage-002': pg.Dict(in_service=True, rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM),
+     'davinci-002': pg.Dict(in_service=True, rpm=_DEFAULT_RPM, tpm=_DEFAULT_TPM),
+ }


  @lf.use_init_args(['model'])
- class OpenAI(lf.LanguageModel):
+ class OpenAI(openai_compatible.OpenAICompatible):
    """OpenAI model."""

    model: pg.typing.Annotated[
        pg.typing.Enum(
-           pg.MISSING_VALUE, [m[0] for m in SUPPORTED_MODELS_AND_SETTINGS]
+           pg.MISSING_VALUE, list(SUPPORTED_MODELS_AND_SETTINGS.keys())
        ),
        'The name of the model to use.',
-   ] = 'gpt-3.5-turbo'
+   ]

-   multimodal: Annotated[
-       bool,
-       'Whether this model has multimodal support.'
-   ] = False
+   api_endpoint: str = 'https://api.openai.com/v1/chat/completions'

    api_key: Annotated[
        str | None,
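The table above replaces the old list of (model, max_concurrent_requests) tuples with one pg.Dict entry per model carrying in_service, rpm, tpm and, where known, per-1k-token costs; max_concurrency and estimate_cost later in this diff are derived from it. A minimal sketch of reading such an entry, assuming only pyglove is installed (the standalone dict below is an illustration, not code from the package):

    import pyglove as pg

    settings = {
        'gpt-4o': pg.Dict(
            in_service=True,
            rpm=10000,
            tpm=5000000,
            cost_per_1k_input_tokens=0.0025,
            cost_per_1k_output_tokens=0.01,
        ),
    }
    entry = settings['gpt-4o']
    print(entry.in_service)                       # pg.Dict supports attribute access...
    print(entry.get('cost_per_1k_input_tokens'))  # ...as well as dict-style .get().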
@@ -120,23 +327,44 @@ class OpenAI(lf.LanguageModel):
        ),
    ] = None

+   project: Annotated[
+       str | None,
+       (
+           'Project. If None, the key will be read from environment '
+           "variable 'OPENAI_PROJECT'. Based on the value, usages from "
+           "these API requests will count against the project's quota. "
+       ),
+   ] = None
+
    def _on_bound(self):
      super()._on_bound()
-     self.__dict__.pop('_api_initialized', None)
+     self._api_key = None
+     self._organization = None
+     self._project = None

-   @functools.cached_property
-   def _api_initialized(self):
+   def _initialize(self):
      api_key = self.api_key or os.environ.get('OPENAI_API_KEY', None)
      if not api_key:
        raise ValueError(
            'Please specify `api_key` during `__init__` or set environment '
            'variable `OPENAI_API_KEY` with your OpenAI API key.'
        )
-     openai.api_key = api_key
-     org = self.organization or os.environ.get('OPENAI_ORGANIZATION', None)
-     if org:
-       openai.organization = org
-     return True
+     self._api_key = api_key
+     self._organization = self.organization or os.environ.get(
+         'OPENAI_ORGANIZATION', None
+     )
+     self._project = self.project or os.environ.get('OPENAI_PROJECT', None)
+
+   @property
+   def headers(self) -> dict[str, Any]:
+     assert self._api_initialized
+     headers = super().headers
+     headers['Authorization'] = f'Bearer {self._api_key}'
+     if self._organization:
+       headers['OpenAI-Organization'] = self._organization
+     if self._project:
+       headers['OpenAI-Project'] = self._project
+     return headers

    @property
    def model_id(self) -> str:
@@ -145,144 +373,73 @@ class OpenAI(lf.LanguageModel):

    @property
    def max_concurrency(self) -> int:
-     return _MODEL_CONCURRENCY[self.model]
+     rpm = SUPPORTED_MODELS_AND_SETTINGS[self.model].get('rpm', 0)
+     tpm = SUPPORTED_MODELS_AND_SETTINGS[self.model].get('tpm', 0)
+     return self.rate_to_max_concurrency(
+         requests_per_min=rpm, tokens_per_min=tpm
+     )
+
+   def estimate_cost(
+       self,
+       num_input_tokens: int,
+       num_output_tokens: int
+   ) -> float | None:
+     """Estimate the cost based on usage."""
+     cost_per_1k_input_tokens = SUPPORTED_MODELS_AND_SETTINGS[self.model].get(
+         'cost_per_1k_input_tokens', None
+     )
+     cost_per_1k_output_tokens = SUPPORTED_MODELS_AND_SETTINGS[self.model].get(
+         'cost_per_1k_output_tokens', None
+     )
+     if cost_per_1k_output_tokens is None or cost_per_1k_input_tokens is None:
+       return None
+     return (
+         cost_per_1k_input_tokens * num_input_tokens
+         + cost_per_1k_output_tokens * num_output_tokens
+     ) / 1000

    @classmethod
    def dir(cls):
-     return openai.Model.list()
-
-   @property
-   def is_chat_model(self):
-     """Returns True if the model is a chat model."""
-     return self.model.startswith(('gpt-4', 'gpt-3.5-turbo'))
+     return [k for k, v in SUPPORTED_MODELS_AND_SETTINGS.items() if v.in_service]

-   def _get_request_args(
+   def _request_args(
        self, options: lf.LMSamplingOptions) -> dict[str, Any]:
-     # Reference:
-     # https://platform.openai.com/docs/api-reference/completions/create
-     # NOTE(daiyip): options.top_k is not applicable.
-     args = dict(
-         n=options.n,
-         temperature=options.temperature,
-         max_tokens=options.max_tokens,
-         stream=False,
-         timeout=self.timeout,
-         logprobs=options.logprobs,
-         top_logprobs=options.top_logprobs,
-     )
-     # Completion and ChatCompletion uses different parameter name for model.
-     args['model' if self.is_chat_model else 'engine'] = self.model
+     # Reasoning models (o1 series) does not support `logprobs` by 2024/09/12.
+     if options.logprobs and self.model.startswith(('o1-', 'o3-')):
+       raise RuntimeError('`logprobs` is not supported on {self.model!r}.')
+     return super()._request_args(options)

-     if options.top_p is not None:
-       args['top_p'] = options.top_p
-     if options.stop:
-       args['stop'] = options.stop
-     return args

-   def _sample(self, prompts: list[lf.Message]) -> list[LMSamplingResult]:
-     assert self._api_initialized
-     if self.is_chat_model:
-       return self._chat_complete_batch(prompts)
-     else:
-       return self._complete_batch(prompts)
-
-   def _complete_batch(
-       self, prompts: list[lf.Message]) -> list[LMSamplingResult]:
-
-     def _open_ai_completion(prompts):
-       response = openai.Completion.create(
-           prompt=[p.text for p in prompts],
-           **self._get_request_args(self.sampling_options),
-       )
-       response = cast(openai_object.OpenAIObject, response)
-       # Parse response.
-       samples_by_index = collections.defaultdict(list)
-       for choice in response.choices:
-         samples_by_index[choice.index].append(
-             lf.LMSample(choice.text.strip(), score=choice.logprobs or 0.0)
-         )
-
-       usage = Usage(
-           prompt_tokens=response.usage.prompt_tokens,
-           completion_tokens=response.usage.completion_tokens,
-           total_tokens=response.usage.total_tokens,
-       )
-       return [
-           LMSamplingResult(
-               samples_by_index[index], usage=usage if index == 0 else None
-           )
-           for index in sorted(samples_by_index.keys())
-       ]
-
-     return self._parallel_execute_with_currency_control(
-         _open_ai_completion,
-         [prompts],
-         retry_on_errors=(
-             openai_error.ServiceUnavailableError,
-             openai_error.RateLimitError,
-         ),
-     )[0]
-
-   def _chat_complete_batch(
-       self, prompts: list[lf.Message]
-   ) -> list[LMSamplingResult]:
-     def _open_ai_chat_completion(prompt: lf.Message):
-       if self.multimodal:
-         content = []
-         for chunk in prompt.chunk():
-           if isinstance(chunk, str):
-             item = dict(type='text', text=chunk)
-           elif isinstance(chunk, lf_modalities.Image) and chunk.uri:
-             item = dict(type='image_url', image_url=chunk.uri)
-           else:
-             raise ValueError(f'Unsupported modality object: {chunk!r}.')
-           content.append(item)
-       else:
-         content = prompt.text
-
-       response = openai.ChatCompletion.create(
-           # TODO(daiyip): support conversation history and system prompt.
-           messages=[{'role': 'user', 'content': content}],
-           **self._get_request_args(self.sampling_options),
-       )
-       response = cast(openai_object.OpenAIObject, response)
-       samples = []
-       for choice in response.choices:
-         logprobs = None
-         if choice.logprobs:
-           logprobs = [
-               (
-                   t.token,
-                   t.logprob,
-                   [(tt.token, tt.logprob) for tt in t.top_logprobs],
-               )
-               for t in choice.logprobs.content
-           ]
-         samples.append(
-             lf.LMSample(
-                 choice.message.content,
-                 score=0.0,
-                 logprobs=logprobs,
-             )
-         )
-
-       return LMSamplingResult(
-           samples=samples,
-           usage=Usage(
-               prompt_tokens=response.usage.prompt_tokens,
-               completion_tokens=response.usage.completion_tokens,
-               total_tokens=response.usage.total_tokens,
-           ),
-       )
+ class GptO1(OpenAI):
+   """GPT-O1."""

-     return self._parallel_execute_with_currency_control(
-         _open_ai_chat_completion,
-         prompts,
-         retry_on_errors=(
-             openai_error.ServiceUnavailableError,
-             openai_error.RateLimitError,
-         ),
-     )
+   model = 'o1'
+   multimodal = True
+   timeout = None
+
+
+ class GptO1Preview(OpenAI):
+   """GPT-O1."""
+   model = 'o1-preview'
+   timeout = None
+
+
+ class GptO1Preview_20240912(OpenAI):  # pylint: disable=invalid-name
+   """GPT O1."""
+   model = 'o1-preview-2024-09-12'
+   timeout = None
+
+
+ class GptO1Mini(OpenAI):
+   """GPT O1-mini."""
+   model = 'o1-mini'
+   timeout = None
+
+
+ class GptO1Mini_20240912(OpenAI):  # pylint: disable=invalid-name
+   """GPT O1-mini."""
+   model = 'o1-mini-2024-09-12'
+   timeout = None


  class Gpt4(OpenAI):
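As a worked example of the estimate_cost arithmetic above, using the gpt-4o rates from the settings table earlier in this diff (the token counts are made up for illustration):

    # gpt-4o: cost_per_1k_input_tokens=0.0025, cost_per_1k_output_tokens=0.01.
    num_input_tokens, num_output_tokens = 2000, 500
    cost = (0.0025 * num_input_tokens + 0.01 * num_output_tokens) / 1000
    print(cost)  # (5.0 + 5.0) / 1000 ≈ 0.01 USD, i.e. about one cent per request.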
@@ -291,27 +448,44 @@ class Gpt4(OpenAI):


  class Gpt4Turbo(Gpt4):
-   """GPT-4 Turbo with 128K context window size. Knowledge up to 4-2023."""
-   model = 'gpt-4-turbo-preview'
+   """GPT-4 Turbo with 128K context window. Knowledge up to Dec. 2023."""
+   model = 'gpt-4-turbo'
+   multimodal = True


- class Gpt4TurboVision(Gpt4Turbo):
-   """GPT-4 Turbo with vision."""
-   model = 'gpt-4-vision-preview'
+ class Gpt4Turbo_20240409(Gpt4Turbo):  # pylint:disable=invalid-name
+   """GPT-4 Turbo with 128K context window. Knowledge up to Dec. 2023."""
+   model = 'gpt-4-turbo-2024-04-09'
    multimodal = True


- class Gpt4Turbo_0125(Gpt4Turbo):  # pylint:disable=invalid-name
-   """GPT-4 Turbo with 128K context window size. Knowledge up to 4-2023."""
+ class Gpt4TurboPreview(Gpt4):
+   """GPT-4 Turbo Preview with 128k context window. Knowledge up to Dec. 2023."""
+   model = 'gpt-4-turbo-preview'
+
+
+ class Gpt4TurboPreview_20240125(Gpt4TurboPreview):  # pylint: disable=invalid-name
+   """GPT-4 Turbo Preview with 128k context window. Knowledge up to Dec. 2023."""
    model = 'gpt-4-0125-preview'


- class Gpt4Turbo_1106(Gpt4Turbo):  # pylint:disable=invalid-name
-   """GPT-4 Turbo @20231106. 128K context window. Knowledge up to 4-2023."""
+ class Gpt4TurboPreview_20231106(Gpt4TurboPreview):  # pylint: disable=invalid-name
+   """GPT-4 Turbo Preview with 128k context window. Knowledge up to Apr. 2023."""
    model = 'gpt-4-1106-preview'


- class Gpt4_0613(Gpt4):  # pylint:disable=invalid-name
+ class Gpt4VisionPreview(Gpt4):
+   """GPT-4 Turbo vision preview. 128k context window. Knowledge to Apr. 2023."""
+   model = 'gpt-4-vision-preview'
+   multimodal = True
+
+
+ class Gpt4VisionPreview_20231106(Gpt4):  # pylint: disable=invalid-name
+   """GPT-4 Turbo vision preview. 128k context window. Knowledge to Apr. 2023."""
+   model = 'gpt-4-1106-vision-preview'
+
+
+ class Gpt4_20230613(Gpt4):  # pylint:disable=invalid-name
    """GPT-4 @20230613. 8K context window. Knowledge up to 9-2021."""
    model = 'gpt-4-0613'

@@ -321,11 +495,47 @@ class Gpt4_32K(Gpt4): # pylint:disable=invalid-name
    model = 'gpt-4-32k'


- class Gpt4_32K_0613(Gpt4_32K):  # pylint:disable=invalid-name
+ class Gpt4_32K_20230613(Gpt4_32K):  # pylint:disable=invalid-name
    """GPT-4 @20230613. 32K context window. Knowledge up to 9-2021."""
    model = 'gpt-4-32k-0613'


+ class Gpt4oMini(OpenAI):
+   """GPT-4o Mini."""
+   model = 'gpt-4o-mini'
+   multimodal = True
+
+
+ class Gpt4oMini_20240718(OpenAI):  # pylint:disable=invalid-name
+   """GPT-4o Mini."""
+   model = 'gpt-4o-mini-2024-07-18'
+   multimodal = True
+
+
+ class Gpt4o(OpenAI):
+   """GPT-4o."""
+   model = 'gpt-4o'
+   multimodal = True
+
+
+ class Gpt4o_20241120(OpenAI):  # pylint:disable=invalid-name
+   """GPT-4o version 2024-11-20."""
+   model = 'gpt-4o-2024-11-20'
+   multimodal = True
+
+
+ class Gpt4o_20240806(OpenAI):  # pylint:disable=invalid-name
+   """GPT-4o version 2024-08-06."""
+   model = 'gpt-4o-2024-08-06'
+   multimodal = True
+
+
+ class Gpt4o_20240513(OpenAI):  # pylint:disable=invalid-name
+   """GPT-4o version 2024-05-13."""
+   model = 'gpt-4o-2024-05-13'
+   multimodal = True
+
+
  class Gpt35(OpenAI):
    """GPT-3.5. 4K max tokens, trained up on data up to Sep, 2021."""
    model = 'text-davinci-003'
@@ -336,17 +546,17 @@ class Gpt35Turbo(Gpt35):
    model = 'gpt-3.5-turbo'


- class Gpt35Turbo_0125(Gpt35Turbo):  # pylint:disable=invalid-name
+ class Gpt35Turbo_20240125(Gpt35Turbo):  # pylint:disable=invalid-name
    """GPT-3.5 Turbo @20240125. 16K context window. Knowledge up to 09/2021."""
    model = 'gpt-3.5-turbo-0125'


- class Gpt35Turbo_1106(Gpt35Turbo):  # pylint:disable=invalid-name
+ class Gpt35Turbo_20231106(Gpt35Turbo):  # pylint:disable=invalid-name
    """Gpt3.5 Turbo @20231106. 16K context window. Knowledge up to 09/2021."""
    model = 'gpt-3.5-turbo-1106'


- class Gpt35Turbo_0613(Gpt35Turbo):  # pylint:disable=invalid-name
+ class Gpt35Turbo_20230613(Gpt35Turbo):  # pylint:disable=invalid-name
    """Gpt3.5 Turbo snapshot at 2023/06/13, with 4K context window size."""
    model = 'gpt-3.5-turbo-0613'

@@ -356,7 +566,7 @@ class Gpt35Turbo16K(Gpt35Turbo):
    model = 'gpt-3.5-turbo-16k'


- class Gpt35Turbo16K_0613(Gpt35Turbo):  # pylint:disable=invalid-name
+ class Gpt35Turbo16K_20230613(Gpt35Turbo):  # pylint:disable=invalid-name
    """Gtp 3.5 Turbo 16K 0613."""
    model = 'gpt-3.5-turbo-16k-0613'
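To close, a short usage sketch of the reworked model aliases defined in this file. It is pieced together only from what the diff shows (the api_key constructor argument, dir(), max_concurrency and estimate_cost) and is illustrative rather than authoritative:

    import os
    from langfun.core.llms import openai as lf_openai

    # Model names that the settings table marks as in service.
    print(lf_openai.OpenAI.dir())

    # A dated GPT-4o alias; the key may also be picked up from $OPENAI_API_KEY.
    lm = lf_openai.Gpt4o_20241120(api_key=os.environ.get('OPENAI_API_KEY'))
    print(lm.model_id)
    print(lm.max_concurrency)  # Derived from the rpm/tpm settings via rate_to_max_concurrency.
    print(lm.estimate_cost(num_input_tokens=2000, num_output_tokens=500))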