langfun 0.1.2.dev202501170804__tar.gz → 0.1.2.dev202501180803__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/PKG-INFO +1 -1
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/llms/__init__.py +32 -12
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/llms/gemini.py +1 -1
- langfun-0.1.2.dev202501180803/langfun/core/llms/vertexai.py +561 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun.egg-info/PKG-INFO +1 -1
- langfun-0.1.2.dev202501170804/langfun/core/llms/vertexai.py +0 -283
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/LICENSE +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/README.md +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/__init__.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/__init__.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/agentic/__init__.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/agentic/action.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/agentic/action_eval.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/agentic/action_eval_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/agentic/action_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/coding/__init__.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/coding/python/__init__.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/coding/python/correction.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/coding/python/correction_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/coding/python/execution.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/coding/python/execution_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/coding/python/generation.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/coding/python/generation_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/coding/python/parsing.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/coding/python/parsing_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/component.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/component_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/concurrent.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/concurrent_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/console.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/console_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/eval/__init__.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/eval/base.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/eval/base_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/eval/matching.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/eval/matching_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/eval/patching.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/eval/patching_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/eval/scoring.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/eval/scoring_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/eval/v2/__init__.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/eval/v2/checkpointing.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/eval/v2/checkpointing_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/eval/v2/eval_test_helper.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/eval/v2/evaluation.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/eval/v2/evaluation_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/eval/v2/example.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/eval/v2/example_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/eval/v2/experiment.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/eval/v2/experiment_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/eval/v2/metric_values.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/eval/v2/metric_values_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/eval/v2/metrics.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/eval/v2/metrics_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/eval/v2/progress.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/eval/v2/progress_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/eval/v2/progress_tracking.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/eval/v2/progress_tracking_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/eval/v2/reporting.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/eval/v2/reporting_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/eval/v2/runners.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/eval/v2/runners_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/langfunc.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/langfunc_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/language_model.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/language_model_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/llms/anthropic.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/llms/anthropic_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/llms/cache/__init__.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/llms/cache/base.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/llms/cache/in_memory.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/llms/cache/in_memory_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/llms/compositional.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/llms/compositional_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/llms/deepseek.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/llms/deepseek_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/llms/fake.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/llms/fake_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/llms/gemini_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/llms/google_genai.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/llms/google_genai_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/llms/groq.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/llms/groq_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/llms/llama_cpp.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/llms/llama_cpp_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/llms/openai.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/llms/openai_compatible.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/llms/openai_compatible_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/llms/openai_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/llms/rest.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/llms/rest_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/llms/vertexai_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/logging.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/logging_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/memories/__init__.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/memories/conversation_history.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/memories/conversation_history_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/memory.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/message.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/message_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/modalities/__init__.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/modalities/audio.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/modalities/audio_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/modalities/image.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/modalities/image_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/modalities/mime.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/modalities/mime_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/modalities/ms_office.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/modalities/ms_office_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/modalities/pdf.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/modalities/pdf_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/modalities/video.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/modalities/video_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/modality.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/modality_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/natural_language.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/natural_language_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/sampling.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/sampling_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/structured/__init__.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/structured/completion.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/structured/completion_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/structured/description.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/structured/description_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/structured/function_generation.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/structured/function_generation_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/structured/mapping.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/structured/mapping_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/structured/parsing.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/structured/parsing_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/structured/querying.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/structured/querying_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/structured/schema.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/structured/schema_generation.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/structured/schema_generation_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/structured/schema_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/structured/scoring.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/structured/scoring_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/structured/tokenization.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/structured/tokenization_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/subscription.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/subscription_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/template.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/template_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/templates/__init__.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/templates/completion.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/templates/completion_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/templates/conversation.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/templates/conversation_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/templates/demonstration.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/templates/demonstration_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/templates/selfplay.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/templates/selfplay_test.py +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun.egg-info/SOURCES.txt +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun.egg-info/dependency_links.txt +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun.egg-info/requires.txt +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun.egg-info/top_level.txt +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/setup.cfg +0 -0
- {langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/setup.py +0 -0
{langfun-0.1.2.dev202501170804 → langfun-0.1.2.dev202501180803}/langfun/core/llms/__init__.py
RENAMED
@@ -27,11 +27,15 @@ from langfun.core.llms.fake import StaticSequence
 # Compositional models.
 from langfun.core.llms.compositional import RandomChoice
 
-#
+# Base models by request/response protocol.
 from langfun.core.llms.rest import REST
+from langfun.core.llms.openai_compatible import OpenAICompatible
+from langfun.core.llms.gemini import Gemini
+from langfun.core.llms.anthropic import Anthropic
 
-#
+# Base models by serving platforms.
 from langfun.core.llms.vertexai import VertexAI
+from langfun.core.llms.groq import Groq
 
 # Gemini models.
 from langfun.core.llms.google_genai import GenAI
@@ -60,9 +64,6 @@ from langfun.core.llms.vertexai import VertexAIGeminiFlash1_5_002
 from langfun.core.llms.vertexai import VertexAIGeminiFlash1_5_001
 from langfun.core.llms.vertexai import VertexAIGeminiPro1
 
-# Base for OpenAI-compatible models.
-from langfun.core.llms.openai_compatible import OpenAICompatible
-
 # OpenAI models.
 from langfun.core.llms.openai import OpenAI
 
@@ -119,7 +120,6 @@ from langfun.core.llms.openai import Gpt3Ada
 
 # Anthropic models.
 
-from langfun.core.llms.anthropic import Anthropic
 from langfun.core.llms.anthropic import Claude35Sonnet
 from langfun.core.llms.anthropic import Claude35Sonnet20241022
 from langfun.core.llms.anthropic import Claude35Sonnet20240620
@@ -135,7 +135,17 @@ from langfun.core.llms.vertexai import VertexAIClaude3_Opus_20240229
 
 # Misc open source models.
 
-
+# Gemma models.
+from langfun.core.llms.groq import GroqGemma2_9B_IT
+from langfun.core.llms.groq import GroqGemma_7B_IT
+
+# Llama models.
+from langfun.core.llms.vertexai import VertexAILlama
+from langfun.core.llms.vertexai import VertexAILlama3_2_90B
+from langfun.core.llms.vertexai import VertexAILlama3_1_405B
+from langfun.core.llms.vertexai import VertexAILlama3_1_70B
+from langfun.core.llms.vertexai import VertexAILlama3_1_8B
+
 from langfun.core.llms.groq import GroqLlama3_2_3B
 from langfun.core.llms.groq import GroqLlama3_2_1B
 from langfun.core.llms.groq import GroqLlama3_1_70B
@@ -143,18 +153,28 @@ from langfun.core.llms.groq import GroqLlama3_1_8B
 from langfun.core.llms.groq import GroqLlama3_70B
 from langfun.core.llms.groq import GroqLlama3_8B
 from langfun.core.llms.groq import GroqLlama2_70B
+
+# Mistral models.
+from langfun.core.llms.vertexai import VertexAIMistral
+from langfun.core.llms.vertexai import VertexAIMistralLarge_20241121
+from langfun.core.llms.vertexai import VertexAIMistralLarge_20240724
+from langfun.core.llms.vertexai import VertexAIMistralNemo_20240724
+from langfun.core.llms.vertexai import VertexAICodestral_20250113
+from langfun.core.llms.vertexai import VertexAICodestral_20240529
+
 from langfun.core.llms.groq import GroqMistral_8x7B
-
-
+
+# DeepSeek models.
+from langfun.core.llms.deepseek import DeepSeek
+from langfun.core.llms.deepseek import DeepSeekChat
+
+# Whisper models.
 from langfun.core.llms.groq import GroqWhisper_Large_v3
 from langfun.core.llms.groq import GroqWhisper_Large_v3Turbo
 
 # LLaMA C++ models.
 from langfun.core.llms.llama_cpp import LlamaCppRemote
 
-# DeepSeek models.
-from langfun.core.llms.deepseek import DeepSeek
-from langfun.core.llms.deepseek import DeepSeekChat
 
 # Placeholder for Google-internal imports.
 
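Net effect of the `__init__.py` change: the reorganized base classes and the new Vertex AI partner-model classes (Llama, Mistral, Codestral) are re-exported from `langfun.core.llms`. A minimal usage sketch, assuming langfun's `lf.query` API and a GCP project with the Llama MaaS endpoint enabled; the project and location values are placeholders:

    import langfun as lf

    # `project` and `location` may also come from the VERTEXAI_PROJECT and
    # VERTEXAI_LOCATION environment variables (see vertexai.py below).
    lm = lf.llms.VertexAILlama3_1_405B(
        project='my-gcp-project',  # placeholder
        location='us-central1',
    )
    result = lf.query('Compute 1 + 1.', int, lm=lm)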
langfun-0.1.2.dev202501180803/langfun/core/llms/vertexai.py
ADDED
@@ -0,0 +1,561 @@
+# Copyright 2025 The Langfun Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Vertex AI generative models."""
+
+import functools
+import os
+from typing import Annotated, Any, Literal
+
+import langfun.core as lf
+from langfun.core.llms import anthropic
+from langfun.core.llms import gemini
+from langfun.core.llms import openai_compatible
+from langfun.core.llms import rest
+import pyglove as pg
+
+try:
+  # pylint: disable=g-import-not-at-top
+  from google import auth as google_auth
+  from google.auth import credentials as credentials_lib
+  from google.auth.transport import requests as auth_requests
+  # pylint: enable=g-import-not-at-top
+
+  Credentials = credentials_lib.Credentials
+except ImportError:
+  google_auth = None
+  credentials_lib = None
+  auth_requests = None
+  Credentials = Any
+
+
+@pg.use_init_args(['api_endpoint'])
+class VertexAI(rest.REST):
+  """Base class for VertexAI models.
+
+  This class handles the authentication of vertex AI models. Subclasses
+  should implement `request` and `result` methods, as well as the `api_endpoint`
+  property. Or let users to provide them as __init__ arguments.
+
+  Please check out VertexAIGemini in `gemini.py` as an example.
+  """
+
+  model: Annotated[
+      str | None,
+      'Model ID.'
+  ] = None
+
+  project: Annotated[
+      str | None,
+      (
+          'Vertex AI project ID. Or set from environment variable '
+          'VERTEXAI_PROJECT.'
+      ),
+  ] = None
+
+  location: Annotated[
+      str | None,
+      (
+          'Vertex AI service location. Or set from environment variable '
+          'VERTEXAI_LOCATION.'
+      ),
+  ] = None
+
+  credentials: Annotated[
+      Credentials | None,
+      (
+          'Credentials to use. If None, the default credentials to the '
+          'environment will be used.'
+      ),
+  ] = None
+
+  def _on_bound(self):
+    super()._on_bound()
+    if google_auth is None:
+      raise ValueError(
+          'Please install "langfun[llm-google-vertex]" to use Vertex AI models.'
+      )
+    self._project = None
+    self._credentials = None
+
+  def _initialize(self):
+    project = self.project or os.environ.get('VERTEXAI_PROJECT', None)
+    if not project:
+      raise ValueError(
+          'Please specify `project` during `__init__` or set environment '
+          'variable `VERTEXAI_PROJECT` with your Vertex AI project ID.'
+      )
+
+    location = self.location or os.environ.get('VERTEXAI_LOCATION', None)
+    if not location:
+      raise ValueError(
+          'Please specify `location` during `__init__` or set environment '
+          'variable `VERTEXAI_LOCATION` with your Vertex AI service location.'
+      )
+
+    self._project = project
+    self._location = location
+
+    credentials = self.credentials
+    if credentials is None:
+      # Use default credentials.
+      credentials, _ = google_auth.default(
+          scopes=['https://www.googleapis.com/auth/cloud-platform']
+      )
+    self._credentials = credentials
+
+  @property
+  def model_id(self) -> str:
+    """Returns a string to identify the model."""
+    return f'VertexAI({self.model})'
+
+  @functools.cached_property
+  def _session(self):
+    assert self._api_initialized
+    assert self._credentials is not None
+    assert auth_requests is not None
+    s = auth_requests.AuthorizedSession(self._credentials)
+    s.headers.update(self.headers or {})
+    return s
+
+
+#
+# Gemini models served by Vertex AI.
+#
+
+
+@pg.use_init_args(['model'])
+@pg.members([('api_endpoint', pg.typing.Str().freeze(''))])
+class VertexAIGemini(VertexAI, gemini.Gemini):
+  """Gemini models served by Vertex AI.."""
+
+  @property
+  def api_endpoint(self) -> str:
+    assert self._api_initialized
+    return (
+        f'https://{self._location}-aiplatform.googleapis.com/v1/projects/'
+        f'{self._project}/locations/{self._location}/publishers/google/'
+        f'models/{self.model}:generateContent'
+    )
+
+
+class VertexAIGeminiFlash2_0ThinkingExp_20241219(VertexAIGemini):  # pylint: disable=invalid-name
+  """Vertex AI Gemini Flash 2.0 Thinking model launched on 12/19/2024."""
+
+  api_version = 'v1alpha'
+  model = 'gemini-2.0-flash-thinking-exp-1219'
+  timeout = None
+
+
+class VertexAIGeminiFlash2_0Exp(VertexAIGemini):  # pylint: disable=invalid-name
+  """Vertex AI Gemini 2.0 Flash model."""
+
+  model = 'gemini-2.0-flash-exp'
+
+
+class VertexAIGeminiExp_20241206(VertexAIGemini):  # pylint: disable=invalid-name
+  """Vertex AI Gemini Experimental model launched on 12/06/2024."""
+
+  model = 'gemini-exp-1206'
+
+
+class VertexAIGeminiExp_20241114(VertexAIGemini):  # pylint: disable=invalid-name
+  """Vertex AI Gemini Experimental model launched on 11/14/2024."""
+
+  model = 'gemini-exp-1114'
+
+
+class VertexAIGeminiPro1_5(VertexAIGemini):  # pylint: disable=invalid-name
+  """Vertex AI Gemini 1.5 Pro model."""
+
+  model = 'gemini-1.5-pro-latest'
+
+
+class VertexAIGeminiPro1_5_002(VertexAIGemini):  # pylint: disable=invalid-name
+  """Vertex AI Gemini 1.5 Pro model."""
+
+  model = 'gemini-1.5-pro-002'
+
+
+class VertexAIGeminiPro1_5_001(VertexAIGemini):  # pylint: disable=invalid-name
+  """Vertex AI Gemini 1.5 Pro model."""
+
+  model = 'gemini-1.5-pro-001'
+
+
+class VertexAIGeminiFlash1_5(VertexAIGemini):  # pylint: disable=invalid-name
+  """Vertex AI Gemini 1.5 Flash model."""
+
+  model = 'gemini-1.5-flash'
+
+
+class VertexAIGeminiFlash1_5_002(VertexAIGemini):  # pylint: disable=invalid-name
+  """Vertex AI Gemini 1.5 Flash model."""
+
+  model = 'gemini-1.5-flash-002'
+
+
+class VertexAIGeminiFlash1_5_001(VertexAIGemini):  # pylint: disable=invalid-name
+  """Vertex AI Gemini 1.5 Flash model."""
+
+  model = 'gemini-1.5-flash-001'
+
+
+class VertexAIGeminiPro1(VertexAIGemini):  # pylint: disable=invalid-name
+  """Vertex AI Gemini 1.0 Pro model."""
+
+  model = 'gemini-1.0-pro'
+
+
+#
+# Anthropic models on Vertex AI.
+#
+
+
+@pg.use_init_args(['model'])
+@pg.members([('api_endpoint', pg.typing.Str().freeze(''))])
+class VertexAIAnthropic(VertexAI, anthropic.Anthropic):
+  """Anthropic models on VertexAI."""
+
+  location: Annotated[
+      Literal['us-east5', 'europe-west1'],
+      'GCP location with Anthropic models hosted.'
+  ] = 'us-east5'
+
+  api_version = 'vertex-2023-10-16'
+
+  @property
+  def headers(self):
+    return {
+        'Content-Type': 'application/json; charset=utf-8',
+    }
+
+  @property
+  def api_endpoint(self) -> str:
+    return (
+        f'https://{self.location}-aiplatform.googleapis.com/v1/projects/'
+        f'{self._project}/locations/{self.location}/publishers/anthropic/'
+        f'models/{self.model}:streamRawPredict'
+    )
+
+  def request(
+      self,
+      prompt: lf.Message,
+      sampling_options: lf.LMSamplingOptions
+  ):
+    request = super().request(prompt, sampling_options)
+    request['anthropic_version'] = self.api_version
+    del request['model']
+    return request
+
+
+# pylint: disable=invalid-name
+
+
+class VertexAIClaude3_Opus_20240229(VertexAIAnthropic):
+  """Anthropic's Claude 3 Opus model on VertexAI."""
+  model = 'claude-3-opus@20240229'
+
+
+class VertexAIClaude3_5_Sonnet_20241022(VertexAIAnthropic):
+  """Anthropic's Claude 3.5 Sonnet model on VertexAI."""
+  model = 'claude-3-5-sonnet-v2@20241022'
+
+
+class VertexAIClaude3_5_Sonnet_20240620(VertexAIAnthropic):
+  """Anthropic's Claude 3.5 Sonnet model on VertexAI."""
+  model = 'claude-3-5-sonnet@20240620'
+
+
+class VertexAIClaude3_5_Haiku_20241022(VertexAIAnthropic):
+  """Anthropic's Claude 3.5 Haiku model on VertexAI."""
+  model = 'claude-3-5-haiku@20241022'
+
+# pylint: enable=invalid-name
+
+#
+# Llama models on Vertex AI.
+# pylint: disable=line-too-long
+# Pricing: https://cloud.google.com/vertex-ai/generative-ai/pricing?_gl=1*ukuk6u*_ga*MjEzMjc4NjM2My4xNzMzODg4OTg3*_ga_WH2QY8WWF5*MTczNzEzNDU1Mi4xMjQuMS4xNzM3MTM0NzczLjU5LjAuMA..#meta-models
+# pylint: enable=line-too-long
+
+LLAMA_MODELS = {
+    'llama-3.2-90b-vision-instruct-maas': pg.Dict(
+        latest_update='2024-09-25',
+        in_service=True,
+        rpm=0,
+        tpm=0,
+        # Free during preview.
+        cost_per_1m_input_tokens=None,
+        cost_per_1m_output_tokens=None,
+    ),
+    'llama-3.1-405b-instruct-maas': pg.Dict(
+        latest_update='2024-09-25',
+        in_service=True,
+        rpm=0,
+        tpm=0,
+        # GA.
+        cost_per_1m_input_tokens=5,
+        cost_per_1m_output_tokens=16,
+    ),
+    'llama-3.1-70b-instruct-maas': pg.Dict(
+        latest_update='2024-09-25',
+        in_service=True,
+        rpm=0,
+        tpm=0,
+        # Free during preview.
+        cost_per_1m_input_tokens=None,
+        cost_per_1m_output_tokens=None,
+    ),
+    'llama-3.1-8b-instruct-maas': pg.Dict(
+        latest_update='2024-09-25',
+        in_service=True,
+        rpm=0,
+        tpm=0,
+        # Free during preview.
+        cost_per_1m_input_tokens=None,
+        cost_per_1m_output_tokens=None,
+    )
+}
+
+
+@pg.use_init_args(['model'])
+@pg.members([('api_endpoint', pg.typing.Str().freeze(''))])
+class VertexAILlama(VertexAI, openai_compatible.OpenAICompatible):
+  """Llama models on VertexAI."""
+
+  model: pg.typing.Annotated[
+      pg.typing.Enum(pg.MISSING_VALUE, list(LLAMA_MODELS.keys())),
+      'Llama model ID.',
+  ]
+
+  locations: Annotated[
+      Literal['us-central1'],
+      (
+          'GCP locations with Llama models hosted. '
+          'See https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/llama#regions-quotas'
+      )
+  ] = 'us-central1'
+
+  @property
+  def api_endpoint(self) -> str:
+    assert self._api_initialized
+    return (
+        f'https://{self._location}-aiplatform.googleapis.com/v1beta1/projects/'
+        f'{self._project}/locations/{self._location}/endpoints/'
+        f'openapi/chat/completions'
+    )
+
+  def request(
+      self,
+      prompt: lf.Message,
+      sampling_options: lf.LMSamplingOptions
+  ):
+    request = super().request(prompt, sampling_options)
+    request['model'] = f'meta/{self.model}'
+    return request
+
+  @property
+  def max_concurrency(self) -> int:
+    rpm = LLAMA_MODELS[self.model].get('rpm', 0)
+    tpm = LLAMA_MODELS[self.model].get('tpm', 0)
+    return self.rate_to_max_concurrency(
+        requests_per_min=rpm, tokens_per_min=tpm
+    )
+
+  def estimate_cost(
+      self,
+      num_input_tokens: int,
+      num_output_tokens: int
+  ) -> float | None:
+    """Estimate the cost based on usage."""
+    cost_per_1m_input_tokens = LLAMA_MODELS[self.model].get(
+        'cost_per_1m_input_tokens', None
+    )
+    cost_per_1m_output_tokens = LLAMA_MODELS[self.model].get(
+        'cost_per_1m_output_tokens', None
+    )
+    if cost_per_1m_output_tokens is None or cost_per_1m_input_tokens is None:
+      return None
+    return (
+        cost_per_1m_input_tokens * num_input_tokens
+        + cost_per_1m_output_tokens * num_output_tokens
+    ) / 1000_000
+
+
+# pylint: disable=invalid-name
+class VertexAILlama3_2_90B(VertexAILlama):
+  """Llama 3.2 90B vision instruct model on VertexAI."""
+
+  model = 'llama-3.2-90b-vision-instruct-maas'
+
+
+class VertexAILlama3_1_405B(VertexAILlama):
+  """Llama 3.1 405B vision instruct model on VertexAI."""
+
+  model = 'llama-3.1-405b-instruct-maas'
+
+
+class VertexAILlama3_1_70B(VertexAILlama):
+  """Llama 3.1 70B vision instruct model on VertexAI."""
+
+  model = 'llama-3.1-70b-instruct-maas'
+
+
+class VertexAILlama3_1_8B(VertexAILlama):
+  """Llama 3.1 8B vision instruct model on VertexAI."""
+
+  model = 'llama-3.1-8b-instruct-maas'
+# pylint: enable=invalid-name
+
+#
+# Mistral models on Vertex AI.
+# pylint: disable=line-too-long
+# Pricing: https://cloud.google.com/vertex-ai/generative-ai/pricing?_gl=1*ukuk6u*_ga*MjEzMjc4NjM2My4xNzMzODg4OTg3*_ga_WH2QY8WWF5*MTczNzEzNDU1Mi4xMjQuMS4xNzM3MTM0NzczLjU5LjAuMA..#mistral-models
+# pylint: enable=line-too-long
+
+
+MISTRAL_MODELS = {
+    'mistral-large-2411': pg.Dict(
+        latest_update='2024-11-21',
+        in_service=True,
+        rpm=0,
+        tpm=0,
+        # GA.
+        cost_per_1m_input_tokens=2,
+        cost_per_1m_output_tokens=6,
+    ),
+    'mistral-large@2407': pg.Dict(
+        latest_update='2024-07-24',
+        in_service=True,
+        rpm=0,
+        tpm=0,
+        # GA.
+        cost_per_1m_input_tokens=2,
+        cost_per_1m_output_tokens=6,
+    ),
+    'mistral-nemo@2407': pg.Dict(
+        latest_update='2024-07-24',
+        in_service=True,
+        rpm=0,
+        tpm=0,
+        # GA.
+        cost_per_1m_input_tokens=0.15,
+        cost_per_1m_output_tokens=0.15,
+    ),
+    'codestral-2501': pg.Dict(
+        latest_update='2025-01-13',
+        in_service=True,
+        rpm=0,
+        tpm=0,
+        # GA.
+        cost_per_1m_input_tokens=0.3,
+        cost_per_1m_output_tokens=0.9,
+    ),
+    'codestral@2405': pg.Dict(
+        latest_update='2024-05-29',
+        in_service=True,
+        rpm=0,
+        tpm=0,
+        # GA.
+        cost_per_1m_input_tokens=0.2,
+        cost_per_1m_output_tokens=0.6,
+    ),
+}
+
+
+@pg.use_init_args(['model'])
+@pg.members([('api_endpoint', pg.typing.Str().freeze(''))])
+class VertexAIMistral(VertexAI, openai_compatible.OpenAICompatible):
+  """Mistral AI models on VertexAI."""
+
+  model: pg.typing.Annotated[
+      pg.typing.Enum(pg.MISSING_VALUE, list(MISTRAL_MODELS.keys())),
+      'Mistral model ID.',
+  ]
+
+  locations: Annotated[
+      Literal['us-central1', 'europe-west4'],
+      (
+          'GCP locations with Mistral models hosted. '
+          'See https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/mistral#regions-quotas'
+      )
+  ] = 'us-central1'
+
+  @property
+  def api_endpoint(self) -> str:
+    assert self._api_initialized
+    return (
+        f'https://{self._location}-aiplatform.googleapis.com/v1/projects/'
+        f'{self._project}/locations/{self._location}/publishers/mistralai/'
+        f'models/{self.model}:rawPredict'
+    )
+
+  @property
+  def max_concurrency(self) -> int:
+    rpm = MISTRAL_MODELS[self.model].get('rpm', 0)
+    tpm = MISTRAL_MODELS[self.model].get('tpm', 0)
+    return self.rate_to_max_concurrency(
+        requests_per_min=rpm, tokens_per_min=tpm
+    )
+
+  def estimate_cost(
+      self,
+      num_input_tokens: int,
+      num_output_tokens: int
+  ) -> float | None:
+    """Estimate the cost based on usage."""
+    cost_per_1m_input_tokens = MISTRAL_MODELS[self.model].get(
+        'cost_per_1m_input_tokens', None
+    )
+    cost_per_1m_output_tokens = MISTRAL_MODELS[self.model].get(
+        'cost_per_1m_output_tokens', None
+    )
+    if cost_per_1m_output_tokens is None or cost_per_1m_input_tokens is None:
+      return None
+    return (
+        cost_per_1m_input_tokens * num_input_tokens
+        + cost_per_1m_output_tokens * num_output_tokens
+    ) / 1000_000
+
+
+# pylint: disable=invalid-name
+class VertexAIMistralLarge_20241121(VertexAIMistral):
+  """Mistral Large model on VertexAI released on 2024/11/21."""
+
+  model = 'mistral-large-2411'
+
+
+class VertexAIMistralLarge_20240724(VertexAIMistral):
+  """Mistral Large model on VertexAI released on 2024/07/24."""
+
+  model = 'mistral-large@2407'
+
+
+class VertexAIMistralNemo_20240724(VertexAIMistral):
+  """Mistral Nemo model on VertexAI released on 2024/07/24."""
+
+  model = 'mistral-nemo@2407'
+
+
+class VertexAICodestral_20250113(VertexAIMistral):
+  """Mistral Nemo model on VertexAI released on 2024/07/24."""
+
+  model = 'codestral-2501'
+
+
+class VertexAICodestral_20240529(VertexAIMistral):
+  """Mistral Nemo model on VertexAI released on 2024/05/29."""
+
+  model = 'codestral@2405'
+# pylint: enable=invalid-name