crfm-helm 0.5.6-py3-none-any.whl → 0.5.10-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
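A diff like this can be reproduced locally by downloading both wheels and comparing their archive listings. Below is a minimal sketch, assuming the two wheel files have already been fetched into the working directory (for example with `pip download crfm-helm==0.5.6 --no-deps` and `pip download crfm-helm==0.5.10 --no-deps`); the filenames are the standard wheel names, not taken from this page:

```python
import zipfile

# Assumed local filenames, as produced by `pip download`.
OLD = "crfm_helm-0.5.6-py3-none-any.whl"
NEW = "crfm_helm-0.5.10-py3-none-any.whl"

def member_names(path: str) -> set[str]:
    """A wheel is a zip archive; return the set of member paths."""
    with zipfile.ZipFile(path) as wheel:
        return set(wheel.namelist())

old_files, new_files = member_names(OLD), member_names(NEW)
for path in sorted(new_files - old_files):
    print(f"added:   {path}")
for path in sorted(old_files - new_files):
    print(f"removed: {path}")
```

This recovers the added/removed file lists; per-file line counts like the ones below would additionally require diffing the extracted file contents.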
- {crfm_helm-0.5.6.dist-info → crfm_helm-0.5.10.dist-info}/METADATA +72 -130
- {crfm_helm-0.5.6.dist-info → crfm_helm-0.5.10.dist-info}/RECORD +372 -305
- helm/benchmark/adaptation/adapter_spec.py +10 -0
- helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +11 -3
- helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +11 -8
- helm/benchmark/annotation/aci_bench_annotator.py +11 -22
- helm/benchmark/annotation/air_bench_annotator.py +1 -1
- helm/benchmark/annotation/alrage_annotator.py +90 -0
- helm/benchmark/annotation/chw_care_plan_annotator.py +10 -21
- helm/benchmark/annotation/dischargeme_annotator.py +11 -22
- helm/benchmark/annotation/live_qa_annotator.py +1 -1
- helm/benchmark/annotation/med_dialog_annotator.py +11 -22
- helm/benchmark/annotation/medalign_annotator.py +11 -22
- helm/benchmark/annotation/medi_qa_annotator.py +11 -22
- helm/benchmark/annotation/medication_qa_annotator.py +11 -22
- helm/benchmark/annotation/mental_health_annotator.py +11 -22
- helm/benchmark/annotation/mimic_bhc_annotator.py +11 -22
- helm/benchmark/annotation/mimic_rrs_annotator.py +11 -22
- helm/benchmark/annotation/model_as_judge.py +23 -18
- helm/benchmark/annotation/mtsamples_procedures_annotator.py +11 -22
- helm/benchmark/annotation/mtsamples_replicate_annotator.py +11 -22
- helm/benchmark/annotation/starr_patient_instructions_annotator.py +11 -22
- helm/benchmark/metrics/air_bench_metrics.py +3157 -1
- helm/benchmark/metrics/alrage_metric.py +35 -0
- helm/benchmark/metrics/basic_metrics.py +267 -2
- helm/benchmark/metrics/bbq_metrics.py +12 -0
- helm/benchmark/metrics/classification_metrics.py +19 -1
- helm/benchmark/metrics/codeinsights_code_efficiency_metrics.py +186 -0
- helm/benchmark/metrics/codeinsights_code_evaluation_metrics.py +477 -0
- helm/benchmark/metrics/codeinsights_correct_code_metrics.py +366 -0
- helm/benchmark/metrics/codeinsights_edge_case_metrics.py +92 -0
- helm/benchmark/metrics/codeinsights_metric_specs.py +51 -0
- helm/benchmark/metrics/comet_metric.py +1 -1
- helm/benchmark/metrics/conv_fin_qa_calc_metrics.py +12 -1
- helm/benchmark/metrics/copyright_metrics.py +1 -1
- helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +1 -1
- helm/benchmark/metrics/dry_run_metrics.py +30 -1
- helm/benchmark/metrics/efficiency_metrics.py +74 -0
- helm/benchmark/metrics/ehr_sql_metrics.py +57 -1
- helm/benchmark/metrics/evaluate_reference_metrics.py +312 -1
- helm/benchmark/metrics/gpqa_chain_of_thought_metric.py +13 -1
- helm/benchmark/metrics/helpdesk_call_summarization_metrics.py +13 -1
- helm/benchmark/metrics/ifeval_metrics.py +13 -1
- helm/benchmark/metrics/image_generation/clip_score_metrics.py +13 -2
- helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +1 -1
- helm/benchmark/metrics/instruction_following_critique_metrics.py +41 -1
- helm/benchmark/metrics/kpi_edgar_metrics.py +21 -0
- helm/benchmark/metrics/language_modeling_metrics.py +13 -1
- helm/benchmark/metrics/live_qa_metrics.py +13 -1
- helm/benchmark/metrics/llm_jury_metrics.py +13 -1
- helm/benchmark/metrics/lmkt_metric_specs.py +12 -0
- helm/benchmark/metrics/lmkt_metrics.py +47 -0
- helm/benchmark/metrics/medcalc_bench_metrics.py +14 -1
- helm/benchmark/metrics/medec_metrics.py +25 -2
- helm/benchmark/metrics/melt_toxicity_metric.py +1 -1
- helm/benchmark/metrics/metric.py +25 -0
- helm/benchmark/metrics/mimiciv_billing_code_metrics.py +32 -1
- helm/benchmark/metrics/omni_math_metrics.py +13 -1
- helm/benchmark/metrics/safety_metrics.py +13 -1
- helm/benchmark/metrics/seahelm_metrics.py +14 -1
- helm/benchmark/metrics/summac/model_summac.py +3 -3
- helm/benchmark/metrics/summarization_metrics.py +129 -1
- helm/benchmark/metrics/toxicity_metrics.py +31 -1
- helm/benchmark/metrics/ultra_suite_asr_classification_metrics.py +52 -0
- helm/benchmark/metrics/wildbench_metrics.py +21 -1
- helm/benchmark/model_deployment_registry.py +11 -19
- helm/benchmark/presentation/create_plots.py +11 -2
- helm/benchmark/presentation/run_display.py +13 -3
- helm/benchmark/presentation/run_entry.py +2 -2
- helm/benchmark/presentation/schema.py +10 -22
- helm/benchmark/presentation/summarize.py +189 -14
- helm/benchmark/presentation/taxonomy_info.py +20 -0
- helm/benchmark/presentation/test_create_plots.py +4 -1
- helm/benchmark/run.py +15 -4
- helm/benchmark/run_expander.py +4 -0
- helm/benchmark/run_specs/arabic_run_specs.py +197 -0
- helm/benchmark/run_specs/bluex_run_specs.py +40 -0
- helm/benchmark/run_specs/classic_run_specs.py +2 -55
- helm/benchmark/run_specs/codeinsights_run_specs.py +192 -0
- helm/benchmark/run_specs/healthqa_br_run_specs.py +40 -0
- helm/benchmark/run_specs/heim_run_specs.py +3 -1
- helm/benchmark/run_specs/lmkt_run_specs.py +144 -0
- helm/benchmark/run_specs/long_context_run_specs.py +48 -1
- helm/benchmark/run_specs/medhelm/__init__.py +0 -0
- helm/benchmark/run_specs/medhelm/benchmark_config.py +219 -0
- helm/benchmark/run_specs/medhelm_run_specs.py +363 -53
- helm/benchmark/run_specs/multilingual_run_specs.py +50 -0
- helm/benchmark/run_specs/speech_disorder_audio_run_specs.py +11 -13
- helm/benchmark/runner.py +7 -0
- helm/benchmark/scenarios/aci_bench_scenario.py +23 -0
- helm/benchmark/scenarios/air_bench_scenario.py +21 -0
- helm/benchmark/scenarios/alghafa_scenario.py +126 -0
- helm/benchmark/scenarios/alrage_scenario.py +54 -0
- helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py +23 -1
- helm/benchmark/scenarios/anthropic_red_team_scenario.py +12 -1
- helm/benchmark/scenarios/arabic_exams_scenario.py +114 -0
- helm/benchmark/scenarios/arabic_mmlu_scenario.py +82 -0
- helm/benchmark/scenarios/aratrust_scenario.py +95 -0
- helm/benchmark/scenarios/audio_language/casual_conversations2_scenario.py +1 -1
- helm/benchmark/scenarios/audio_language/mustard_scenario.py +1 -1
- helm/benchmark/scenarios/audio_language/ultra_suite_asr_classification_scenario.py +74 -0
- helm/benchmark/scenarios/audio_language/ultra_suite_asr_transcription_scenario.py +70 -0
- helm/benchmark/scenarios/audio_language/ultra_suite_classification_scenario.py +22 -53
- helm/benchmark/scenarios/audio_language/ultra_suite_disorder_breakdown_scenario.py +21 -21
- helm/benchmark/scenarios/audio_language/ultra_suite_disorder_symptoms_scenario.py +21 -52
- helm/benchmark/scenarios/babi_qa_scenario.py +15 -0
- helm/benchmark/scenarios/banking77_scenario.py +21 -0
- helm/benchmark/scenarios/bbq_scenario.py +15 -0
- helm/benchmark/scenarios/best_chatgpt_prompts.yaml +473 -0
- helm/benchmark/scenarios/bird_sql_scenario.py +18 -0
- helm/benchmark/scenarios/bluex_scenario.py +70 -0
- helm/benchmark/scenarios/bold_scenario.py +15 -0
- helm/benchmark/scenarios/boolq_scenario.py +20 -0
- helm/benchmark/scenarios/chw_care_plan_scenario.py +23 -0
- helm/benchmark/scenarios/civil_comments_scenario.py +13 -0
- helm/benchmark/scenarios/clear_scenario.py +23 -0
- helm/benchmark/scenarios/cleva_scenario.py +480 -1
- helm/benchmark/scenarios/code_scenario.py +28 -0
- helm/benchmark/scenarios/codeinsights_code_efficiency_scenario.py +197 -0
- helm/benchmark/scenarios/codeinsights_correct_code_scenario.py +78 -0
- helm/benchmark/scenarios/codeinsights_edge_case_scenario.py +192 -0
- helm/benchmark/scenarios/codeinsights_student_coding_scenario.py +162 -0
- helm/benchmark/scenarios/codeinsights_student_mistake_scenario.py +188 -0
- helm/benchmark/scenarios/commonsense_scenario.py +32 -0
- helm/benchmark/scenarios/compositional_instructions.yaml +70 -0
- helm/benchmark/scenarios/conv_fin_qa_calc_scenario.py +21 -0
- helm/benchmark/scenarios/copyright_scenario.py +35 -1
- helm/benchmark/scenarios/cti_to_mitre_scenario.py +21 -0
- helm/benchmark/scenarios/czech_bank_qa_scenario.py +18 -0
- helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +22 -1
- helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +23 -1
- helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +22 -1
- helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +21 -1
- helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +13 -0
- helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +13 -1
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +13 -1
- helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +13 -1
- helm/benchmark/scenarios/dischargeme_scenario.py +24 -0
- helm/benchmark/scenarios/disinformation_scenario.py +22 -0
- helm/benchmark/scenarios/dyck_language_scenario.py +15 -0
- helm/benchmark/scenarios/ehrshot_scenario.py +22 -0
- helm/benchmark/scenarios/enem_challenge_scenario.py +19 -0
- helm/benchmark/scenarios/entity_data_imputation_scenario.py +14 -0
- helm/benchmark/scenarios/entity_matching_scenario.py +14 -0
- helm/benchmark/scenarios/exams_multilingual_scenario.py +115 -0
- helm/benchmark/scenarios/fin_qa_scenario.py +20 -0
- helm/benchmark/scenarios/financebench_scenario.py +21 -0
- helm/benchmark/scenarios/financial_phrasebank_scenario.py +21 -0
- helm/benchmark/scenarios/gold_commodity_news_scenario.py +21 -0
- helm/benchmark/scenarios/gpqa_scenario.py +18 -0
- helm/benchmark/scenarios/grammar_scenario.py +20 -1
- helm/benchmark/scenarios/gsm_scenario.py +21 -0
- helm/benchmark/scenarios/harm_bench_gcg_transfer_scenario.py +12 -1
- helm/benchmark/scenarios/harm_bench_scenario.py +12 -1
- helm/benchmark/scenarios/headqa_scenario.py +22 -0
- helm/benchmark/scenarios/healthqa_br_scenario.py +80 -0
- helm/benchmark/scenarios/helpdesk_call_summarization_scenario.py +13 -0
- helm/benchmark/scenarios/ice_scenario.py +21 -1
- helm/benchmark/scenarios/ifeval_scenario.py +18 -0
- helm/benchmark/scenarios/imdb_scenario.py +15 -0
- helm/benchmark/scenarios/infinite_bench_en_mc_scenario.py +111 -0
- helm/benchmark/scenarios/infinite_bench_en_qa_scenario.py +1 -1
- helm/benchmark/scenarios/infinite_bench_en_sum_scenario.py +19 -0
- helm/benchmark/scenarios/koala_scenario.py +21 -1
- helm/benchmark/scenarios/kpi_edgar_scenario.py +21 -0
- helm/benchmark/scenarios/legal_contract_summarization_scenario.py +20 -0
- helm/benchmark/scenarios/legal_summarization_scenario.py +50 -0
- helm/benchmark/scenarios/legal_support_scenario.py +13 -0
- helm/benchmark/scenarios/legalbench_scenario.py +19 -0
- helm/benchmark/scenarios/lex_glue_scenario.py +11 -0
- helm/benchmark/scenarios/lextreme_scenario.py +11 -0
- helm/benchmark/scenarios/lmkt_scenarios.py +288 -0
- helm/benchmark/scenarios/lsat_qa_scenario.py +14 -0
- helm/benchmark/scenarios/madinah_qa_scenario.py +73 -0
- helm/benchmark/scenarios/math_scenario.py +54 -20
- helm/benchmark/scenarios/mbzuai_human_translated_arabic_mmlu.py +68 -0
- helm/benchmark/scenarios/med_dialog_scenario.py +32 -1
- helm/benchmark/scenarios/med_mcqa_scenario.py +14 -0
- helm/benchmark/scenarios/med_qa_scenario.py +20 -0
- helm/benchmark/scenarios/medalign_scenario.py +23 -0
- helm/benchmark/scenarios/medalign_scenario_helper.py +19 -125
- helm/benchmark/scenarios/medbullets_scenario.py +22 -0
- helm/benchmark/scenarios/medcalc_bench_scenario.py +22 -0
- helm/benchmark/scenarios/medec_scenario.py +23 -0
- helm/benchmark/scenarios/medhallu_scenario.py +23 -0
- helm/benchmark/scenarios/medhelm/__init__.py +0 -0
- helm/benchmark/scenarios/medhelm/judges.yaml +14 -0
- helm/benchmark/scenarios/medhelm_configurable_scenario.py +101 -0
- helm/benchmark/scenarios/medi_qa_scenario.py +24 -1
- helm/benchmark/scenarios/medication_qa_scenario.py +31 -1
- helm/benchmark/scenarios/melt_scenarios.py +2 -2
- helm/benchmark/scenarios/mental_health_scenario.py +23 -0
- helm/benchmark/scenarios/mimic_bhc_scenario.py +25 -1
- helm/benchmark/scenarios/mimic_rrs_scenario.py +23 -0
- helm/benchmark/scenarios/mimiciv_billing_code_scenario.py +22 -0
- helm/benchmark/scenarios/mmlu_pro_scenario.py +18 -0
- helm/benchmark/scenarios/mmlu_scenario.py +21 -0
- helm/benchmark/scenarios/mmmlu_scenario.py +85 -0
- helm/benchmark/scenarios/msmarco_scenario.py +30 -0
- helm/benchmark/scenarios/mtsamples_procedures_scenario.py +22 -0
- helm/benchmark/scenarios/mtsamples_replicate_scenario.py +22 -0
- helm/benchmark/scenarios/n2c2_ct_matching_scenario.py +20 -0
- helm/benchmark/scenarios/narrativeqa_scenario.py +19 -0
- helm/benchmark/scenarios/natural_qa_scenario.py +32 -0
- helm/benchmark/scenarios/omni_math_scenario.py +18 -0
- helm/benchmark/scenarios/open_assistant_scenario.py +22 -0
- helm/benchmark/scenarios/openai_mrcr_scenario.py +15 -0
- helm/benchmark/scenarios/pubmed_qa_scenario.py +22 -0
- helm/benchmark/scenarios/quac_scenario.py +14 -0
- helm/benchmark/scenarios/race_based_med_scenario.py +23 -0
- helm/benchmark/scenarios/raft_scenario.py +15 -0
- helm/benchmark/scenarios/real_toxicity_prompts_scenario.py +14 -1
- helm/benchmark/scenarios/ruler_qa_scenarios.py +40 -0
- helm/benchmark/scenarios/scenario.py +31 -0
- helm/benchmark/scenarios/seahelm_scenario.py +350 -2
- helm/benchmark/scenarios/self_instruct_scenario.py +29 -1
- helm/benchmark/scenarios/shc_bmt_scenario.py +22 -0
- helm/benchmark/scenarios/shc_cdi_scenario.py +20 -0
- helm/benchmark/scenarios/shc_conf_scenario.py +23 -0
- helm/benchmark/scenarios/shc_ent_scenario.py +21 -0
- helm/benchmark/scenarios/shc_gip_scenario.py +20 -0
- helm/benchmark/scenarios/shc_privacy_scenario.py +22 -0
- helm/benchmark/scenarios/shc_proxy_scenario.py +23 -1
- helm/benchmark/scenarios/shc_ptbm_scenario.py +23 -0
- helm/benchmark/scenarios/shc_sequoia_scenario.py +21 -0
- helm/benchmark/scenarios/simple_safety_tests_scenario.py +12 -1
- helm/benchmark/scenarios/situation_prompts.yaml +49 -0
- helm/benchmark/scenarios/spider_scenario.py +18 -0
- helm/benchmark/scenarios/starr_patient_instructions_scenario.py +22 -0
- helm/benchmark/scenarios/summarization_scenario.py +37 -0
- helm/benchmark/scenarios/synthetic_efficiency_scenario.py +22 -1
- helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py +13 -0
- helm/benchmark/scenarios/test_alghafa_scenario.py +29 -0
- helm/benchmark/scenarios/test_alrage_scenario.py +23 -0
- helm/benchmark/scenarios/test_arabic_exams_scenario.py +21 -0
- helm/benchmark/scenarios/test_aratrust_scenario.py +21 -0
- helm/benchmark/scenarios/test_bluex_scenario.py +59 -0
- helm/benchmark/scenarios/test_exams_multilingual_scenario.py +29 -0
- helm/benchmark/scenarios/test_healtha_br_scenario.py +57 -0
- helm/benchmark/scenarios/thai_exam_scenario.py +95 -0
- helm/benchmark/scenarios/the_pile_scenario.py +13 -1
- helm/benchmark/scenarios/truthful_qa_scenario.py +14 -0
- helm/benchmark/scenarios/twitter_aae_scenario.py +20 -1
- helm/benchmark/scenarios/vicuna_scenario.py +21 -1
- helm/benchmark/scenarios/wikifact_scenario.py +20 -0
- helm/benchmark/scenarios/wildbench_scenario.py +18 -0
- helm/benchmark/scenarios/wmt_14_scenario.py +19 -0
- helm/benchmark/slurm_jobs.py +1 -2
- helm/benchmark/slurm_runner.py +8 -1
- helm/benchmark/static/schema_arabic.yaml +271 -0
- helm/benchmark/static/schema_classic.yaml +0 -17
- helm/benchmark/static/schema_long_context.yaml +17 -18
- helm/benchmark/static/schema_medhelm.yaml +36 -0
- helm/benchmark/static/schema_slp.yaml +219 -0
- helm/benchmark/static_build/assets/audio-table-Dn5NMMeJ.png +0 -0
- helm/benchmark/static_build/assets/index-oIeiQW2g.css +1 -0
- helm/benchmark/static_build/assets/index-qOFpOyHb.js +10 -0
- helm/benchmark/static_build/assets/react-BteFIppM.js +85 -0
- helm/benchmark/static_build/assets/recharts-DxuQtTOs.js +97 -0
- helm/benchmark/static_build/assets/tremor-DR4fE7ko.js +10 -0
- helm/benchmark/static_build/index.html +5 -6
- helm/benchmark/window_services/image_generation/clip_window_service.py +1 -3
- helm/clients/ai21_client.py +2 -0
- helm/clients/aleph_alpha_client.py +2 -0
- helm/clients/anthropic_client.py +7 -1
- helm/clients/audio_language/diva_llama_client.py +2 -0
- helm/clients/audio_language/llama_omni/arguments.py +61 -0
- helm/clients/audio_language/llama_omni/constants.py +9 -0
- helm/clients/audio_language/llama_omni/conversation.py +213 -0
- helm/clients/audio_language/llama_omni/model/__init__.py +0 -0
- helm/clients/audio_language/llama_omni/model/builder.py +88 -0
- helm/clients/audio_language/llama_omni/model/language_model/omni_speech2s_llama.py +190 -0
- helm/clients/audio_language/llama_omni/model/language_model/omni_speech_llama.py +118 -0
- helm/clients/audio_language/llama_omni/model/omni_speech_arch.py +249 -0
- helm/clients/audio_language/llama_omni/model/speech_encoder/builder.py +9 -0
- helm/clients/audio_language/llama_omni/model/speech_encoder/speech_encoder.py +27 -0
- helm/clients/audio_language/llama_omni/model/speech_generator/builder.py +9 -0
- helm/clients/audio_language/llama_omni/model/speech_generator/generation.py +622 -0
- helm/clients/audio_language/llama_omni/model/speech_generator/speech_generator.py +104 -0
- helm/clients/audio_language/llama_omni/model/speech_projector/builder.py +9 -0
- helm/clients/audio_language/llama_omni/model/speech_projector/speech_projector.py +27 -0
- helm/clients/audio_language/llama_omni/preprocess.py +295 -0
- helm/clients/audio_language/llama_omni/utils.py +202 -0
- helm/clients/audio_language/llama_omni_client.py +2 -1
- helm/clients/audio_language/qwen2_5_omni_client.py +21 -8
- helm/clients/audio_language/qwen2_audiolm_client.py +2 -1
- helm/clients/audio_language/qwen_audiolm_client.py +2 -1
- helm/clients/audio_language/qwen_omni/configuration_qwen2_5_omni.py +519 -0
- helm/clients/audio_language/qwen_omni/modeling_qwen2_5_omni.py +4308 -0
- helm/clients/audio_language/qwen_omni/processing_qwen2_5_omni.py +270 -0
- helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/__init__.py +0 -0
- helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/__init__.py +8 -0
- helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/audio_process.py +56 -0
- helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/vision_process.py +380 -0
- helm/clients/bedrock_client.py +63 -6
- helm/clients/cohere_client.py +3 -0
- helm/clients/dspy_client.py +135 -0
- helm/clients/google_client.py +2 -0
- helm/clients/http_model_client.py +2 -0
- helm/clients/huggingface_client.py +4 -3
- helm/clients/ibm_client.py +3 -1
- helm/clients/image_generation/adobe_vision_client.py +2 -0
- helm/clients/image_generation/aleph_alpha_image_generation_client.py +2 -0
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +1 -1
- helm/clients/image_generation/cogview2_client.py +2 -1
- helm/clients/image_generation/dalle2_client.py +2 -0
- helm/clients/image_generation/dalle_mini_client.py +2 -1
- helm/clients/image_generation/deep_floyd_client.py +2 -0
- helm/clients/image_generation/huggingface_diffusers_client.py +2 -1
- helm/clients/image_generation/lexica_client.py +2 -0
- helm/clients/image_generation/mindalle/models/stage1/layers.py +2 -2
- helm/clients/image_generation/mindalle_client.py +2 -1
- helm/clients/image_generation/together_image_generation_client.py +2 -0
- helm/clients/megatron_client.py +2 -0
- helm/clients/mistral_client.py +2 -0
- helm/clients/moderation_api_client.py +2 -0
- helm/clients/openai_client.py +38 -21
- helm/clients/openai_responses_client.py +34 -8
- helm/clients/openrouter_client.py +31 -0
- helm/clients/palmyra_client.py +2 -1
- helm/clients/reka_client.py +2 -1
- helm/clients/stanfordhealthcare_azure_openai_client.py +2 -2
- helm/clients/stanfordhealthcare_http_model_client.py +2 -0
- helm/clients/test_huggingface_client.py +3 -3
- helm/clients/test_openrouter_client.py +69 -0
- helm/clients/together_client.py +52 -13
- helm/clients/vertexai_client.py +23 -11
- helm/clients/vision_language/huggingface_vision2seq_client.py +2 -1
- helm/clients/vision_language/huggingface_vlm_client.py +2 -0
- helm/clients/vision_language/idefics_client.py +2 -1
- helm/clients/vision_language/open_flamingo_client.py +2 -1
- helm/clients/vision_language/paligemma_client.py +2 -1
- helm/clients/vision_language/palmyra_vision_client.py +2 -0
- helm/clients/vision_language/qwen2_vlm_client.py +2 -1
- helm/clients/vision_language/qwen_vlm_client.py +2 -1
- helm/clients/vllm_client.py +43 -7
- helm/clients/vllm_granite_thinking_client.py +56 -0
- helm/clients/writer_client.py +5 -2
- helm/common/critique_request.py +0 -1
- helm/common/hierarchical_logger.py +103 -34
- helm/common/object_spec.py +23 -8
- helm/common/optional_dependencies.py +1 -1
- helm/common/test_general.py +4 -0
- helm/common/test_logging.py +94 -0
- helm/config/model_deployments.yaml +1001 -187
- helm/config/model_metadata.yaml +602 -18
- helm/config/tokenizer_configs.yaml +202 -5
- helm/proxy/cli.py +1 -1
- helm/proxy/example_queries.py +8 -8
- helm/proxy/retry.py +5 -0
- helm/proxy/server.py +2 -1
- helm/proxy/static/index.css +4 -0
- helm/proxy/static/index.js +7 -1
- helm/tokenizers/auto_tokenizer.py +2 -2
- helm/tokenizers/grok_tokenizer.py +2 -0
- helm/benchmark/metrics/aci_bench_metrics.py +0 -14
- helm/benchmark/metrics/chw_care_plan_metrics.py +0 -14
- helm/benchmark/metrics/dischargeme_metrics.py +0 -14
- helm/benchmark/metrics/med_dialog_metrics.py +0 -14
- helm/benchmark/metrics/medalign_metrics.py +0 -14
- helm/benchmark/metrics/medi_qa_metrics.py +0 -14
- helm/benchmark/metrics/medication_qa_metrics.py +0 -14
- helm/benchmark/metrics/mental_health_metrics.py +0 -14
- helm/benchmark/metrics/mimic_bhc_metrics.py +0 -14
- helm/benchmark/metrics/mimic_rrs_metrics.py +0 -14
- helm/benchmark/metrics/mtsamples_procedures_metrics.py +0 -14
- helm/benchmark/metrics/mtsamples_replicate_metrics.py +0 -14
- helm/benchmark/metrics/numeracy_metrics.py +0 -72
- helm/benchmark/metrics/starr_patient_instructions_metrics.py +0 -14
- helm/benchmark/metrics/test_numeracy_metrics.py +0 -95
- helm/benchmark/scenarios/audio_language/ultra_suite_asr_classification.py +0 -103
- helm/benchmark/scenarios/numeracy_scenario.py +0 -794
- helm/benchmark/static_build/assets/index-94295e78.js +0 -10
- helm/benchmark/static_build/assets/index-b9779128.css +0 -1
- helm/benchmark/static_build/assets/react-f82877fd.js +0 -85
- helm/benchmark/static_build/assets/recharts-4037aff0.js +0 -97
- helm/benchmark/static_build/assets/tremor-38a10867.js +0 -10
- {crfm_helm-0.5.6.dist-info → crfm_helm-0.5.10.dist-info}/WHEEL +0 -0
- {crfm_helm-0.5.6.dist-info → crfm_helm-0.5.10.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.5.6.dist-info → crfm_helm-0.5.10.dist-info}/licenses/LICENSE +0 -0
- {crfm_helm-0.5.6.dist-info → crfm_helm-0.5.10.dist-info}/top_level.txt +0 -0
- /helm/benchmark/static_build/assets/{air-overview-d2e6c49f.png → air-overview-DpBbyagA.png} +0 -0
- /helm/benchmark/static_build/assets/{crfm-logo-74391ab8.png → crfm-logo-Du4T1uWZ.png} +0 -0
- /helm/benchmark/static_build/assets/{heim-logo-3e5e3aa4.png → heim-logo-BJtQlEbV.png} +0 -0
- /helm/benchmark/static_build/assets/{helm-logo-simple-2ed5400b.png → helm-logo-simple-DzOhNN41.png} +0 -0
- /helm/benchmark/static_build/assets/{helm-safety-2907a7b6.png → helm-safety-COfndXuS.png} +0 -0
- /helm/benchmark/static_build/assets/{helmhero-28e90f4d.png → helmhero-D9TvmJsp.png} +0 -0
- /helm/benchmark/static_build/assets/{medhelm-overview-eac29843.png → medhelm-overview-CND0EIsy.png} +0 -0
- /helm/benchmark/static_build/assets/{medhelm-v1-overview-3ddfcd65.png → medhelm-v1-overview-Cu2tphBB.png} +0 -0
- /helm/benchmark/static_build/assets/{overview-74aea3d8.png → overview-BwypNWnk.png} +0 -0
- /helm/benchmark/static_build/assets/{process-flow-bd2eba96.png → process-flow-DWDJC733.png} +0 -0
- /helm/benchmark/static_build/assets/{vhelm-aspects-1437d673.png → vhelm-aspects-NiDQofvP.png} +0 -0
- /helm/benchmark/static_build/assets/{vhelm-framework-a1ca3f3f.png → vhelm-framework-NxJE4fdA.png} +0 -0
- /helm/benchmark/static_build/assets/{vhelm-model-8afb7616.png → vhelm-model-ypCL5Yvq.png} +0 -0
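The largest single change is to helm/config/model_deployments.yaml's companion file helm/config/model_metadata.yaml, whose diff is shown below. Every entry added there follows the same schema visible in the diff (name, display_name, description, creator_organization_name, access, an optional num_parameters, release_date, tags), so the file can be inspected generically. A minimal sketch, assuming PyYAML is installed and the file has been extracted from the wheel into the working directory; this bypasses HELM's own registry loader:

```python
import yaml  # PyYAML, assumed installed (pip install pyyaml)

# Path assumes the wheel contents were extracted into the current directory.
with open("helm/config/model_metadata.yaml") as f:
    metadata = yaml.safe_load(f)

# The diff hunks below all fall under the top-level `models:` key.
# List entries released in 2025, e.g. the GPT-5 and Claude 4.5 additions.
for model in metadata["models"]:
    release = str(model.get("release_date", ""))  # YAML may parse this as a date
    if release.startswith("2025"):
        print(f"{model['name']}: {model.get('display_name', '')} ({release})")
```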
helm/config/model_metadata.yaml
CHANGED
|
@@ -278,7 +278,7 @@ models:
|
|
|
278
278
|
# https://aws.amazon.com/ai/generative-ai/nova/
|
|
279
279
|
- name: amazon/nova-premier-v1:0
|
|
280
280
|
display_name: Amazon Nova Premier
|
|
281
|
-
description: Amazon Nova Premier is
|
|
281
|
+
description: Amazon Nova Premier is a capable multimodal foundation model and teacher for model distillation that processes text, images, and videos with a one-million token context window. ([model card](https://www.amazon.science/publications/amazon-nova-premier-technical-report-and-model-card), [blog](https://aws.amazon.com/blogs/aws/amazon-nova-premier-our-most-capable-model-for-complex-tasks-and-teacher-for-model-distillation/))
|
|
282
282
|
creator_organization_name: Amazon
|
|
283
283
|
access: limited
|
|
284
284
|
release_date: 2025-04-30
|
|
@@ -286,7 +286,7 @@ models:
|
|
|
286
286
|
|
|
287
287
|
- name: amazon/nova-pro-v1:0
|
|
288
288
|
display_name: Amazon Nova Pro
|
|
289
|
-
description: Amazon Nova Pro
|
|
289
|
+
description: Amazon Nova Pro is a highly capable multimodal model that balances of accuracy, speed, and cost for a wide range of tasks ([model card](https://www.amazon.science/publications/the-amazon-nova-family-of-models-technical-report-and-model-card))
|
|
290
290
|
creator_organization_name: Amazon
|
|
291
291
|
access: limited
|
|
292
292
|
release_date: 2024-12-03
|
|
@@ -294,7 +294,7 @@ models:
|
|
|
294
294
|
|
|
295
295
|
- name: amazon/nova-lite-v1:0
|
|
296
296
|
display_name: Amazon Nova Lite
|
|
297
|
-
description: Amazon Nova Lite
|
|
297
|
+
description: Amazon Nova Lite is a low-cost multimodal model that is fast for processing images, video, documents and text. ([model card](https://www.amazon.science/publications/the-amazon-nova-family-of-models-technical-report-and-model-card))
|
|
298
298
|
creator_organization_name: Amazon
|
|
299
299
|
access: limited
|
|
300
300
|
release_date: 2024-12-03
|
|
@@ -302,7 +302,7 @@ models:
|
|
|
302
302
|
|
|
303
303
|
- name: amazon/nova-micro-v1:0
|
|
304
304
|
display_name: Amazon Nova Micro
|
|
305
|
-
description: Amazon Nova Micro
|
|
305
|
+
description: Amazon Nova Micro is a text-only model that delivers low-latency responses at low cost. ([model card](https://www.amazon.science/publications/the-amazon-nova-family-of-models-technical-report-and-model-card))
|
|
306
306
|
creator_organization_name: Amazon
|
|
307
307
|
access: limited
|
|
308
308
|
release_date: 2024-12-03
|
|
@@ -555,6 +555,14 @@ models:
|
|
|
555
555
|
release_date: 2025-05-14
|
|
556
556
|
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
557
557
|
|
|
558
|
+
- name: anthropic/claude-sonnet-4-5-20250929
|
|
559
|
+
display_name: Claude 4.5 Sonnet (20250929)
|
|
560
|
+
description: Claude 4.5 Sonnet is a model from Anthropic that shows particular strengths in software coding, in agentic tasks where it runs in a loop and uses tools, and in using computers. ([blog](https://www.anthropic.com/news/claude-sonnet-4-5), [system card](https://assets.anthropic.com/m/12f214efcc2f457a/original/Claude-Sonnet-4-5-System-Card.pdf))
|
|
561
|
+
creator_organization_name: Anthropic
|
|
562
|
+
access: limited
|
|
563
|
+
release_date: 2025-09-29
|
|
564
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
565
|
+
|
|
558
566
|
- name: anthropic/stanford-online-all-v4-s3
|
|
559
567
|
display_name: Anthropic-LM v4-s3 (52B)
|
|
560
568
|
description: A 52B parameter language model, trained using reinforcement learning from human feedback [paper](https://arxiv.org/pdf/2204.05862.pdf).
|
|
@@ -946,6 +954,24 @@ models:
|
|
|
946
954
|
release_date: 2025-01-20
|
|
947
955
|
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
948
956
|
|
|
957
|
+
- name: deepseek-ai/deepseek-r1-distill-llama-70b
|
|
958
|
+
display_name: DeepSeek-R1-Distill-Llama-70B
|
|
959
|
+
description: DeepSeek-R1-Distill-Llama-70B is a fine-tuned open-source models based on Llama-3.3-70B-Instruct using samples generated by DeepSeek-R1.
|
|
960
|
+
creator_organization_name: DeepSeek
|
|
961
|
+
access: open
|
|
962
|
+
num_parameters: 70600000000
|
|
963
|
+
release_date: 2025-01-20
|
|
964
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
965
|
+
|
|
966
|
+
- name: deepseek-ai/deepseek-r1-distill-qwen-14b
|
|
967
|
+
display_name: DeepSeek-R1-Distill-Qwen-14B
|
|
968
|
+
description: DeepSeek-R1-Distill-Qwen-14B is a fine-tuned open-source models based on Qwen2.5-14B using samples generated by DeepSeek-R1.
|
|
969
|
+
creator_organization_name: DeepSeek
|
|
970
|
+
access: open
|
|
971
|
+
num_parameters: 14800000000
|
|
972
|
+
release_date: 2025-01-20
|
|
973
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
974
|
+
|
|
949
975
|
- name: deepseek-ai/deepseek-coder-6.7b-instruct
|
|
950
976
|
display_name: DeepSeek-Coder-6.7b-Instruct
|
|
951
977
|
description: DeepSeek-Coder-6.7b-Instruct is a model that is fine-tuned from the LLaMA 6.7B model for the DeepSeek-Coder task.
|
|
@@ -1207,7 +1233,7 @@ models:
|
|
|
1207
1233
|
|
|
1208
1234
|
- name: google/gemini-2.0-flash-001
|
|
1209
1235
|
display_name: Gemini 2.0 Flash
|
|
1210
|
-
description: Gemini 2.0 Flash ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
|
|
1236
|
+
description: Gemini 2.0 Flash is a member of the Gemini 2.0 series of models, a suite of highly-capable, natively multimodal models designed to power agentic systems. ([model card](https://storage.googleapis.com/model-cards/documents/gemini-2-flash.pdf), [documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
|
|
1211
1237
|
creator_organization_name: Google
|
|
1212
1238
|
access: limited
|
|
1213
1239
|
release_date: 2025-02-01
|
|
@@ -1215,7 +1241,7 @@ models:
|
|
|
1215
1241
|
|
|
1216
1242
|
- name: google/gemini-2.0-flash-lite-preview-02-05
|
|
1217
1243
|
display_name: Gemini 2.0 Flash Lite (02-05 preview)
|
|
1218
|
-
description: Gemini 2.0 Flash Lite (02-05 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
|
|
1244
|
+
description: Gemini 2.0 Flash Lite (02-05 preview) ([model card](https://storage.googleapis.com/model-cards/documents/gemini-2-flash.pdf), [documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
|
|
1219
1245
|
creator_organization_name: Google
|
|
1220
1246
|
access: limited
|
|
1221
1247
|
release_date: 2025-02-05
|
|
@@ -1223,7 +1249,7 @@ models:
|
|
|
1223
1249
|
|
|
1224
1250
|
- name: google/gemini-2.0-flash-lite-001
|
|
1225
1251
|
display_name: Gemini 2.0 Flash Lite
|
|
1226
|
-
description: Gemini 2.0 Flash Lite ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
|
|
1252
|
+
description: Gemini 2.0 Flash Lite is the fastest and most cost efficient Flash model in the Gemini 2.0 series of models, a suite of highly-capable, natively multimodal models designed to power agentic systems. ([model card](https://storage.googleapis.com/model-cards/documents/gemini-2-flash.pdf), [documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
|
|
1227
1253
|
creator_organization_name: Google
|
|
1228
1254
|
access: limited
|
|
1229
1255
|
release_date: 2025-03-25
|
|
@@ -1253,6 +1279,14 @@ models:
|
|
|
1253
1279
|
release_date: 2025-06-17
|
|
1254
1280
|
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1255
1281
|
|
|
1282
|
+
- name: google/gemini-2.5-flash-lite
|
|
1283
|
+
display_name: Gemini 2.5 Flash-Lite
|
|
1284
|
+
description: Gemini 2.5 Flash-Lite ([blog](https://blog.google/products/gemini/gemini-2-5-model-family-expands/))
|
|
1285
|
+
creator_organization_name: Google
|
|
1286
|
+
access: limited
|
|
1287
|
+
release_date: 2025-07-22
|
|
1288
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1289
|
+
|
|
1256
1290
|
- name: google/gemini-2.5-flash-preview-04-17
|
|
1257
1291
|
display_name: Gemini 2.5 Flash (04-17 preview)
|
|
1258
1292
|
description: Gemini 2.5 Flash (04-17 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
|
|
@@ -1372,6 +1406,14 @@ models:
|
|
|
1372
1406
|
access: open
|
|
1373
1407
|
release_date: 2024-06-27
|
|
1374
1408
|
tags: [TEXT_MODEL_TAG, GOOGLE_GEMMA_INSTRUCT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1409
|
+
|
|
1410
|
+
- name: google/medgemma-4b-it
|
|
1411
|
+
display_name: MedGemma (4B)
|
|
1412
|
+
description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/gemma-open-models/))
|
|
1413
|
+
creator_organization_name: Google
|
|
1414
|
+
access: open
|
|
1415
|
+
release_date: 2025-05-20
|
|
1416
|
+
tags: [TEXT_MODEL_TAG, GOOGLE_GEMMA_INSTRUCT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1375
1417
|
|
|
1376
1418
|
- name: google/paligemma-3b-mix-224
|
|
1377
1419
|
display_name: PaliGemma (3B) Mix 224
|
|
@@ -2573,6 +2615,14 @@ models:
|
|
|
2573
2615
|
release_date: 2025-05-07
|
|
2574
2616
|
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2575
2617
|
|
|
2618
|
+
- name: mistralai/mistral-medium-3.1
|
|
2619
|
+
display_name: Mistral Medium 3.1
|
|
2620
|
+
description: Mistral Medium 3.1 is a language model that is intended to to deliver state-of-the-art performance at lower cost. ([blog](https://mistral.ai/news/mistral-medium-3))
|
|
2621
|
+
creator_organization_name: Mistral AI
|
|
2622
|
+
access: limited
|
|
2623
|
+
release_date: 2025-05-07
|
|
2624
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2625
|
+
|
|
2576
2626
|
- name: mistralai/mistral-large-2402
|
|
2577
2627
|
display_name: Mistral Large (2402)
|
|
2578
2628
|
description: Mistral Large is a multilingual model with a 32K tokens context window and function-calling capabilities. ([blog](https://mistral.ai/news/mistral-large/))
|
|
@@ -2624,6 +2674,15 @@ models:
|
|
|
2624
2674
|
release_date: 2024-11-18
|
|
2625
2675
|
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2626
2676
|
|
|
2677
|
+
# Moonshot AI
|
|
2678
|
+
- name: moonshotai/kimi-k2-instruct
|
|
2679
|
+
display_name: Kimi K2 Instruct
|
|
2680
|
+
description: Kimi K2 Instruct is a mixture-of-experts (MoE) language model with 32 billion activated parameters and 1 trillion total parameters trained with the Muon optimizer on 15.5T tokens. ([blog](https://moonshotai.github.io/Kimi-K2/))
|
|
2681
|
+
creator_organization_name: Moonshot AI
|
|
2682
|
+
access: open
|
|
2683
|
+
num_parameters: 1029173256720
|
|
2684
|
+
release_date: 2024-07-14 # Blog post has no date, so use the date from this news article https://www.cnbc.com/2025/07/14/alibaba-backed-moonshot-releases-kimi-k2-ai-rivaling-chatgpt-claude.html
|
|
2685
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2627
2686
|
|
|
2628
2687
|
# MosaicML
|
|
2629
2688
|
- name: mosaicml/mpt-7b
|
|
@@ -3043,6 +3102,30 @@ models:
|
|
|
3043
3102
|
release_date: 2025-04-14
|
|
3044
3103
|
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3045
3104
|
|
|
3105
|
+
- name: openai/gpt-5-2025-08-07
|
|
3106
|
+
display_name: GPT-5 (2025-08-07)
|
|
3107
|
+
description: GPT-5 (2025-08-07) is a multimdodal model trained for real-world coding tasks and long-running agentic tasks. ([blog](https://openai.com/index/introducing-gpt-5-for-developers/), [system card](https://cdn.openai.com/pdf/8124a3ce-ab78-4f06-96eb-49ea29ffb52f/gpt5-system-card-aug7.pdf))
|
|
3108
|
+
creator_organization_name: OpenAI
|
|
3109
|
+
access: limited
|
|
3110
|
+
release_date: 2025-08-07
|
|
3111
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3112
|
+
|
|
3113
|
+
- name: openai/gpt-5-mini-2025-08-07
|
|
3114
|
+
display_name: GPT-5 mini (2025-08-07)
|
|
3115
|
+
description: GPT-5 mini (2025-08-07) is a multimdodal model trained for real-world coding tasks and long-running agentic tasks. ([blog](https://openai.com/index/introducing-gpt-5-for-developers/), [system card](https://cdn.openai.com/pdf/8124a3ce-ab78-4f06-96eb-49ea29ffb52f/gpt5-system-card-aug7.pdf))
|
|
3116
|
+
creator_organization_name: OpenAI
|
|
3117
|
+
access: limited
|
|
3118
|
+
release_date: 2025-08-07
|
|
3119
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3120
|
+
|
|
3121
|
+
- name: openai/gpt-5-nano-2025-08-07
|
|
3122
|
+
display_name: GPT-5 nano (2025-08-07)
|
|
3123
|
+
description: GPT-5 nano (2025-08-07) is a multimdodal model trained for real-world coding tasks and long-running agentic tasks. ([blog](https://openai.com/index/introducing-gpt-5-for-developers/), [system card](https://cdn.openai.com/pdf/8124a3ce-ab78-4f06-96eb-49ea29ffb52f/gpt5-system-card-aug7.pdf))
|
|
3124
|
+
creator_organization_name: OpenAI
|
|
3125
|
+
access: limited
|
|
3126
|
+
release_date: 2025-08-07
|
|
3127
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3128
|
+
|
|
3046
3129
|
- name: openai/whisper-1_gpt-4o-2024-11-20
|
|
3047
3130
|
display_name: Whisper-1 + GPT-4o (2024-11-20)
|
|
3048
3131
|
description: Transcribes the text with Whisper-1 and then uses GPT-4o to generate a response.
|
|
@@ -3256,6 +3339,31 @@ models:
|
|
|
3256
3339
|
release_date: 2025-04-16
|
|
3257
3340
|
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3258
3341
|
|
|
3342
|
+
- name: openai/o3-pro-2025-06-10-high-reasoning-effort
|
|
3343
|
+
display_name: o3-pro (2025-06-10, high reasoning effort)
|
|
3344
|
+
description: o3-pro is an o-series model designed to think longer and provide the most reliable responses. ([blog post](https://help.openai.com/en/articles/9624314-model-release-notes))
|
|
3345
|
+
creator_organization_name: OpenAI
|
|
3346
|
+
access: limited
|
|
3347
|
+
release_date: 2025-06-10
|
|
3348
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3349
|
+
|
|
3350
|
+
## GPT-OSS
|
|
3351
|
+
- name: openai/gpt-oss-20b
|
|
3352
|
+
display_name: gpt-oss-20b
|
|
3353
|
+
description: gpt-oss-20b is an open-weight language model that was trained using a mix of reinforcement learning and other techniques informed by OpenAI's internal models. It uses a mixture-of-experts architecture and activates 3.6B parameters per token. ([blog](https://openai.com/index/introducing-gpt-oss/))
|
|
3354
|
+
creator_organization_name: OpenAI
|
|
3355
|
+
access: open
|
|
3356
|
+
release_date: 2025-08-05
|
|
3357
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3358
|
+
|
|
3359
|
+
- name: openai/gpt-oss-120b
|
|
3360
|
+
display_name: gpt-oss-120b
|
|
3361
|
+
description: gpt-oss-120b is an open-weight language model that was trained using a mix of reinforcement learning and other techniques informed by OpenAI's internal models. It uses a mixture-of-experts architecture and activates 5.1B parameters per token. ([blog](https://openai.com/index/introducing-gpt-oss/))
|
|
3362
|
+
creator_organization_name: OpenAI
|
|
3363
|
+
access: open
|
|
3364
|
+
release_date: 2025-08-05
|
|
3365
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3366
|
+
|
|
3259
3367
|
## Codex Models
|
|
3260
3368
|
# DEPRECATED: Codex models have been shut down on March 23 2023.
|
|
3261
3369
|
|
|
@@ -3532,6 +3640,22 @@ models:
|
|
|
3532
3640
|
release_date: 2025-04-29
|
|
3533
3641
|
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3534
3642
|
|
|
3643
|
+
- name: qwen/qwen3-next-80b-a3b-thinking
|
|
3644
|
+
display_name: Qwen3-Next 80B A3B Thinking
|
|
3645
|
+
description: Qwen3-Next is a new model architecture for improving training and inference efficiency under long-context and large-parameter settings. Compared to the MoE structure of Qwen3, Qwen3-Next introduces a hybrid attention mechanism, a highly sparse Mixture-of-Experts (MoE) structure, training-stability-friendly optimizations, and a multi-token prediction mechanism for faster inference. ([blog](https://qwen.ai/blog?id=4074cca80393150c248e508aa62983f9cb7d27cd&from=research.latest-advancements-list))
|
|
3646
|
+
creator_organization_name: Qwen
|
|
3647
|
+
access: open
|
|
3648
|
+
release_date: 2025-07-21 # https://x.com/Alibaba_Qwen/status/1947344511988076547
|
|
3649
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3650
|
+
|
|
3651
|
+
- name: qwen/qwen3-235b-a22b-instruct-2507-fp8
|
|
3652
|
+
display_name: Qwen3 235B A22B Instruct 2507 FP8
|
|
3653
|
+
description: Qwen3 235B A22B Instruct 2507 FP8 is an updated version of the non-thinking mode of Qwen3 235B A22B FP8.
|
|
3654
|
+
creator_organization_name: Qwen
|
|
3655
|
+
access: open
|
|
3656
|
+
release_date: 2025-07-21 # https://x.com/Alibaba_Qwen/status/1947344511988076547
|
|
3657
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3658
|
+
|
|
3535
3659
|
- name: qwen/qwq-32b-preview
|
|
3536
3660
|
display_name: QwQ (32B Preview)
|
|
3537
3661
|
description: QwQ-32B-Preview is an experimental research model developed by the Qwen Team, focused on advancing AI reasoning capabilities. ([blog post](https://qwenlm.github.io/blog/qwq-32b-preview/)).
|
|
@@ -3875,7 +3999,190 @@ models:
|
|
|
3875
3999
|
release_date: 2023-05-25
|
|
3876
4000
|
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
3877
4001
|
|
|
4002
|
+
- name: tiiuae/falcon3-1b-instruct
|
|
4003
|
+
display_name: Falcon3-1B-Instruct
|
|
4004
|
+
description: Falcon3-1B-Instruct is an open-weights foundation model that supports 4 languages (English, French, Spanish, Portuguese) that was trained on 14T tokens.
|
|
4005
|
+
creator_organization_name: TII UAE
|
|
4006
|
+
access: open
|
|
4007
|
+
num_parameters: 1670000000
|
|
4008
|
+
release_date: 2024-12-17 # https://huggingface.co/docs/transformers/main/en/model_doc/falcon3
|
|
4009
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
4010
|
+
|
|
4011
|
+
- name: tiiuae/falcon3-3b-instruct
|
|
4012
|
+
display_name: Falcon3-3B-Instruct
|
|
4013
|
+
description: Falcon3-3B-Instruct is an open-weights foundation model that supports 4 languages (English, French, Spanish, Portuguese) that was trained on 14T tokens.
|
|
4014
|
+
creator_organization_name: TII UAE
|
|
4015
|
+
access: open
|
|
4016
|
+
num_parameters: 3230000000
|
|
4017
|
+
release_date: 2024-12-17 # https://huggingface.co/docs/transformers/main/en/model_doc/falcon3
|
|
4018
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3878
4019
|
|
|
4020
|
+
- name: tiiuae/falcon3-7b-instruct
|
|
4021
|
+
display_name: Falcon3-7B-Instruct
|
|
4022
|
+
description: Falcon3-7B-Instruct is an open-weights foundation model that supports 4 languages (English, French, Spanish, Portuguese) that was trained on 14T tokens.
|
|
4023
|
+
creator_organization_name: TII UAE
|
|
4024
|
+
access: open
|
|
4025
|
+
num_parameters: 7460000000
|
|
4026
|
+
release_date: 2024-12-17 # https://huggingface.co/docs/transformers/main/en/model_doc/falcon3
|
|
4027
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
4028
|
+
|
|
4029
|
+
- name: tiiuae/falcon3-10b-instruct
|
|
4030
|
+
display_name: Falcon3-10B-Instruct
|
|
4031
|
+
description: Falcon3-10B-Instruct is an open-weights foundation model that supports 4 languages (English, French, Spanish, Portuguese) that was trained on 14T tokens.
|
|
4032
|
+
creator_organization_name: TII UAE
|
|
4033
|
+
access: open
|
|
4034
|
+
num_parameters: 10300000000
|
|
4035
|
+
release_date: 2024-12-17 # https://huggingface.co/docs/transformers/main/en/model_doc/falcon3
|
|
4036
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
4037
|
+
|
|
4038
|
+
# AceGPT-v2
|
|
4039
|
+
- name: freedomintelligence/acegpt-v2-8b-chat
|
|
4040
|
+
display_name: AceGPT-v2-8B-Chat
|
|
4041
|
+
description: AceGPT is a fully fine-tuned generative text model collection, particularly focused on the Arabic language domain. AceGPT-v2-8B-Chat is based on Meta-Llama-3-8B. ([paper](https://arxiv.org/abs/2412.12310))
|
|
4042
|
+
creator_organization_name: FreedomAI
|
|
4043
|
+
access: open
|
|
4044
|
+
num_parameters: 8030000000
|
|
4045
|
+
release_date: 2024-10-20
|
|
4046
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
4047
|
+
|
|
4048
|
+
- name: freedomintelligence/acegpt-v2-32b-chat
|
|
4049
|
+
display_name: AceGPT-v2-32B-Chat
|
|
4050
|
+
description: AceGPT is a fully fine-tuned generative text model collection, particularly focused on the Arabic language domain. AceGPT-v2-32B-Chat is based on Qwen1.5-32B. ([paper](https://arxiv.org/abs/2412.12310))
|
|
4051
|
+
creator_organization_name: FreedomAI
|
|
4052
|
+
access: open
|
|
4053
|
+
num_parameters: 32500000000
|
|
4054
|
+
release_date: 2024-10-20
|
|
4055
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
4056
|
+
|
|
4057
|
+
- name: freedomintelligence/acegpt-v2-70b-chat
|
|
4058
|
+
display_name: AceGPT-v2-70B-Chat
|
|
4059
|
+
description: AceGPT is a fully fine-tuned generative text model collection, particularly focused on the Arabic language domain. AceGPT-v2-70B-Chat is based on Meta-Llama-3-70B. ([paper](https://arxiv.org/abs/2412.12310))
|
|
4060
|
+
creator_organization_name: FreedomAI
|
|
4061
|
+
access: open
|
|
4062
|
+
num_parameters: 70600000000
|
|
4063
|
+
release_date: 2024-10-20
|
|
4064
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
4065
|
+
|
|
4066
|
+
# ALLaM
|
|
4067
|
+
- name: allam-ai/allam-7b-instruct-preview
|
|
4068
|
+
display_name: ALLaM-7B-Instruct-preview
|
|
4069
|
+
description: ALLaM-7B-Instruct-preview is a model designed to advance Arabic language technology, which used a recipe of training on 4T English tokens followed by training on 1.2T mixed Arabic/English tokens. ([paper](https://arxiv.org/abs/2407.15390v1))
|
|
4070
|
+
creator_organization_name: NCAI & SDAIA
|
|
4071
|
+
access: open
|
|
4072
|
+
num_parameters: 7000000000
|
|
4073
|
+
release_date: 2024-07-22
|
|
4074
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
4075
|
+
|
|
4076
|
+
# SILMA
|
|
4077
|
+
- name: silma-ai/silma-9b-instruct-v1.0
|
|
4078
|
+
display_name: SILMA 9B
|
|
4079
|
+
description: SILMA 9B is a compact Arabic language model based on Google Gemma. ([model card](https://huggingface.co/silma-ai/SILMA-9B-Instruct-v1.0))
|
|
4080
|
+
creator_organization_name: SILMA AI
|
|
4081
|
+
access: open
|
|
4082
|
+
num_parameters: 9240000000
|
|
4083
|
+
release_date: 2024-08-17
|
|
4084
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
4085
|
+
|
|
4086
|
+
# Jais Family
|
|
4087
|
+
|
|
4088
|
+
- name: inceptionai/jais-family-590m-chat
|
|
4089
|
+
display_name: Jais-family-590m-chat
|
|
4090
|
+
description: The Jais family of models is a series of bilingual English-Arabic large language models (LLMs) that are trained from scratch and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
|
|
4091
|
+
creator_organization_name: Inception
|
|
4092
|
+
access: open
|
|
4093
|
+
num_parameters: 771000000
|
|
4094
|
+
release_date: 2023-08-30
|
|
4095
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
4096
|
+
|
|
4097
|
+
- name: inceptionai/jais-family-1p3b-chat
|
|
4098
|
+
display_name: Jais-family-1p3b-chat
|
|
4099
|
+
description: The Jais family of models is a series of bilingual English-Arabic large language models (LLMs) that are trained from scratch and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
|
|
4100
|
+
creator_organization_name: Inception
|
|
4101
|
+
access: open
|
|
4102
|
+
num_parameters: 1560000000
|
|
4103
|
+
release_date: 2023-08-30
|
|
4104
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
4105
|
+
|
|
4106
|
+
- name: inceptionai/jais-family-2p7b-chat
|
|
4107
|
+
display_name: Jais-family-2p7b-chat
|
|
4108
|
+
description: The Jais family of models is a series of bilingual English-Arabic large language models (LLMs) that are trained from scratch and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
|
|
4109
|
+
creator_organization_name: Inception
|
|
4110
|
+
access: open
|
|
4111
|
+
num_parameters: 2950000000
|
|
4112
|
+
release_date: 2023-08-30
|
|
4113
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
4114
|
+
|
|
4115
|
+
- name: inceptionai/jais-family-6p7b-chat
|
|
4116
|
+
display_name: Jais-family-6p7b-chat
|
|
4117
|
+
description: The Jais family of models is a series of bilingual English-Arabic large language models (LLMs) that are trained from scratch and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
|
|
4118
|
+
creator_organization_name: Inception
|
|
4119
|
+
access: open
|
|
4120
|
+
num_parameters: 7140000000
|
|
4121
|
+
release_date: 2023-08-30
|
|
4122
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
4123
|
+
|
|
4124
|
+
- name: inceptionai/jais-family-6p7b-chat
|
|
4125
|
+
display_name: Jais-family-6p7b-chat
|
|
4126
|
+
description: The Jais family of models is a series of bilingual English-Arabic large language models (LLMs) that are trained from scratch and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
|
|
4127
|
+
creator_organization_name: Inception
|
|
4128
|
+
access: open
|
|
4129
|
+
num_parameters: 7140000000
|
|
4130
|
+
release_date: 2023-08-30
|
|
4131
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
4132
|
+
|
|
4133
|
+
- name: inceptionai/jais-family-13b-chat
|
|
4134
|
+
display_name: Jais-family-13b-chat
|
|
4135
|
+
description: The Jais family of models is a series of bilingual English-Arabic large language models (LLMs) that are trained from scratch and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
|
|
4136
|
+
creator_organization_name: Inception
|
|
4137
|
+
access: open
|
|
4138
|
+
num_parameters: 13500000000
|
|
4139
|
+
release_date: 2023-08-30
|
|
4140
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
4141
|
+
|
|
4142
|
+
- name: inceptionai/jais-family-30b-8k-chat
|
|
4143
|
+
display_name: Jais-family-30b-8k-chat
|
|
4144
|
+
+    description: The Jais family of models is a series of bilingual English-Arabic large language models (LLMs) that are trained from scratch and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+    creator_organization_name: Inception
+    access: open
+    num_parameters: 30800000000
+    release_date: 2023-08-30
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: inceptionai/jais-family-30b-16k-chat
+    display_name: Jais-family-30b-16k-chat
+    description: The Jais family of models is a series of bilingual English-Arabic large language models (LLMs) that are trained from scratch and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+    creator_organization_name: Inception
+    access: open
+    num_parameters: 30800000000
+    release_date: 2023-08-30
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: inceptionai/jais-adapted-7b-chat
+    display_name: Jais-adapted-7b-chat
+    description: The Jais adapted models are bilingual English-Arabic large language models (LLMs) that are trained adaptively from Llama-2 and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+    creator_organization_name: Inception
+    access: open
+    num_parameters: 7000000000
+    release_date: 2023-08-30
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: inceptionai/jais-adapted-13b-chat
+    display_name: Jais-adapted-13b-chat
+    description: The Jais adapted models are bilingual English-Arabic large language models (LLMs) that are trained adaptively from Llama-2 and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+    creator_organization_name: Inception
+    access: open
+    num_parameters: 13300000000
+    release_date: 2023-08-30
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: inceptionai/jais-adapted-70b-chat
+    display_name: Jais-adapted-70b-chat
+    description: The Jais adapted models are bilingual English-Arabic large language models (LLMs) that are trained adaptively from Llama-2 and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+    creator_organization_name: Inception
+    access: open
+    num_parameters: 69500000000
+    release_date: 2023-08-30
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   # Together
   - name: together/gpt-jt-6b-v1
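Each entry added above follows the same flat schema: `name`, `display_name`, `description`, `creator_organization_name`, `access`, an optional `num_parameters`, `release_date`, and `tags`. As a rough illustration (not HELM's own registry code), a file with this shape can be loaded and filtered with PyYAML; the `model_metadata.yaml` path is a hypothetical local copy:

```python
# Minimal sketch: load a model-metadata YAML like the one diffed above and
# list open-access instruction-following models. Assumes PyYAML is installed
# and that "model_metadata.yaml" has a top-level "models:" list
# (a hypothetical local path, not part of the package layout).
import yaml

with open("model_metadata.yaml") as f:
    config = yaml.safe_load(f)

for model in config["models"]:
    tags = model.get("tags", [])
    if model.get("access") == "open" and "INSTRUCTION_FOLLOWING_MODEL_TAG" in tags:
        params = model.get("num_parameters")
        print(f"{model['name']}: {params:,} parameters" if params else model["name"])
```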
@@ -4108,7 +4415,15 @@ models:
     description: Palmyra X5 is a language model for enterprise that uses a Mixture of Experts (MoE) architecture and a hybrid attention mechanism that blends linear and softmax attention. ([blog](https://writer.com/engineering/long-context-palmyra-x5/))
     creator_organization_name: Writer
     access: limited
-    release_date:
+    release_date: 2025-04-28
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: writer/palmyra-x5-v1-bedrock
+    display_name: Palmyra X5 (Bedrock)
+    description: Palmyra X5 is a language model for enterprise that uses a Mixture of Experts (MoE) architecture and a hybrid attention mechanism that blends linear and softmax attention. ([blog](https://writer.com/engineering/long-context-palmyra-x5/)) This is the model version that is hosted on Amazon Bedrock. ([blog](https://aws.amazon.com/blogs/aws/writer-palmyra-x5-and-x4-foundation-models-are-now-available-in-amazon-bedrock/))
+    creator_organization_name: Writer
+    access: limited
+    release_date: 2025-04-28
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: writer/palmyra-med-32k
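The hunk above fills in a `release_date:` that was left empty in 0.5.6. A small validation pass like the following sketch (illustrative only, again assuming PyYAML and a hypothetical local copy of the file) can flag such gaps:

```python
# Illustrative sketch: flag models whose release_date is missing or unparsed,
# mirroring the gap this hunk fixes.
import datetime
import yaml

with open("model_metadata.yaml") as f:
    models = yaml.safe_load(f)["models"]

for model in models:
    value = model.get("release_date")
    if value is None:
        print(f"missing release_date: {model['name']}")
    elif not isinstance(value, datetime.date):
        # PyYAML parses bare ISO dates (e.g. 2025-04-28) into datetime.date,
        # so anything else here is a malformed value.
        print(f"unparsed release_date {value!r}: {model['name']}")
```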
@@ -4163,6 +4478,14 @@ models:
     release_date: 2025-04-03 # https://docs.x.ai/docs/release-notes#april-2025
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
+  - name: xai/grok-4-0709
+    display_name: Grok 4 (0709)
+    description: Grok 4 (0709) is a model that includes native tool use and real-time search integration. ([blog](https://x.ai/news/grok-4))
+    creator_organization_name: xAI
+    access: limited
+    release_date: 2025-07-09
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   # Yandex
   - name: yandex/yalm
     display_name: YaLM (100B)
@@ -4266,6 +4589,42 @@ models:
     release_date: 2023-11-08
     tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
+  - name: maritaca-ai/sabiazinho-3
+    display_name: Sabiazinho 3
+    description: Sabiazinho-3 is a decoder-only language model designed for Portuguese text generation and understanding tasks. It supports a long context window of up to 128,000 tokens and is offered via API with scalable rate limits. The model is trained on diverse Portuguese corpora with knowledge up to July 2023.
+    creator_organization_name: Maritaca AI
+    access: limited
+    release_date: 2025-02-06
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: maritaca-ai/sabia-3
+    display_name: Sabiá 3
+    description: Sabiá-3 is a decoder-only language model designed for Portuguese text generation and understanding tasks. It supports a long context window of up to 128,000 tokens and is offered via API with scalable rate limits. The model is trained on diverse Portuguese corpora with knowledge up to July 2023.
+    creator_organization_name: Maritaca AI
+    access: limited
+    release_date: 2024-12-11
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: maritaca-ai/sabia-3.1-2025-05-08
+    display_name: Sabiá 3.1
+    description: Sabiá-3.1 is a decoder-only language model designed for Portuguese text generation and understanding tasks. It supports a long context window of up to 128,000 tokens and is offered via API with scalable rate limits. The model is trained on diverse Portuguese corpora with knowledge up to August 2024.
+    creator_organization_name: Maritaca AI
+    access: limited
+    release_date: 2025-05-08
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  # Z.ai
+
+  - name: zai-org/glm-4.5-air-fp8
+    display_name: GLM-4.5-Air-FP8
+    description: GLM-4.5-Air-FP8 is a hybrid reasoning model designed to unify reasoning, coding, and agentic capabilities into a single model. It has 106 billion total parameters and 12 billion active parameters. The thinking mode is enabled by default. ([blog](https://z.ai/blog/glm-4.5))
+    creator_organization_name: Z.ai
+    access: open
+    num_parameters: 110000000000
+    release_date: 2025-07-28
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+
   # Granite - IBM
   # https://www.ibm.com/granite
   # https://github.com/ibm-granite/granite-3.0-language-models
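Note that `num_parameters` is stored as a raw integer count (110000000000 above, 2444618240 for Tucano further down), while the descriptions quote rounded figures such as "106 billion". A hypothetical helper for rendering raw counts in that rounded style, offered only as a sketch:

```python
# Illustrative sketch: render raw num_parameters counts like those above
# (e.g. 110000000000 -> "110B", 2444618240 -> "2.4B") for display.
def humanize_parameters(count: int) -> str:
    for threshold, suffix in ((10**12, "T"), (10**9, "B"), (10**6, "M")):
        if count >= threshold:
            value = count / threshold
            # Drop the decimal when it carries no information (110.0 -> 110).
            if value >= 100 or value == int(value):
                return f"{value:.0f}{suffix}"
            return f"{value:.1f}{suffix}"
    return str(count)

assert humanize_parameters(110000000000) == "110B"
assert humanize_parameters(2444618240) == "2.4B"
assert humanize_parameters(460000000) == "460M"
```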
@@ -4479,21 +4838,61 @@ models:
     tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
 
   - name: ibm/granite-3.3-8b-instruct
-    display_name: Granite 3.3 8B Instruct
-    description: Granite 3.3 8B Instruct is
+    display_name: IBM Granite 3.3 8B Instruct
+    description: IBM Granite 3.3 8B Instruct is an 8-billion parameter 128K context length language model fine-tuned for improved reasoning and instruction-following capabilities. ([model card](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct))
     creator_organization_name: IBM
     access: open
     num_parameters: 8170000000
     release_date: 2025-04-16
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
-  - name:
-    display_name:
-    description:
-    creator_organization_name:
-    access:
-
-
+  - name: ibm/granite-3.3-8b-instruct-with-guardian
+    display_name: IBM Granite 3.3 8B Instruct (with guardian)
+    description: IBM Granite 3.3 8B Instruct is an 8-billion parameter 128K context length language model fine-tuned for improved reasoning and instruction-following capabilities. All prompts were first evaluated for risk by [IBM Granite Guardian 3.2 5B](https://www.ibm.com/granite/docs/models/guardian/), and prompts that were deemed risky (with a risk threshold of 0.8) received the response "I'm very sorry, but I can't assist with that." ([model card](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct))
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 8170000000
+    release_date: 2025-04-16
+    # Unfortunately this setup used an IBM internal API endpoint that is not publicly available, so we mark it with DEPRECATED_MODEL_TAG
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ibm/granite-4.0-h-small
+    display_name: IBM Granite 4.0 Small
+    description: IBM Granite 4.0 Small is a hybrid model with 32B total parameters and 9B active parameters that uses a Mixture of Experts (MoE) routing strategy with Mamba-2 and Transformer-based self-attention components.
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 32200000000
+    release_date: 2025-10-02
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ibm/granite-4.0-micro
+    display_name: IBM Granite 4.0 Micro
+    description: IBM Granite 4.0 Micro is a dense Transformer model with 3B total parameters that provides an alternative option for users when Mamba-2 support is not yet optimized.
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 3400000000
+    release_date: 2025-10-02
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ibm/granite-4.0-h-small-with-guardian
+    display_name: IBM Granite 4.0 Small (with guardian)
+    description: IBM Granite 4.0 Small is a hybrid model with 32B total parameters and 9B active parameters that uses a Mixture of Experts (MoE) routing strategy with Mamba-2 and Transformer-based self-attention components.
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 32200000000
+    release_date: 2025-10-02
+    # Unfortunately this setup used an IBM internal API endpoint that is not publicly available, so we mark it with DEPRECATED_MODEL_TAG
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ibm/granite-4.0-micro-with-guardian
+    display_name: IBM Granite 4.0 Micro (with guardian)
+    description: IBM Granite 4.0 Micro is a dense Transformer model with 3B total parameters that provides an alternative option for users when Mamba-2 support is not yet optimized.
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 3400000000
+    release_date: 2025-10-02
+    # Unfortunately this setup used an IBM internal API endpoint that is not publicly available, so we mark it with DEPRECATED_MODEL_TAG
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: ura-hcmut/ura-llama-2.1-8b
     display_name: URA-Llama 2.1 (8B)
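The "(with guardian)" variants in the hunk above describe a simple pre-screening protocol: each prompt is first scored for risk by a separate guardian model, and any prompt at or above the 0.8 threshold receives a fixed refusal instead of reaching the underlying model. A minimal sketch of that control flow, where `score_risk` and `generate` are hypothetical stand-ins for the guardian and instruct models:

```python
# Minimal sketch of the "with guardian" pre-screening described above.
# score_risk() is a hypothetical stand-in for a call to the guardian model
# (IBM Granite Guardian 3.2 5B in the config); generate() stands in for the
# underlying instruct model.
from typing import Callable

REFUSAL = "I'm very sorry, but I can't assist with that."
RISK_THRESHOLD = 0.8  # the threshold named in the model descriptions above

def guarded_generate(
    prompt: str,
    score_risk: Callable[[str], float],
    generate: Callable[[str], str],
) -> str:
    # Prompts deemed risky never reach the underlying model.
    if score_risk(prompt) >= RISK_THRESHOLD:
        return REFUSAL
    return generate(prompt)

# Example wiring with trivial stand-ins:
print(guarded_generate("hello", score_risk=lambda p: 0.0, generate=str.upper))
```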
@@ -4682,4 +5081,189 @@ models:
     access: open
     num_parameters: 4000000000
     release_date: 2024-04-02
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: CEIA-UFG/Gemma-3-Gaia-PT-BR-4b-it
+    display_name: Gemma-3 Gaia PT-BR 4b Instruct
+    description: Gemma-3 Gaia PT-BR 4b Instruct is a model trained by CEIA-UFG for understanding and generating Brazilian Portuguese text.
+    creator_organization_name: CEIA-UFG
+    access: open
+    num_parameters: 4000000000
+    release_date: 2025-06-01
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: recogna-nlp/bode-13b-alpaca-pt-br-no-peft
+    display_name: Bode 13B Alpaca PT-BR
+    description: Bode is a large language model (LLM) for Portuguese, based on LLaMA 2 and fine-tuned on the Alpaca dataset translated into Portuguese. It is suitable for instruction following, text generation, and translation tasks in Portuguese.
+    creator_organization_name: Recogna NLP
+    access: open
+    num_parameters: 13000000000
+    release_date: 2024-01-05
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: 22h/cabrita_7b_pt_850000
+    display_name: Cabrita PT-BR 7B
+    description: Cabrita is an OpenLLaMA-based model, continually trained on Portuguese text (the mC4-pt subset) for 850,000 steps with an efficient tokenizer adapted to the language.
+    creator_organization_name: 22h
+    access: open
+    num_parameters: 7000000000
+    release_date: 2023-08-23
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: PORTULAN/gervasio-7b-portuguese-ptbr-decoder
+    display_name: Gervásio PT-BR/PT-PT 7B Decoder
+    description: Gervásio PT* is a 7B parameter decoder model, adapted from LLaMA-2 7B and trained for both Brazilian and European Portuguese. It was fine-tuned on data translated from benchmarks such as GLUE and SuperGLUE.
+    creator_organization_name: PORTULAN (University of Lisbon NLX)
+    access: open
+    num_parameters: 6740000000
+    release_date: 2024-02-29
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: TucanoBR/Tucano-2b4
+    display_name: Tucano PT-BR 2b4
+    description: Tucano is a series of decoder models based on LLaMA 2, natively pre-trained in Portuguese on the GigaVerbo dataset (200B tokens); the 2B model was trained for 1.96M steps over 845 hours (515B tokens, 4 epochs).
+    creator_organization_name: TucanoBR (University of Bonn)
+    access: open
+    num_parameters: 2444618240
+    release_date: 2024-12-11
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: nicholasKluge/TeenyTinyLlama-460m
+    display_name: TeenyTinyLlama 460M PT-BR
+    description: TeenyTinyLlama-460m is a lightweight and efficient model based on LLaMA 2, trained exclusively on Brazilian Portuguese. It uses RoPE embeddings and SwiGLU activations, with a refined SentencePiece tokenizer and an architecture optimized for low-resource settings.
+    creator_organization_name: Nicholas Kluge
+    access: open
+    num_parameters: 460000000
+    release_date: 2024-01-30
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  # DSPy Models (EXPERIMENTAL)
+  # The following model configurations use the DSPyClient for inference with DSPy modules.
+
+  - name: anthropic/claude-3-7-sonnet-20250219-dspy-zs-predict
+    display_name: Claude 3.7 Sonnet (20250219) (DSPy Zero-Shot Predict)
+    description: Claude 3.7 Sonnet is a Claude 3 family hybrid reasoning model that can produce near-instant responses or extended, step-by-step thinking that is made visible to the user. ([blog](https://www.anthropic.com/news/claude-3-7-sonnet))
+    creator_organization_name: Anthropic
+    access: limited
+    release_date: 2025-02-24
+    tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemini-2.0-flash-001-dspy-zs-predict
+    display_name: Gemini 2.0 Flash (DSPy Zero-Shot Predict)
+    description: Gemini 2.0 Flash is a member of the Gemini 2.0 series of models, a suite of highly-capable, natively multimodal models designed to power agentic systems. ([model card](https://storage.googleapis.com/model-cards/documents/gemini-2-flash.pdf), [documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2025-02-01
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-4o-2024-05-13-dspy-zs-predict
+    display_name: GPT-4o (2024-05-13) (DSPy Zero-Shot Predict)
+    description: GPT-4o (2024-05-13) is a large multimodal model that accepts as input any combination of text, audio, and image and generates any combination of text, audio, and image outputs. ([blog](https://openai.com/index/hello-gpt-4o/))
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2024-04-09
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/o3-mini-2025-01-31-dspy-zs-predict
+    display_name: o3-mini (2025-01-31) (DSPy Zero-Shot Predict)
+    description: o3-mini is a small reasoning model from OpenAI that aims to deliver STEM capabilities while maintaining the low cost and reduced latency of OpenAI o1-mini. ([blog post](https://openai.com/index/openai-o3-mini/))
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2025-01-31
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: anthropic/claude-3-7-sonnet-20250219-dspy-zs-cot
+    display_name: Claude 3.7 Sonnet (20250219) (DSPy Zero-Shot ChainOfThought)
+    description: Claude 3.7 Sonnet is a Claude 3 family hybrid reasoning model that can produce near-instant responses or extended, step-by-step thinking that is made visible to the user. ([blog](https://www.anthropic.com/news/claude-3-7-sonnet))
+    creator_organization_name: Anthropic
+    access: limited
+    release_date: 2025-02-24
+    tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemini-2.0-flash-001-dspy-zs-cot
+    display_name: Gemini 2.0 Flash (DSPy Zero-Shot ChainOfThought)
+    description: Gemini 2.0 Flash is a member of the Gemini 2.0 series of models, a suite of highly-capable, natively multimodal models designed to power agentic systems. ([model card](https://storage.googleapis.com/model-cards/documents/gemini-2-flash.pdf), [documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2025-02-01
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-4o-2024-05-13-dspy-zs-cot
+    display_name: GPT-4o (2024-05-13) (DSPy Zero-Shot ChainOfThought)
+    description: GPT-4o (2024-05-13) is a large multimodal model that accepts as input any combination of text, audio, and image and generates any combination of text, audio, and image outputs. ([blog](https://openai.com/index/hello-gpt-4o/))
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2024-04-09
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/o3-mini-2025-01-31-dspy-zs-cot
+    display_name: o3-mini (2025-01-31) (DSPy Zero-Shot ChainOfThought)
+    description: o3-mini is a small reasoning model from OpenAI that aims to deliver STEM capabilities while maintaining the low cost and reduced latency of OpenAI o1-mini. ([blog post](https://openai.com/index/openai-o3-mini/))
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2025-01-31
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: anthropic/claude-3-7-sonnet-20250219-dspy-fs-bfrs
+    display_name: Claude 3.7 Sonnet (20250219) (DSPy BootstrapFewShotWithRandomSearch)
+    description: Claude 3.7 Sonnet is a Claude 3 family hybrid reasoning model that can produce near-instant responses or extended, step-by-step thinking that is made visible to the user. ([blog](https://www.anthropic.com/news/claude-3-7-sonnet))
+    creator_organization_name: Anthropic
+    access: limited
+    release_date: 2025-02-24
+    tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemini-2.0-flash-001-dspy-fs-bfrs
+    display_name: Gemini 2.0 Flash (DSPy BootstrapFewShotWithRandomSearch)
+    description: Gemini 2.0 Flash is a member of the Gemini 2.0 series of models, a suite of highly-capable, natively multimodal models designed to power agentic systems. ([model card](https://storage.googleapis.com/model-cards/documents/gemini-2-flash.pdf), [documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2025-02-01
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-4o-2024-05-13-dspy-fs-bfrs
+    display_name: GPT-4o (2024-05-13) (DSPy BootstrapFewShotWithRandomSearch)
+    description: GPT-4o (2024-05-13) is a large multimodal model that accepts as input any combination of text, audio, and image and generates any combination of text, audio, and image outputs. ([blog](https://openai.com/index/hello-gpt-4o/))
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2024-04-09
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/o3-mini-2025-01-31-dspy-fs-bfrs
+    display_name: o3-mini (2025-01-31) (DSPy BootstrapFewShotWithRandomSearch)
+    description: o3-mini is a small reasoning model from OpenAI that aims to deliver STEM capabilities while maintaining the low cost and reduced latency of OpenAI o1-mini. ([blog post](https://openai.com/index/openai-o3-mini/))
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2025-01-31
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: anthropic/claude-3-7-sonnet-20250219-dspy-fs-miprov2
+    display_name: Claude 3.7 Sonnet (20250219) (DSPy MIPROv2)
+    description: Claude 3.7 Sonnet is a Claude 3 family hybrid reasoning model that can produce near-instant responses or extended, step-by-step thinking that is made visible to the user. ([blog](https://www.anthropic.com/news/claude-3-7-sonnet))
+    creator_organization_name: Anthropic
+    access: limited
+    release_date: 2025-02-24
+    tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemini-2.0-flash-001-dspy-fs-miprov2
+    display_name: Gemini 2.0 Flash (DSPy MIPROv2)
+    description: Gemini 2.0 Flash is a member of the Gemini 2.0 series of models, a suite of highly-capable, natively multimodal models designed to power agentic systems. ([model card](https://storage.googleapis.com/model-cards/documents/gemini-2-flash.pdf), [documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2025-02-01
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-4o-2024-05-13-dspy-fs-miprov2
+    display_name: GPT-4o (2024-05-13) (DSPy MIPROv2)
+    description: GPT-4o (2024-05-13) is a large multimodal model that accepts as input any combination of text, audio, and image and generates any combination of text, audio, and image outputs. ([blog](https://openai.com/index/hello-gpt-4o/))
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2024-04-09
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/o3-mini-2025-01-31-dspy-fs-miprov2
+    display_name: o3-mini (2025-01-31) (DSPy MIPROv2)
+    description: o3-mini is a small reasoning model from OpenAI that aims to deliver STEM capabilities while maintaining the low cost and reduced latency of OpenAI o1-mini. ([blog post](https://openai.com/index/openai-o3-mini/))
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2025-01-31
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]