crfm-helm 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of crfm-helm might be problematic.
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/METADATA +41 -57
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/RECORD +197 -152
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/WHEEL +1 -1
- helm/benchmark/adaptation/adapter_spec.py +32 -31
- helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +12 -5
- helm/benchmark/adaptation/adapters/test_generation_adapter.py +12 -12
- helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +8 -8
- helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +77 -9
- helm/benchmark/adaptation/common_adapter_specs.py +2 -0
- helm/benchmark/annotation/air_bench_annotator.py +64 -0
- helm/benchmark/annotation/annotator_factory.py +6 -0
- helm/benchmark/annotation/anthropic_red_team_annotator.py +70 -0
- helm/benchmark/annotation/call_center_annotator.py +247 -0
- helm/benchmark/annotation/financebench_annotator.py +79 -0
- helm/benchmark/annotation/harm_bench_annotator.py +68 -0
- helm/benchmark/annotation/{image2structure → image2struct}/latex_compiler_annotator.py +2 -2
- helm/benchmark/annotation/{image2structure → image2struct}/lilypond_compiler_annotator.py +5 -3
- helm/benchmark/annotation/{image2structure → image2struct}/webpage_compiler_annotator.py +5 -5
- helm/benchmark/annotation/live_qa_annotator.py +71 -0
- helm/benchmark/annotation/medication_qa_annotator.py +68 -0
- helm/benchmark/annotation/model_as_judge.py +45 -0
- helm/benchmark/annotation/simple_safety_tests_annotator.py +64 -0
- helm/benchmark/annotation/xstest_annotator.py +110 -0
- helm/benchmark/augmentations/translate_perturbation.py +1 -0
- helm/benchmark/huggingface_registration.py +16 -6
- helm/benchmark/metrics/air_bench_metrics.py +56 -0
- helm/benchmark/metrics/annotation_metrics.py +108 -0
- helm/benchmark/metrics/bhasa_metrics.py +188 -0
- helm/benchmark/metrics/bhasa_metrics_specs.py +10 -0
- helm/benchmark/metrics/code_metrics_helper.py +11 -1
- helm/benchmark/metrics/fin_qa_metrics.py +60 -0
- helm/benchmark/metrics/fin_qa_metrics_helper.py +398 -0
- helm/benchmark/metrics/gpt4v_originality_critique_metrics.py +126 -0
- helm/benchmark/metrics/instruction_following_critique_metrics.py +1 -0
- helm/benchmark/metrics/live_qa_metrics.py +23 -0
- helm/benchmark/metrics/medication_qa_metrics.py +23 -0
- helm/benchmark/metrics/prometheus_vision_critique_metrics.py +185 -0
- helm/benchmark/metrics/reka_vibe_critique_metrics.py +158 -0
- helm/benchmark/metrics/safety_metrics.py +57 -0
- helm/benchmark/metrics/summac/model_summac.py +3 -3
- helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -2
- helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +4 -4
- helm/benchmark/metrics/unitxt_metrics.py +20 -10
- helm/benchmark/metrics/vision_language/emd_utils.py +4 -0
- helm/benchmark/metrics/vision_language/image_metrics.py +30 -72
- helm/benchmark/metrics/vision_language/image_utils.py +1 -1
- helm/benchmark/model_metadata_registry.py +3 -3
- helm/benchmark/presentation/schema.py +54 -4
- helm/benchmark/presentation/test_run_entry.py +1 -0
- helm/benchmark/presentation/test_schema.py +11 -0
- helm/benchmark/run.py +31 -2
- helm/benchmark/run_expander.py +113 -10
- helm/benchmark/run_spec_factory.py +4 -0
- helm/benchmark/run_specs/air_bench_run_specs.py +40 -0
- helm/benchmark/run_specs/bhasa_run_specs.py +638 -0
- helm/benchmark/run_specs/call_center_run_specs.py +152 -0
- helm/benchmark/run_specs/classic_run_specs.py +15 -11
- helm/benchmark/run_specs/decodingtrust_run_specs.py +11 -9
- helm/benchmark/run_specs/experimental_run_specs.py +85 -0
- helm/benchmark/run_specs/finance_run_specs.py +110 -0
- helm/benchmark/run_specs/safety_run_specs.py +154 -0
- helm/benchmark/run_specs/vlm_run_specs.py +251 -57
- helm/benchmark/scenarios/air_bench_scenario.py +50 -0
- helm/benchmark/scenarios/anthropic_red_team_scenario.py +71 -0
- helm/benchmark/scenarios/banking77_scenario.py +51 -0
- helm/benchmark/scenarios/bhasa_scenario.py +1798 -0
- helm/benchmark/scenarios/call_center_scenario.py +84 -0
- helm/benchmark/scenarios/ci_mcqa_scenario.py +80 -0
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +2 -1
- helm/benchmark/scenarios/entity_data_imputation_scenario.py +8 -2
- helm/benchmark/scenarios/ewok_scenario.py +116 -0
- helm/benchmark/scenarios/fin_qa_scenario.py +119 -0
- helm/benchmark/scenarios/financebench_scenario.py +53 -0
- helm/benchmark/scenarios/harm_bench_scenario.py +59 -0
- helm/benchmark/scenarios/scenario.py +1 -1
- helm/benchmark/scenarios/simple_safety_tests_scenario.py +33 -0
- helm/benchmark/scenarios/test_air_bench_scenario.py +27 -0
- helm/benchmark/scenarios/test_commonsense_scenario.py +21 -0
- helm/benchmark/scenarios/test_ewok_scenario.py +25 -0
- helm/benchmark/scenarios/test_financebench_scenario.py +26 -0
- helm/benchmark/scenarios/test_gsm_scenario.py +31 -0
- helm/benchmark/scenarios/test_legalbench_scenario.py +30 -0
- helm/benchmark/scenarios/test_math_scenario.py +2 -8
- helm/benchmark/scenarios/test_med_qa_scenario.py +30 -0
- helm/benchmark/scenarios/test_mmlu_scenario.py +33 -0
- helm/benchmark/scenarios/test_narrativeqa_scenario.py +73 -0
- helm/benchmark/scenarios/thai_exam_scenario.py +4 -4
- helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/bingo_scenario.py +5 -5
- helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py +2 -1
- helm/benchmark/scenarios/vision_language/exams_v_scenario.py +104 -0
- helm/benchmark/scenarios/vision_language/fair_face_scenario.py +136 -0
- helm/benchmark/scenarios/vision_language/flickr30k_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/gqa_scenario.py +2 -2
- helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/chart2csv_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/{image2structure/image2structure_scenario.py → image2struct/image2struct_scenario.py} +13 -2
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/latex_scenario.py +3 -7
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/musicsheet_scenario.py +1 -5
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/utils_latex.py +31 -39
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/driver.py +1 -1
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/utils.py +1 -1
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage_scenario.py +44 -13
- helm/benchmark/scenarios/vision_language/math_vista_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/mementos_scenario.py +3 -3
- helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py +2 -2
- helm/benchmark/scenarios/vision_language/mme_scenario.py +21 -18
- helm/benchmark/scenarios/vision_language/mmmu_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/pairs_scenario.py +7 -6
- helm/benchmark/scenarios/vision_language/pope_scenario.py +2 -1
- helm/benchmark/scenarios/vision_language/real_world_qa_scenario.py +57 -0
- helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +7 -5
- helm/benchmark/scenarios/vision_language/unicorn_scenario.py +5 -5
- helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py +98 -0
- helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/vqa_scenario.py +3 -1
- helm/benchmark/scenarios/xstest_scenario.py +35 -0
- helm/benchmark/server.py +1 -6
- helm/benchmark/static/schema_air_bench.yaml +3149 -0
- helm/benchmark/static/schema_bhasa.yaml +709 -0
- helm/benchmark/static/schema_call_center.yaml +232 -0
- helm/benchmark/static/schema_classic.yaml +3 -59
- helm/benchmark/static/schema_cleva.yaml +768 -0
- helm/benchmark/static/schema_decodingtrust.yaml +444 -0
- helm/benchmark/static/schema_ewok.yaml +367 -0
- helm/benchmark/static/schema_finance.yaml +189 -0
- helm/benchmark/static/schema_image2struct.yaml +588 -0
- helm/benchmark/static/schema_instruction_following.yaml +3 -52
- helm/benchmark/static/schema_lite.yaml +3 -61
- helm/benchmark/static/schema_medical.yaml +255 -0
- helm/benchmark/static/schema_mmlu.yaml +3 -61
- helm/benchmark/static/schema_safety.yaml +247 -0
- helm/benchmark/static/schema_tables.yaml +317 -0
- helm/benchmark/static/schema_thai.yaml +244 -0
- helm/benchmark/static/schema_unitxt.yaml +3 -61
- helm/benchmark/static/{schema_vlm.yaml → schema_vhelm.yaml} +304 -298
- helm/benchmark/static/schema_vhelm_lite.yaml +4 -59
- helm/benchmark/static_build/assets/accenture-6f97eeda.png +0 -0
- helm/benchmark/static_build/assets/air-overview-d2e6c49f.png +0 -0
- helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png +0 -0
- helm/benchmark/static_build/assets/cresta-9e22b983.png +0 -0
- helm/benchmark/static_build/assets/cuhk-8c5631e9.png +0 -0
- helm/benchmark/static_build/assets/index-05c76bb1.css +1 -0
- helm/benchmark/static_build/assets/index-58f97dcd.js +10 -0
- helm/benchmark/static_build/assets/overview-74aea3d8.png +0 -0
- helm/benchmark/static_build/assets/process-flow-bd2eba96.png +0 -0
- helm/benchmark/static_build/assets/scb10x-204bd786.png +0 -0
- helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png +0 -0
- helm/benchmark/static_build/index.html +2 -2
- helm/benchmark/window_services/test_openai_window_service.py +8 -8
- helm/clients/ai21_client.py +71 -1
- helm/clients/anthropic_client.py +50 -28
- helm/clients/auto_client.py +11 -0
- helm/clients/client.py +24 -7
- helm/clients/cohere_client.py +98 -3
- helm/clients/huggingface_client.py +79 -19
- helm/clients/nvidia_nim_client.py +35 -0
- helm/clients/openai_client.py +11 -5
- helm/clients/palmyra_client.py +25 -0
- helm/clients/perspective_api_client.py +11 -6
- helm/clients/reka_client.py +189 -0
- helm/clients/test_client.py +7 -9
- helm/clients/test_huggingface_client.py +19 -3
- helm/clients/test_together_client.py +72 -2
- helm/clients/together_client.py +129 -23
- helm/clients/vertexai_client.py +62 -18
- helm/clients/vision_language/huggingface_vlm_client.py +1 -0
- helm/clients/vision_language/open_flamingo_client.py +1 -2
- helm/clients/vision_language/paligemma_client.py +146 -0
- helm/clients/vision_language/palmyra_vision_client.py +99 -0
- helm/clients/yi_client.py +31 -0
- helm/common/critique_request.py +10 -1
- helm/common/images_utils.py +25 -0
- helm/common/mongo_key_value_store.py +2 -1
- helm/common/request.py +16 -0
- helm/config/model_deployments.yaml +740 -363
- helm/config/model_metadata.yaml +824 -128
- helm/config/tokenizer_configs.yaml +207 -10
- helm/proxy/critique/model_critique_client.py +32 -4
- helm/proxy/example_queries.py +14 -21
- helm/proxy/services/server_service.py +2 -3
- helm/proxy/token_counters/test_auto_token_counter.py +2 -2
- helm/tokenizers/ai21_tokenizer.py +51 -59
- helm/tokenizers/auto_tokenizer.py +1 -1
- helm/tokenizers/cohere_tokenizer.py +29 -62
- helm/tokenizers/huggingface_tokenizer.py +35 -13
- helm/tokenizers/test_ai21_tokenizer.py +48 -0
- helm/tokenizers/test_cohere_tokenizer.py +39 -0
- helm/tokenizers/test_huggingface_tokenizer.py +5 -1
- helm/benchmark/static/benchmarking.css +0 -156
- helm/benchmark/static/benchmarking.js +0 -1705
- helm/benchmark/static/config.js +0 -3
- helm/benchmark/static/general.js +0 -122
- helm/benchmark/static/images/crfm-logo.png +0 -0
- helm/benchmark/static/images/helm-logo-simple.png +0 -0
- helm/benchmark/static/images/helm-logo.png +0 -0
- helm/benchmark/static/images/language-model-helm.png +0 -0
- helm/benchmark/static/images/organizations/ai21.png +0 -0
- helm/benchmark/static/images/organizations/anthropic.png +0 -0
- helm/benchmark/static/images/organizations/bigscience.png +0 -0
- helm/benchmark/static/images/organizations/cohere.png +0 -0
- helm/benchmark/static/images/organizations/eleutherai.png +0 -0
- helm/benchmark/static/images/organizations/google.png +0 -0
- helm/benchmark/static/images/organizations/meta.png +0 -0
- helm/benchmark/static/images/organizations/microsoft.png +0 -0
- helm/benchmark/static/images/organizations/nvidia.png +0 -0
- helm/benchmark/static/images/organizations/openai.png +0 -0
- helm/benchmark/static/images/organizations/together.png +0 -0
- helm/benchmark/static/images/organizations/tsinghua-keg.png +0 -0
- helm/benchmark/static/images/organizations/yandex.png +0 -0
- helm/benchmark/static/images/scenarios-by-metrics.png +0 -0
- helm/benchmark/static/images/taxonomy-scenarios.png +0 -0
- helm/benchmark/static/index.html +0 -68
- helm/benchmark/static/info-icon.png +0 -0
- helm/benchmark/static/json-urls.js +0 -69
- helm/benchmark/static/plot-captions.js +0 -27
- helm/benchmark/static/schema_image2structure.yaml +0 -304
- helm/benchmark/static/utils.js +0 -285
- helm/benchmark/static_build/assets/index-737eef9e.js +0 -10
- helm/benchmark/static_build/assets/index-878a1094.css +0 -1
- helm/benchmark/window_services/ai21_window_service.py +0 -247
- helm/benchmark/window_services/cohere_window_service.py +0 -101
- helm/benchmark/window_services/test_ai21_window_service.py +0 -163
- helm/benchmark/window_services/test_cohere_window_service.py +0 -75
- helm/benchmark/window_services/test_cohere_window_service_utils.py +0 -8328
- helm/benchmark/window_services/test_ice_window_service.py +0 -327
- helm/tokenizers/ice_tokenizer.py +0 -30
- helm/tokenizers/test_ice_tokenizer.py +0 -57
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/LICENSE +0 -0
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/top_level.txt +0 -0
- /helm/benchmark/annotation/{image2structure → image2struct}/__init__.py +0 -0
- /helm/benchmark/annotation/{image2structure → image2struct}/image_compiler_annotator.py +0 -0
- /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/__init__.py +0 -0
- /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/__init__.py +0 -0
- /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/jekyll_server.py +0 -0
helm/config/model_metadata.yaml
CHANGED
@@ -31,50 +31,41 @@ models:
 
 
   # AI21 Labs
-  - name: ai21/j1-jumbo
+  - name: ai21/j1-jumbo
     display_name: J1-Jumbo v1 (178B)
     description: Jurassic-1 Jumbo (178B parameters) ([docs](https://studio.ai21.com/docs/jurassic1-language-models/), [tech report](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf)).
     creator_organization_name: AI21 Labs
     access: limited
     num_parameters: 178000000000
     release_date: 2021-08-11
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
-  - name: ai21/j1-large
+  - name: ai21/j1-large
     display_name: J1-Large v1 (7.5B)
     description: Jurassic-1 Large (7.5B parameters) ([docs](https://studio.ai21.com/docs/jurassic1-language-models/), [tech report](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf)).
     creator_organization_name: AI21 Labs
     access: limited
     num_parameters: 7500000000
     release_date: 2021-08-11
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
-  - name: ai21/j1-grande
+  - name: ai21/j1-grande
     display_name: J1-Grande v1 (17B)
     description: Jurassic-1 Grande (17B parameters) with a "few tweaks" to the training process ([docs](https://studio.ai21.com/docs/jurassic1-language-models/), [tech report](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf)).
     creator_organization_name: AI21 Labs
     access: limited
     num_parameters: 17000000000
     release_date: 2022-05-03
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
-  - name: ai21/j1-grande-v2-beta
+  - name: ai21/j1-grande-v2-beta
     display_name: J1-Grande v2 beta (17B)
     description: Jurassic-1 Grande v2 beta (17B parameters)
     creator_organization_name: AI21 Labs
     access: limited
     num_parameters: 17000000000
     release_date: 2022-10-28
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: ai21/j2-jumbo
-    display_name: Jurassic-2 Jumbo (178B)
-    description: Jurassic-2 Jumbo (178B parameters) ([docs](https://www.ai21.com/blog/introducing-j2))
-    creator_organization_name: AI21 Labs
-    access: limited
-    num_parameters: 178000000000
-    release_date: 2023-03-09
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
   - name: ai21/j2-large
     display_name: Jurassic-2 Large (7.5B)
@@ -83,7 +74,7 @@ models:
     access: limited
     num_parameters: 7500000000
     release_date: 2023-03-09
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: ai21/j2-grande
     display_name: Jurassic-2 Grande (17B)
@@ -92,13 +83,67 @@ models:
     access: limited
     num_parameters: 17000000000
     release_date: 2023-03-09
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ai21/j2-jumbo
+    display_name: Jurassic-2 Jumbo (178B)
+    description: Jurassic-2 Jumbo (178B parameters) ([docs](https://www.ai21.com/blog/introducing-j2))
+    creator_organization_name: AI21 Labs
+    access: limited
+    num_parameters: 178000000000
+    release_date: 2023-03-09
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   # TODO(1524): Change AI21 model names
   # - j2-jumbo -> j2-ultra
   # - j2-grande -> j2-mid
   # - j2-large -> j2-light
 
+  - name: ai21/jamba-instruct
+    display_name: Jamba Instruct
+    description: Jamba Instruct is an instruction tuned version of Jamba, which uses a hybrid Transformer-Mamba mixture-of-experts (MoE) architecture that interleaves blocks of Transformer and Mamba layers. ([blog](https://www.ai21.com/blog/announcing-jamba-instruct))
+    creator_organization_name: AI21 Labs
+    access: limited
+    num_parameters: 52000000000
+    release_date: 2024-05-02
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ai21/jamba-1.5-mini
+    display_name: Jamba 1.5 Mini
+    description: Jamba 1.5 Mini is a long-context, hybrid SSM-Transformer instruction following foundation model that is optimized for function calling, structured output, and grounded generation. ([blog](https://www.ai21.com/blog/announcing-jamba-model-family))
+    creator_organization_name: AI21 Labs
+    access: open
+    num_parameters: 51600000000
+    release_date: 2024-08-22
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ai21/jamba-1.5-large
+    display_name: Jamba 1.5 Large
+    description: Jamba 1.5 Large is a long-context, hybrid SSM-Transformer instruction following foundation model that is optimized for function calling, structured output, and grounded generation. ([blog](https://www.ai21.com/blog/announcing-jamba-model-family))
+    creator_organization_name: AI21 Labs
+    access: open
+    num_parameters: 399000000000
+    release_date: 2024-08-22
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  # AI Singapore
+  - name: aisingapore/sea-lion-7b
+    display_name: SEA-LION (7B)
+    description: SEA-LION is a collection of language models which has been pretrained and instruct-tuned on languages from the Southeast Asia region. It utilizes the MPT architecture and a custom SEABPETokenizer for tokenization.
+    creator_organization_name: AI Singapore
+    access: open
+    num_parameters: 7000000000
+    release_date: 2023-02-24
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: aisingapore/sea-lion-7b-instruct
+    display_name: SEA-LION Instruct (7B)
+    description: SEA-LION is a collection of language models which has been pretrained and instruct-tuned on languages from the Southeast Asia region. It utilizes the MPT architecture and a custom SEABPETokenizer for tokenization.
+    creator_organization_name: AI Singapore
+    access: open
+    num_parameters: 7000000000
+    release_date: 2023-02-24
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
 
   # Aleph Alpha
@@ -253,7 +298,14 @@ models:
     release_date: 2024-03-04 # https://www.anthropic.com/news/claude-3-family
     tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
-
+  - name: anthropic/claude-3-5-sonnet-20240620
+    display_name: Claude 3.5 Sonnet (20240620)
+    description: Claude 3.5 Sonnet is a Claude 3 family model which outperforms Claude 3 Opus while operating faster and at a lower cost. ([blog](https://www.anthropic.com/news/claude-3-5-sonnet))
+    creator_organization_name: Anthropic
+    access: limited
+    release_date: 2024-06-20
+    tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: anthropic/stanford-online-all-v4-s3
     display_name: Anthropic-LM v4-s3 (52B)
     description: A 52B parameter language model, trained using reinforcement learning from human feedback [paper](https://arxiv.org/pdf/2204.05862.pdf).
@@ -261,7 +313,7 @@ models:
     access: closed
     num_parameters: 52000000000
     release_date: 2021-12-01
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG]
 
 
 
@@ -327,6 +379,18 @@ models:
     release_date: 2023-05-09 # ArXiv submission date
     tags: [CODE_MODEL_TAG]
 
+  # BioMistral
+
+  - name: biomistral/biomistral-7b
+    display_name: BioMistral (7B)
+    description: BioMistral 7B is an open-source LLM tailored for the biomedical domain, utilizing Mistral as its foundation model and further pre-trained on PubMed Central.
+    creator_organization_name: BioMistral
+    access: open
+    num_parameters: 7300000000
+    release_date: 2024-02-15
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+
 
 
   # Cerebras Systems
@@ -370,16 +434,16 @@ models:
     access: limited
     num_parameters: 52400000000
     release_date: 2022-06-09
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
-  - name: cohere/large-20220720
+  - name: cohere/large-20220720
     display_name: Cohere large v20220720 (13.1B)
     description: Cohere large v20220720 (13.1B parameters), which is deprecated by Cohere as of December 2, 2022.
     creator_organization_name: Cohere
     access: limited
     num_parameters: 13100000000
     release_date: 2022-07-20
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
   - name: cohere/medium-20220720
     display_name: Cohere medium v20220720 (6.1B)
@@ -388,16 +452,16 @@ models:
     access: limited
     num_parameters: 6100000000
     release_date: 2022-07-20
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
-  - name: cohere/small-20220720
+  - name: cohere/small-20220720
     display_name: Cohere small v20220720 (410M)
     description: Cohere small v20220720 (410M parameters), which is deprecated by Cohere as of December 2, 2022.
     creator_organization_name: Cohere
     access: limited
     num_parameters: 410000000
     release_date: 2022-07-20
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
   - name: cohere/xlarge-20221108
     display_name: Cohere xlarge v20221108 (52.4B)
@@ -406,37 +470,37 @@ models:
     access: limited
     num_parameters: 52400000000
     release_date: 2022-11-08
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
-  - name: cohere/medium-20221108
+  - name: cohere/medium-20221108
     display_name: Cohere medium v20221108 (6.1B)
     description: Cohere medium v20221108 (6.1B parameters)
     creator_organization_name: Cohere
     access: limited
     num_parameters: 6100000000
     release_date: 2022-11-08
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
-  - name: cohere/command-medium-beta
-    display_name:
-    description:
+  - name: cohere/command-medium-beta
+    display_name: Command beta (6.1B)
+    description: Command beta (6.1B parameters) is fine-tuned from the medium model to respond well with instruction-like prompts ([details](https://docs.cohere.ai/docs/command-beta)).
     creator_organization_name: Cohere
     access: limited
     num_parameters: 6100000000
     release_date: 2022-11-08
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
-  - name: cohere/command-xlarge-beta
-    display_name:
-    description:
+  - name: cohere/command-xlarge-beta
+    display_name: Command beta (52.4B)
+    description: Command beta (52.4B parameters) is fine-tuned from the XL model to respond well with instruction-like prompts ([details](https://docs.cohere.ai/docs/command-beta)).
     creator_organization_name: Cohere
     access: limited
     num_parameters: 52400000000
     release_date: 2022-11-08
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: cohere/command
-    display_name:
+    display_name: Command
     description: Command is Cohere’s flagship text generation model. It is trained to follow user commands and to be instantly useful in practical business applications. [docs](https://docs.cohere.com/reference/generate) and [changelog](https://docs.cohere.com/changelog)
     creator_organization_name: Cohere
     access: limited
@@ -444,12 +508,30 @@ models:
     tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: cohere/command-light
-    display_name:
+    display_name: Command Light
     description: Command is Cohere’s flagship text generation model. It is trained to follow user commands and to be instantly useful in practical business applications. [docs](https://docs.cohere.com/reference/generate) and [changelog](https://docs.cohere.com/changelog)
     creator_organization_name: Cohere
     access: limited
     release_date: 2023-09-29
-    tags: [TEXT_MODEL_TAG,
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: cohere/command-r
+    display_name: Command R
+    description: Command R is a multilingual 35B parameter model with a context length of 128K that has been trained with conversational tool use capabilities.
+    creator_organization_name: Cohere
+    access: open
+    num_parameters: 35000000000
+    release_date: 2024-03-11
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: cohere/command-r-plus
+    display_name: Command R Plus
+    description: Command R+ is a multilingual 104B parameter model with a context length of 128K that has been trained with conversational tool use capabilities.
+    creator_organization_name: Cohere
+    access: open
+    num_parameters: 104000000000
+    release_date: 2024-04-04
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   # Craiyon
   - name: craiyon/dalle-mini
@@ -624,7 +706,16 @@ models:
     release_date: 2023-02-13
     tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
+  # EPFL LLM
 
+  - name: epfl-llm/meditron-7b
+    display_name: Meditron (7B)
+    description: Meditron-7B is a 7 billion parameter model adapted to the medical domain from Llama-2-7B through continued pretraining on a comprehensively curated medical corpus.
+    creator_organization_name: EPFL LLM
+    access: open
+    num_parameters: 7000000000
+    release_date: 2023-11-27
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
   # Google
   - name: google/t5-11b
@@ -673,13 +764,21 @@ models:
     tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: google/gemini-1.0-pro-001
-    display_name: Gemini 1.0 Pro
+    display_name: Gemini 1.0 Pro (001)
     description: Gemini 1.0 Pro is a multimodal model able to reason across text, images, video, audio and code. ([paper](https://arxiv.org/abs/2312.11805))
     creator_organization_name: Google
     access: limited
     release_date: 2023-12-13
     tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
+  - name: google/gemini-1.0-pro-002
+    display_name: Gemini 1.0 Pro (002)
+    description: Gemini 1.0 Pro is a multimodal model able to reason across text, images, video, audio and code. ([paper](https://arxiv.org/abs/2312.11805))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2024-04-09
+    tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   # Note: This is aliased to a snapshot of gemini-pro-vision. When possible, please use a versioned snapshot instead.
   - name: google/gemini-pro-vision
     display_name: Gemini Pro Vision
@@ -697,18 +796,81 @@ models:
     release_date: 2023-12-13
     tags: [VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, GOOGLE_GEMINI_PRO_VISION_V1_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
 
+  - name: google/gemini-1.5-pro-001
+    display_name: Gemini 1.5 Pro (001)
+    description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2024-05-24
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemini-1.5-flash-001
+    display_name: Gemini 1.5 Flash (001)
+    description: Gemini 1.5 Flash is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2024-05-24
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: google/gemini-1.5-pro-preview-0409
     display_name: Gemini 1.5 Pro (0409 preview)
-    description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. ([paper](https://arxiv.org/abs/2403.05530))
+    description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
     creator_organization_name: Google
     access: limited
     release_date: 2024-04-10
     tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
+  - name: google/gemini-1.5-pro-preview-0514
+    display_name: Gemini 1.5 Pro (0514 preview)
+    description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2024-05-14
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemini-1.5-flash-preview-0514
+    display_name: Gemini 1.5 Flash (0514 preview)
+    description: Gemini 1.5 Flash is a smaller Gemini model. It has a 1 million token context window and allows interleaving text, images, audio and video as inputs. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([blog](https://blog.google/technology/developers/gemini-gemma-developer-updates-may-2024/))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2024-05-14
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemini-1.5-pro-001-safety-default
+    display_name: Gemini 1.5 Pro (001, default safety)
+    description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and uses default safety settings. ([paper](https://arxiv.org/abs/2403.05530))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2024-05-24
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemini-1.5-pro-001-safety-block-none
+    display_name: Gemini 1.5 Pro (001, BLOCK_NONE safety)
+    description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2024-05-24
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemini-1.5-flash-001-safety-default
+    display_name: Gemini 1.5 Flash (001, default safety)
+    description: Gemini 1.5 Flash is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and uses default safety settings. ([paper](https://arxiv.org/abs/2403.05530))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2024-05-24
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemini-1.5-flash-001-safety-block-none
+    display_name: Gemini 1.5 Flash (001, BLOCK_NONE safety)
+    description: Gemini 1.5 Flash is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2024-05-24
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: google/gemma-2b
     display_name: Gemma (2B)
-
-    description: TBD
+    description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/gemma-open-models/))
     creator_organization_name: Google
     access: open
     release_date: 2024-02-21
@@ -716,8 +878,7 @@ models:
 
   - name: google/gemma-2b-it
     display_name: Gemma Instruct (2B)
-
-    description: TBD
+    description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/gemma-open-models/))
     creator_organization_name: Google
     access: open
     release_date: 2024-02-21
@@ -725,8 +886,7 @@ models:
 
   - name: google/gemma-7b
     display_name: Gemma (7B)
-
-    description: TBD
+    description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/gemma-open-models/))
    creator_organization_name: Google
     access: open
     release_date: 2024-02-21
@@ -734,14 +894,60 @@ models:
 
   - name: google/gemma-7b-it
     display_name: Gemma Instruct (7B)
-
-    description: TBD
+    description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/gemma-open-models/))
     creator_organization_name: Google
     access: open
     release_date: 2024-02-21
-    # TODO: Add OUTPUT_FORMAT_INSTRUCTIONS_TAG tag
     tags: [TEXT_MODEL_TAG, GOOGLE_GEMMA_INSTRUCT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
+  - name: google/gemma-2-9b
+    display_name: Gemma 2 (9B)
+    description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/google-gemma-2/))
+    creator_organization_name: Google
+    access: open
+    release_date: 2024-06-27
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: google/gemma-2-9b-it
+    display_name: Gemma 2 Instruct (9B)
+    description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/google-gemma-2/))
+    creator_organization_name: Google
+    access: open
+    release_date: 2024-06-27
+    tags: [TEXT_MODEL_TAG, GOOGLE_GEMMA_INSTRUCT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemma-2-27b
+    display_name: Gemma 2 (27B)
+    description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/google-gemma-2/))
+    creator_organization_name: Google
+    access: open
+    release_date: 2024-06-27
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: google/gemma-2-27b-it
+    display_name: Gemma 2 Instruct (27B)
+    description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/google-gemma-2/))
+    creator_organization_name: Google
+    access: open
+    release_date: 2024-06-27
+    tags: [TEXT_MODEL_TAG, GOOGLE_GEMMA_INSTRUCT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/paligemma-3b-mix-224
+    display_name: PaliGemma (3B) Mix 224
+    description: PaliGemma is a versatile and lightweight vision-language model (VLM) inspired by PaLI-3 and based on open components such as the SigLIP vision model and the Gemma language model. Pre-trained with 224x224 input images and 128 token input/output text sequences. Finetuned on a mixture of downstream academic datasets. ([blog](https://developers.googleblog.com/en/gemma-family-and-toolkit-expansion-io-2024/))
+    creator_organization_name: Google
+    access: open
+    release_date: 2024-05-12
+    tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: google/paligemma-3b-mix-448
+    display_name: PaliGemma (3B) Mix 448
+    description: PaliGemma is a versatile and lightweight vision-language model (VLM) inspired by PaLI-3 and based on open components such as the SigLIP vision model and the Gemma language model. Pre-trained with 448x448 input images and 512 token input/output text sequences. Finetuned on a mixture of downstream academic datasets. ([blog](https://developers.googleblog.com/en/gemma-family-and-toolkit-expansion-io-2024/))
+    creator_organization_name: Google
+    access: open
+    release_date: 2024-05-12
+    tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
   - name: google/text-bison@001
     display_name: PaLM-2 (Bison)
     description: The best value PaLM model. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
@@ -798,7 +1004,21 @@ models:
     release_date: 2023-06-29 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/code-generation#model_versions
     tags: [CODE_MODEL_TAG]
 
+  - name: google/medlm-medium
+    display_name: MedLM (Medium)
+    description: MedLM is a family of foundation models fine-tuned for the healthcare industry based on Google Research's medically-tuned large language model, Med-PaLM 2. ([documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/medlm/overview))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2023-12-13
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
+  - name: google/medlm-large
+    display_name: MedLM (Large)
+    description: MedLM is a family of foundation models fine-tuned for the healthcare industry based on Google Research's medically-tuned large language model, Med-PaLM 2. ([documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/medlm/overview))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2023-12-13
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
   # HuggingFace
   - name: HuggingFaceM4/idefics2-8b
@@ -1059,8 +1279,6 @@ models:
     release_date: 2023-06-22
     tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
-
-
   # Meta
   - name: meta/opt-iml-175b # NOT SUPPORTED
     display_name: OPT-IML (175B)
@@ -1203,7 +1421,7 @@ models:
 
   - name: meta/llama-3-8b
     display_name: Llama 3 (8B)
-    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability.
+    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)
     creator_organization_name: Meta
     access: open
     num_parameters: 8000000000
@@ -1212,16 +1430,43 @@ models:
 
   - name: meta/llama-3-70b
     display_name: Llama 3 (70B)
-    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability.
+    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)
     creator_organization_name: Meta
     access: open
     num_parameters: 70000000000
     release_date: 2024-04-18
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
 
+  - name: meta/llama-3.1-8b-instruct-turbo
+    display_name: Llama 3.1 Instruct Turbo (8B)
+    description: Llama 3.1 (8B) is part of the Llama 3 family of dense Transformer models that that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/), [blog](https://ai.meta.com/blog/meta-llama-3-1/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 8000000000
+    release_date: 2024-07-23
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: meta/llama-3.1-70b-instruct-turbo
+    display_name: Llama 3.1 Instruct Turbo (70B)
+    description: Llama 3.1 (70B) is part of the Llama 3 family of dense Transformer models that that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/), [blog](https://ai.meta.com/blog/meta-llama-3-1/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 70000000000
+    release_date: 2024-07-23
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: meta/llama-3.1-405b-instruct-turbo
+    display_name: Llama 3.1 Instruct Turbo (405B)
+    description: Llama 3.1 (405B) is part of the Llama 3 family of dense Transformer models that that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/), [blog](https://ai.meta.com/blog/meta-llama-3-1/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 405000000000
+    release_date: 2024-07-23
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
   - name: meta/llama-3-8b-chat
-    display_name: Llama 3
-    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. It used SFT, rejection sampling, PPO and DPO for post-training.
+    display_name: Llama 3 Instruct (8B)
+    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. It used SFT, rejection sampling, PPO and DPO for post-training. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)
     creator_organization_name: Meta
     access: open
     num_parameters: 8000000000
@@ -1229,14 +1474,44 @@ models:
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: meta/llama-3-70b-chat
-    display_name: Llama 3
-    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. It used SFT, rejection sampling, PPO and DPO for post-training.
+    display_name: Llama 3 Instruct (70B)
+    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. It used SFT, rejection sampling, PPO and DPO for post-training. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)
     creator_organization_name: Meta
     access: open
     num_parameters: 70000000000
     release_date: 2024-04-18
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
+  - name: meta/llama-guard-7b
+    display_name: Llama Guard (7B)
+    description: Llama-Guard is a 7B parameter Llama 2-based input-output safeguard model. It can be used for classifying content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM it generates text in its output that indicates whether a given prompt or response is safe/unsafe, and if unsafe based on a policy, it also lists the violating subcategories.
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 7000000000
+    release_date: 2023-12-07
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: meta/llama-guard-2-8b
+    display_name: Llama Guard 2 (8B)
+    description: Llama Guard 2 is an 8B parameter Llama 3-based LLM safeguard model. Similar to Llama Guard, it can be used for classifying content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM – it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 8000000000
+    release_date: 2024-04-18
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: meta/llama-guard-3-8b
+    display_name: Llama Guard 3 (8B)
+    description: Llama Guard 3 is an 8B parameter Llama 3.1-based LLM safeguard model. Similar to Llama Guard, it can be used for classifying content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM – it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 8000000000
+    release_date: 2024-07-23
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+
+
+
 
   # Microsoft/NVIDIA
   - name: microsoft/TNLGv2_530B
@@ -1246,7 +1521,7 @@ models:
     access: closed
     num_parameters: 530000000000
     release_date: 2022-01-28
-    tags: [
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
 
   - name: microsoft/TNLGv2_7B
     display_name: TNLG v2 (6.7B)
@@ -1255,7 +1530,7 @@ models:
     access: closed
     num_parameters: 6700000000
     release_date: 2022-01-28
-    tags: [
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
 
   - name: microsoft/llava-1.5-7b-hf
     display_name: LLaVA 1.5 (7B)
@@ -1329,7 +1604,33 @@ models:
     release_date: 2023-10-05
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
 
+  - name: microsoft/phi-3-small-8k-instruct
+    display_name: Phi-3 (7B)
+    description: Phi-3-Small-8K-Instruct is a lightweight model trained with synthetic data and filtered publicly available website data with a focus on high-quality and reasoning dense properties. ([paper](https://arxiv.org/abs/2404.14219), [blog](https://azure.microsoft.com/en-us/blog/new-models-added-to-the-phi-3-family-available-on-microsoft-azure/))
+    creator_organization_name: Microsoft
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-05-21
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: microsoft/phi-3-medium-4k-instruct
+    display_name: Phi-3 (14B)
+    description: Phi-3-Medium-4K-Instruct is a lightweight model trained with synthetic data and filtered publicly available website data with a focus on high-quality and reasoning dense properties. ([paper](https://arxiv.org/abs/2404.14219), [blog](https://azure.microsoft.com/en-us/blog/new-models-added-to-the-phi-3-family-available-on-microsoft-azure/))
+    creator_organization_name: Microsoft
+    access: open
+    num_parameters: 14000000000
+    release_date: 2024-05-21
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
+  # KAIST AI
+  - name: kaistai/prometheus-vision-13b-v1.0-hf
|
|
1627
|
+
display_name: LLaVA + Vicuna-v1.5 (13B)
|
|
1628
|
+
description: LLaVA is an open-source chatbot trained by fine-tuning LLaMA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
|
|
1629
|
+
creator_organization_name: KAIST AI
|
|
1630
|
+
access: open
|
|
1631
|
+
num_parameters: 13000000000
|
|
1632
|
+
release_date: 2024-01-01
|
|
1633
|
+
tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
|
|
1333
1634
|
|
|
1334
1635
|
# 01.AI
|
|
1335
1636
|
- name: 01-ai/yi-6b
|
|
@@ -1340,6 +1641,7 @@ models:
|
|
|
1340
1641
|
num_parameters: 6000000000
|
|
1341
1642
|
release_date: 2023-11-02
|
|
1342
1643
|
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1644
|
+
|
|
1343
1645
|
- name: 01-ai/yi-34b
|
|
1344
1646
|
display_name: Yi (34B)
|
|
1345
1647
|
description: The Yi models are large language models trained from scratch by developers at 01.AI.
|
|
@@ -1348,6 +1650,7 @@ models:
|
|
|
1348
1650
|
num_parameters: 34000000000
|
|
1349
1651
|
release_date: 2023-11-02
|
|
1350
1652
|
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1653
|
+
|
|
1351
1654
|
- name: 01-ai/yi-6b-chat
|
|
1352
1655
|
display_name: Yi Chat (6B)
|
|
1353
1656
|
description: The Yi models are large language models trained from scratch by developers at 01.AI.
|
|
@@ -1356,6 +1659,7 @@ models:
|
|
|
1356
1659
|
num_parameters: 6000000000
|
|
1357
1660
|
release_date: 2023-11-23
|
|
1358
1661
|
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1662
|
+
|
|
1359
1663
|
- name: 01-ai/yi-34b-chat
|
|
1360
1664
|
display_name: Yi Chat (34B)
|
|
1361
1665
|
description: The Yi models are large language models trained from scratch by developers at 01.AI.
|
|
@@ -1365,6 +1669,22 @@ models:
|
|
|
1365
1669
|
release_date: 2023-11-23
|
|
1366
1670
|
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1367
1671
|
|
|
1672
|
+
- name: 01-ai/yi-large
|
|
1673
|
+
display_name: Yi Large
|
|
1674
|
+
description: The Yi models are large language models trained from scratch by developers at 01.AI. ([tweet](https://x.com/01AI_Yi/status/1789894091620458667))
|
|
1675
|
+
creator_organization_name: 01.AI
|
|
1676
|
+
access: limited
|
|
1677
|
+
release_date: 2024-05-12
|
|
1678
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1679
|
+
|
|
1680
|
+
- name: 01-ai/yi-large-preview
|
|
1681
|
+
display_name: Yi Large (Preview)
|
|
1682
|
+
description: The Yi models are large language models trained from scratch by developers at 01.AI. ([tweet](https://x.com/01AI_Yi/status/1789894091620458667))
|
|
1683
|
+
creator_organization_name: 01.AI
|
|
1684
|
+
access: limited
|
|
1685
|
+
release_date: 2024-05-12
|
|
1686
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1687
|
+
|
|
1368
1688
|
# Allen Institute for AI
|
|
1369
1689
|
# OLMo Blog: https://blog.allenai.org/olmo-open-language-model-87ccfc95f580
|
|
1370
1690
|
- name: allenai/olmo-7b
|
|
@@ -1395,35 +1715,70 @@ models:
|
|
|
1395
1715
|
# TODO: Add instruct tag.
|
|
1396
1716
|
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1397
1717
|
|
|
1718
|
+
- name: allenai/olmo-1.7-7b
|
|
1719
|
+
display_name: OLMo 1.7 (7B)
|
|
1720
|
+
description: OLMo is a series of Open Language Models trained on the Dolma dataset. The instruct versions were trained on the Tulu SFT mixture and a cleaned version of the UltraFeedback dataset.
|
|
1721
|
+
creator_organization_name: Allen Institute for AI
|
|
1722
|
+
access: open
|
|
1723
|
+
num_parameters: 7000000000
|
|
1724
|
+
release_date: 2024-04-17
|
|
1725
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1398
1726
|
|
|
1399
1727
|
# Mistral AI
|
|
1400
1728
|
- name: mistralai/mistral-7b-v0.1
|
|
1401
1729
|
display_name: Mistral v0.1 (7B)
|
|
1402
|
-
description: Mistral 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA) and Sliding-Window Attention (SWA).
|
|
1730
|
+
description: Mistral 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA) and Sliding-Window Attention (SWA). ([blog post](https://mistral.ai/news/announcing-mistral-7b/))
|
|
1403
1731
|
creator_organization_name: Mistral AI
|
|
1404
1732
|
access: open
|
|
1405
1733
|
num_parameters: 7300000000
|
|
1406
1734
|
release_date: 2023-09-27
|
|
1735
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1736
|
+
|
|
1737
|
+
- name: mistralai/mistral-7b-instruct-v0.1
|
|
1738
|
+
display_name: Mistral Instruct v0.1 (7B)
|
|
1739
|
+
description: Mistral v0.1 Instruct 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA) and Sliding-Window Attention (SWA). The instruct version was fine-tuned using publicly available conversation datasets. ([blog post](https://mistral.ai/news/announcing-mistral-7b/))
|
|
1740
|
+
creator_organization_name: Mistral AI
|
|
1741
|
+
access: open
|
|
1742
|
+
num_parameters: 7300000000
|
|
1743
|
+
release_date: 2023-09-27
|
|
1744
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1745
|
+
|
|
1746
|
+
- name: mistralai/mistral-7b-instruct-v0.2
|
|
1747
|
+
display_name: Mistral Instruct v0.2 (7B)
|
|
1748
|
+
description: Mistral v0.2 Instruct 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA). Compared to v0.1, v0.2 has a 32k context window and no Sliding-Window Attention (SWA). ([blog post](https://mistral.ai/news/la-plateforme/))
|
|
1749
|
+
creator_organization_name: Mistral AI
|
|
1750
|
+
access: open
|
|
1751
|
+
num_parameters: 7300000000
|
|
1752
|
+
release_date: 2024-03-23
|
|
1753
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1754
|
+
|
|
1755
|
+
- name: mistralai/mistral-7b-instruct-v0.3
|
|
1756
|
+
display_name: Mistral Instruct v0.3 (7B)
|
|
1757
|
+
description: Mistral v0.3 Instruct 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA). Like v0.2, it has a 32k context window and no Sliding-Window Attention (SWA). ([blog post](https://mistral.ai/news/la-plateforme/))
|
|
1758
|
+
creator_organization_name: Mistral AI
|
|
1759
|
+
access: open
|
|
1760
|
+
num_parameters: 7300000000
|
|
1761
|
+
release_date: 2024-05-22
|
|
1407
1762
|
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1408
1763
|
|
|
1409
1764
|
- name: mistralai/mixtral-8x7b-32kseqlen
|
|
1410
1765
|
display_name: Mixtral (8x7B 32K seqlen)
|
|
1411
|
-
description:
|
|
1766
|
+
description: Mixtral is a mixture-of-experts model that has 46.7B total parameters but only uses 12.9B parameters per token. ([blog post](https://mistral.ai/news/mixtral-of-experts/), [tweet](https://twitter.com/MistralAI/status/1733150512395038967)).
|
|
1412
1767
|
creator_organization_name: Mistral AI
|
|
1413
1768
|
access: open
|
|
1414
1769
|
num_parameters: 46700000000
|
|
1415
1770
|
release_date: 2023-12-08
|
|
1416
|
-
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG
|
|
1771
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1417
1772
|
|
|
1418
1773
|
- name: mistralai/mixtral-8x7b-instruct-v0.1
|
|
1419
|
-
display_name: Mixtral (8x7B
|
|
1420
|
-
description: Mixtral (8x7B
|
|
1774
|
+
display_name: Mixtral Instruct (8x7B)
|
|
1775
|
+
description: Mixtral Instruct (8x7B) is a version of Mixtral (8x7B) that was optimized through supervised fine-tuning and direct preference optimisation (DPO) for careful instruction following. ([blog post](https://mistral.ai/news/mixtral-of-experts/)).
|
|
1421
1776
|
creator_organization_name: Mistral AI
|
|
1422
1777
|
access: open
|
|
1423
1778
|
num_parameters: 46700000000
|
|
1424
1779
|
# Blog post: https://mistral.ai/news/mixtral-of-experts/
|
|
1425
1780
|
release_date: 2023-12-11
|
|
1426
|
-
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG
|
|
1781
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1427
1782
|
|
|
1428
1783
|
- name: mistralai/mixtral-8x22b
|
|
1429
1784
|
display_name: Mixtral (8x22B)
|
|
@@ -1432,7 +1787,7 @@ models:
|
|
|
1432
1787
|
access: open
|
|
1433
1788
|
num_parameters: 176000000000
|
|
1434
1789
|
release_date: 2024-04-10
|
|
1435
|
-
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG
|
|
1790
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1436
1791
|
|
|
1437
1792
|
- name: mistralai/mixtral-8x22b-instruct-v0.1
|
|
1438
1793
|
display_name: Mixtral Instruct (8x22B)
|
|
@@ -1454,13 +1809,11 @@ models:
|
|
|
1454
1809
|
|
|
1455
1810
|
- name: mistralai/mistral-small-2402
|
|
1456
1811
|
display_name: Mistral Small (2402)
|
|
1457
|
-
|
|
1458
|
-
description: TBD
|
|
1812
|
+
description: Mistral Small is a multilingual model with a 32K-token context window and function-calling capabilities. ([blog](https://mistral.ai/news/mistral-large/))
|
|
1459
1813
|
creator_organization_name: Mistral AI
|
|
1460
1814
|
access: limited
|
|
1461
|
-
# Blog post: https://mistral.ai/news/mistral-large/
|
|
1462
1815
|
release_date: 2024-02-26
|
|
1463
|
-
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG
|
|
1816
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1464
1817
|
|
|
1465
1818
|
- name: mistralai/mistral-medium-2312
|
|
1466
1819
|
display_name: Mistral Medium (2312)
|
|
@@ -1468,18 +1821,32 @@ models:
|
|
|
1468
1821
|
creator_organization_name: Mistral AI
|
|
1469
1822
|
access: limited
|
|
1470
1823
|
release_date: 2023-12-11
|
|
1471
|
-
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG
|
|
1824
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1472
1825
|
|
|
1473
1826
|
- name: mistralai/mistral-large-2402
|
|
1474
1827
|
display_name: Mistral Large (2402)
|
|
1475
|
-
|
|
1476
|
-
description: TBD
|
|
1828
|
+
description: Mistral Large is a multilingual model with a 32K-token context window and function-calling capabilities. ([blog](https://mistral.ai/news/mistral-large/))
|
|
1477
1829
|
creator_organization_name: Mistral AI
|
|
1478
1830
|
access: limited
|
|
1479
|
-
# Blog post: https://mistral.ai/news/mistral-large/
|
|
1480
1831
|
release_date: 2024-02-26
|
|
1481
|
-
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG
|
|
1832
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1482
1833
|
|
|
1834
|
+
- name: mistralai/mistral-large-2407
|
|
1835
|
+
display_name: Mistral Large 2 (2407)
|
|
1836
|
+
description: Mistral Large 2 is a 123 billion parameter model that has a 128k context window and supports dozens of languages and 80+ coding languages. ([blog](https://mistral.ai/news/mistral-large-2407/))
|
|
1837
|
+
creator_organization_name: Mistral AI
|
|
1838
|
+
access: open
|
|
1839
|
+
num_parameters: 123000000000
|
|
1840
|
+
release_date: 2024-07-24
|
|
1841
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1842
|
+
|
|
1843
|
+
- name: mistralai/open-mistral-nemo-2407
|
|
1844
|
+
display_name: Mistral NeMo (2407)
|
|
1845
|
+
description: Mistral NeMo is a multilingual 12B model with a large context window of 128K tokens. ([blog](https://mistral.ai/news/mistral-nemo/))
|
|
1846
|
+
creator_organization_name: Mistral AI
|
|
1847
|
+
access: open
|
|
1848
|
+
release_date: 2024-07-18
|
|
1849
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1483
1850
|
|
|
1484
1851
|
# MosaicML
|
|
1485
1852
|
- name: mosaicml/mpt-7b
|
|
@@ -1558,7 +1925,13 @@ models:
|
|
|
1558
1925
|
release_date: 2019-09-17 # paper date
|
|
1559
1926
|
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, BUGGY_TEMP_0_TAG]
|
|
1560
1927
|
|
|
1561
|
-
|
|
1928
|
+
- name: nvidia/nemotron-4-340b-instruct
|
|
1929
|
+
display_name: Nemotron-4 Instruct (340B)
|
|
1930
|
+
description: Nemotron-4 Instruct (340B) is an open weights model sized to fit on a single DGX H100 with 8 GPUs when deployed in FP8 precision. 98% of the data used for model alignment was synthetically generated ([paper](https://arxiv.org/abs/2406.11704)).
|
|
1931
|
+
creator_organization_name: NVIDIA
|
|
1932
|
+
access: open
|
|
1933
|
+
release_date: 2024-06-17
|
|
1934
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1562
1935
|
|
|
1563
1936
|
# OpenAI
|
|
1564
1937
|
|
|
@@ -1596,97 +1969,95 @@ models:
|
|
|
1596
1969
|
|
|
1597
1970
|
# DEPRECATED: Announced on July 06 2023 that these models will be shut down on January 04 2024.
|
|
1598
1971
|
|
|
1599
|
-
- name: openai/davinci
|
|
1972
|
+
- name: openai/davinci
|
|
1600
1973
|
display_name: davinci (175B)
|
|
1601
1974
|
description: Original GPT-3 (175B parameters) autoregressive language model ([paper](https://arxiv.org/pdf/2005.14165.pdf), [docs](https://beta.openai.com/docs/model-index-for-researchers)).
|
|
1602
1975
|
creator_organization_name: OpenAI
|
|
1603
1976
|
access: limited
|
|
1604
1977
|
num_parameters: 175000000000
|
|
1605
1978
|
release_date: 2020-05-28
|
|
1606
|
-
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1979
|
+
tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1607
1980
|
|
|
1608
|
-
- name: openai/curie
|
|
1981
|
+
- name: openai/curie
|
|
1609
1982
|
display_name: curie (6.7B)
|
|
1610
1983
|
description: Original GPT-3 (6.7B parameters) autoregressive language model ([paper](https://arxiv.org/pdf/2005.14165.pdf), [docs](https://beta.openai.com/docs/model-index-for-researchers)).
|
|
1611
1984
|
creator_organization_name: OpenAI
|
|
1612
1985
|
access: limited
|
|
1613
1986
|
num_parameters: 6700000000
|
|
1614
1987
|
release_date: 2020-05-28
|
|
1615
|
-
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1988
|
+
tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1616
1989
|
|
|
1617
|
-
- name: openai/babbage
|
|
1990
|
+
- name: openai/babbage
|
|
1618
1991
|
display_name: babbage (1.3B)
|
|
1619
1992
|
description: Original GPT-3 (1.3B parameters) autoregressive language model ([paper](https://arxiv.org/pdf/2005.14165.pdf), [docs](https://beta.openai.com/docs/model-index-for-researchers)).
|
|
1620
1993
|
creator_organization_name: OpenAI
|
|
1621
1994
|
access: limited
|
|
1622
1995
|
num_parameters: 1300000000
|
|
1623
1996
|
release_date: 2020-05-28
|
|
1624
|
-
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1997
|
+
tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1625
1998
|
|
|
1626
|
-
- name: openai/ada
|
|
1999
|
+
- name: openai/ada
|
|
1627
2000
|
display_name: ada (350M)
|
|
1628
2001
|
description: Original GPT-3 (350M parameters) autoregressive language model ([paper](https://arxiv.org/pdf/2005.14165.pdf), [docs](https://beta.openai.com/docs/model-index-for-researchers)).
|
|
1629
2002
|
creator_organization_name: OpenAI
|
|
1630
2003
|
access: limited
|
|
1631
2004
|
num_parameters: 350000000
|
|
1632
2005
|
release_date: 2020-05-28
|
|
1633
|
-
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
2006
|
+
tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1634
2007
|
|
|
1635
|
-
- name: openai/text-davinci-003
|
|
2008
|
+
- name: openai/text-davinci-003
|
|
1636
2009
|
display_name: GPT-3.5 (text-davinci-003)
|
|
1637
2010
|
description: text-davinci-003 model that involves reinforcement learning (PPO) with reward models. Derived from text-davinci-002 ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
|
|
1638
2011
|
creator_organization_name: OpenAI
|
|
1639
2012
|
access: limited
|
|
1640
2013
|
num_parameters: 175000000000
|
|
1641
2014
|
release_date: 2022-11-28
|
|
1642
|
-
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2015
|
+
tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1643
2016
|
|
|
1644
|
-
|
|
1645
|
-
# https://github.com/stanford-crfm/benchmarking/issues/359
|
|
1646
|
-
- name: openai/text-davinci-002 # DEPRECATED
|
|
2017
|
+
- name: openai/text-davinci-002
|
|
1647
2018
|
display_name: GPT-3.5 (text-davinci-002)
|
|
1648
2019
|
description: text-davinci-002 model that involves supervised fine-tuning on human-written demonstrations. Derived from code-davinci-002 ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
|
|
1649
2020
|
creator_organization_name: OpenAI
|
|
1650
2021
|
access: limited
|
|
1651
2022
|
num_parameters: 175000000000
|
|
1652
2023
|
release_date: 2022-01-27
|
|
1653
|
-
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
2024
|
+
tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1654
2025
|
|
|
1655
|
-
- name: openai/text-davinci-001
|
|
2026
|
+
- name: openai/text-davinci-001
|
|
1656
2027
|
display_name: GPT-3.5 (text-davinci-001)
|
|
1657
2028
|
description: text-davinci-001 model that involves supervised fine-tuning on human-written demonstrations ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
|
|
1658
2029
|
creator_organization_name: OpenAI
|
|
1659
2030
|
access: limited
|
|
1660
2031
|
num_parameters: 175000000000
|
|
1661
2032
|
release_date: 2022-01-27
|
|
1662
|
-
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
2033
|
+
tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1663
2034
|
|
|
1664
|
-
- name: openai/text-curie-001
|
|
2035
|
+
- name: openai/text-curie-001
|
|
1665
2036
|
display_name: text-curie-001
|
|
1666
2037
|
description: text-curie-001 model that involves supervised fine-tuning on human-written demonstrations ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
|
|
1667
2038
|
creator_organization_name: OpenAI
|
|
1668
2039
|
access: limited
|
|
1669
2040
|
num_parameters: 6700000000
|
|
1670
2041
|
release_date: 2022-01-27
|
|
1671
|
-
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
2042
|
+
tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1672
2043
|
|
|
1673
|
-
- name: openai/text-babbage-001
|
|
2044
|
+
- name: openai/text-babbage-001
|
|
1674
2045
|
display_name: text-babbage-001
|
|
1675
2046
|
description: text-babbage-001 model that involves supervised fine-tuning on human-written demonstrations ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
|
|
1676
2047
|
creator_organization_name: OpenAI
|
|
1677
2048
|
access: limited
|
|
1678
2049
|
num_parameters: 1300000000
|
|
1679
2050
|
release_date: 2022-01-27
|
|
1680
|
-
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
2051
|
+
tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1681
2052
|
|
|
1682
|
-
- name: openai/text-ada-001
|
|
2053
|
+
- name: openai/text-ada-001
|
|
1683
2054
|
display_name: text-ada-001
|
|
1684
2055
|
description: text-ada-001 model that involves supervised fine-tuning on human-written demonstrations ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
|
|
1685
2056
|
creator_organization_name: OpenAI
|
|
1686
2057
|
access: limited
|
|
1687
2058
|
num_parameters: 350000000
|
|
1688
2059
|
release_date: 2022-01-27
|
|
1689
|
-
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
2060
|
+
tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1690
2061
|
|
|
1691
2062
|
|
|
1692
2063
|
## GPT 3.5 Turbo Models
|
|
@@ -1727,7 +2098,7 @@ models:
|
|
|
1727
2098
|
tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1728
2099
|
|
|
1729
2100
|
- name: openai/gpt-3.5-turbo-0125
|
|
1730
|
-
display_name:
|
|
2101
|
+
display_name: GPT-3.5 Turbo (0125)
|
|
1731
2102
|
description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2024-01-25.
|
|
1732
2103
|
creator_organization_name: OpenAI
|
|
1733
2104
|
access: limited
|
|
@@ -1804,7 +2175,31 @@ models:
|
|
|
1804
2175
|
creator_organization_name: OpenAI
|
|
1805
2176
|
access: limited
|
|
1806
2177
|
release_date: 2024-04-09
|
|
1807
|
-
tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2178
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2179
|
+
|
|
2180
|
+
- name: openai/gpt-4o-2024-05-13
|
|
2181
|
+
display_name: GPT-4o (2024-05-13)
|
|
2182
|
+
description: GPT-4o (2024-05-13) is a large multimodal model that accepts as input any combination of text, audio, and image and generates any combination of text, audio, and image outputs. ([blog](https://openai.com/index/hello-gpt-4o/))
|
|
2183
|
+
creator_organization_name: OpenAI
|
|
2184
|
+
access: limited
|
|
2185
|
+
release_date: 2024-05-13
|
|
2186
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2187
|
+
|
|
2188
|
+
- name: openai/gpt-4o-2024-08-06
|
|
2189
|
+
display_name: GPT-4o (2024-08-06)
|
|
2190
|
+
description: GPT-4o (2024-08-06) is a large multimodal model that accepts as input any combination of text, audio, and image and generates any combination of text, audio, and image outputs. ([blog](https://openai.com/index/introducing-structured-outputs-in-the-api/))
|
|
2191
|
+
creator_organization_name: OpenAI
|
|
2192
|
+
access: limited
|
|
2193
|
+
release_date: 2024-08-06
|
|
2194
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2195
|
+
|
|
2196
|
+
- name: openai/gpt-4o-mini-2024-07-18
|
|
2197
|
+
display_name: GPT-4o mini (2024-07-18)
|
|
2198
|
+
description: GPT-4o mini (2024-07-18) is a multimodal model with a context window of 128K tokens and improved handling of non-English text. ([blog](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/))
|
|
2199
|
+
creator_organization_name: OpenAI
|
|
2200
|
+
access: limited
|
|
2201
|
+
release_date: 2024-07-18
|
|
2202
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1808
2203
|
|
|
1809
2204
|
- name: openai/gpt-4-vision-preview
|
|
1810
2205
|
# According to https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4, this model has pointed gpt-4-1106-vision-preview.
|
|
@@ -1826,30 +2221,30 @@ models:
|
|
|
1826
2221
|
## Codex Models
|
|
1827
2222
|
# DEPRECATED: Codex models have been shut down on March 23 2023.
|
|
1828
2223
|
|
|
1829
|
-
- name: openai/code-davinci-002
|
|
2224
|
+
- name: openai/code-davinci-002
|
|
1830
2225
|
display_name: code-davinci-002
|
|
1831
2226
|
description: Codex-style model that is designed for pure code-completion tasks ([docs](https://beta.openai.com/docs/models/codex)).
|
|
1832
2227
|
creator_organization_name: OpenAI
|
|
1833
2228
|
access: limited
|
|
1834
2229
|
release_date: 2021-07-01 # TODO: Find correct date (this is for v1)
|
|
1835
|
-
tags: [CODE_MODEL_TAG]
|
|
2230
|
+
tags: [DEPRECATED_MODEL_TAG, CODE_MODEL_TAG]
|
|
1836
2231
|
|
|
1837
|
-
- name: openai/code-davinci-001
|
|
2232
|
+
- name: openai/code-davinci-001
|
|
1838
2233
|
display_name: code-davinci-001
|
|
1839
2234
|
description: code-davinci-001 model
|
|
1840
2235
|
creator_organization_name: OpenAI
|
|
1841
2236
|
access: limited
|
|
1842
2237
|
release_date: 2021-07-01 # Paper date
|
|
1843
|
-
tags: [CODE_MODEL_TAG]
|
|
2238
|
+
tags: [DEPRECATED_MODEL_TAG, CODE_MODEL_TAG]
|
|
1844
2239
|
|
|
1845
|
-
- name: openai/code-cushman-001
|
|
2240
|
+
- name: openai/code-cushman-001
|
|
1846
2241
|
display_name: code-cushman-001 (12B)
|
|
1847
2242
|
description: Codex-style model that is a stronger, multilingual version of the Codex (12B) model in the [Codex paper](https://arxiv.org/pdf/2107.03374.pdf).
|
|
1848
2243
|
creator_organization_name: OpenAI
|
|
1849
2244
|
access: limited
|
|
1850
2245
|
num_parameters: 12000000000
|
|
1851
2246
|
release_date: 2021-07-01 # Paper date
|
|
1852
|
-
tags: [CODE_MODEL_TAG]
|
|
2247
|
+
tags: [DEPRECATED_MODEL_TAG, CODE_MODEL_TAG]
|
|
1853
2248
|
|
|
1854
2249
|
|
|
1855
2250
|
## Text Similarity Models
|
|
@@ -1859,41 +2254,41 @@ models:
|
|
|
1859
2254
|
# DEPRECATED: Announced on July 06 2023 that first generation embeddings models
|
|
1860
2255
|
# will be shut down on January 04 2024.
|
|
1861
2256
|
|
|
1862
|
-
- name: openai/text-similarity-davinci-001
|
|
2257
|
+
- name: openai/text-similarity-davinci-001
|
|
1863
2258
|
display_name: text-similarity-davinci-001
|
|
1864
2259
|
description: Embedding model that is designed for text similarity tasks ([docs](https://openai.com/blog/introducing-text-and-code-embeddings)).
|
|
1865
2260
|
creator_organization_name: OpenAI
|
|
1866
2261
|
access: limited
|
|
1867
2262
|
num_parameters: 175000000000
|
|
1868
2263
|
release_date: 2022-01-25 # Blog post date
|
|
1869
|
-
tags: [TEXT_SIMILARITY_MODEL_TAG]
|
|
2264
|
+
tags: [DEPRECATED_MODEL_TAG, TEXT_SIMILARITY_MODEL_TAG]
|
|
1870
2265
|
|
|
1871
|
-
- name: openai/text-similarity-curie-001
|
|
2266
|
+
- name: openai/text-similarity-curie-001
|
|
1872
2267
|
display_name: text-similarity-curie-001
|
|
1873
2268
|
description: Embedding model that is designed for text similarity tasks ([docs](https://openai.com/blog/introducing-text-and-code-embeddings)).
|
|
1874
2269
|
creator_organization_name: OpenAI
|
|
1875
2270
|
access: limited
|
|
1876
2271
|
num_parameters: 6700000000
|
|
1877
2272
|
release_date: 2022-01-25 # Blog post date
|
|
1878
|
-
tags: [TEXT_SIMILARITY_MODEL_TAG]
|
|
2273
|
+
tags: [DEPRECATED_MODEL_TAG, TEXT_SIMILARITY_MODEL_TAG]
|
|
1879
2274
|
|
|
1880
|
-
- name: openai/text-similarity-babbage-001
|
|
2275
|
+
- name: openai/text-similarity-babbage-001
|
|
1881
2276
|
display_name: text-similarity-babbage-001
|
|
1882
2277
|
description: Embedding model that is designed for text similarity tasks ([docs](https://openai.com/blog/introducing-text-and-code-embeddings)).
|
|
1883
2278
|
creator_organization_name: OpenAI
|
|
1884
2279
|
access: limited
|
|
1885
2280
|
num_parameters: 1300000000
|
|
1886
2281
|
release_date: 2022-01-25 # Blog post date
|
|
1887
|
-
tags: [TEXT_SIMILARITY_MODEL_TAG]
|
|
2282
|
+
tags: [DEPRECATED_MODEL_TAG, TEXT_SIMILARITY_MODEL_TAG]
|
|
1888
2283
|
|
|
1889
|
-
- name: openai/text-similarity-ada-001
|
|
2284
|
+
- name: openai/text-similarity-ada-001
|
|
1890
2285
|
display_name: text-similarity-ada-001
|
|
1891
2286
|
description: Embedding model that is designed for text similarity tasks ([docs](https://openai.com/blog/introducing-text-and-code-embeddings)).
|
|
1892
2287
|
creator_organization_name: OpenAI
|
|
1893
2288
|
access: limited
|
|
1894
2289
|
num_parameters: 350000000
|
|
1895
2290
|
release_date: 2022-01-25 # Blog post date
|
|
1896
|
-
tags: [TEXT_SIMILARITY_MODEL_TAG]
|
|
2291
|
+
tags: [DEPRECATED_MODEL_TAG, TEXT_SIMILARITY_MODEL_TAG]
|
|
1897
2292
|
|
|
1898
2293
|
- name: openai/text-embedding-ada-002
|
|
1899
2294
|
display_name: text-embedding-ada-002
|
|
@@ -1949,11 +2344,39 @@ models:
|
|
|
1949
2344
|
release_date: 2023-11-06
|
|
1950
2345
|
tags: [TEXT_TO_IMAGE_MODEL_TAG]
|
|
1951
2346
|
|
|
2347
|
+
# OpenThaiGPT
|
|
2348
|
+
- name: openthaigpt/openthaigpt-1.0.0-7b-chat
|
|
2349
|
+
display_name: OpenThaiGPT v1.0.0 (7B)
|
|
2350
|
+
description: OpenThaiGPT v1.0.0 (7B) is a Thai language chat model based on Llama 2 that has been specifically fine-tuned for Thai instructions and enhanced by incorporating over 10,000 of the most commonly used Thai words into the dictionary. ([blog post](https://openthaigpt.aieat.or.th/openthaigpt-1.0.0-less-than-8-apr-2024-greater-than))
|
|
2351
|
+
creator_organization_name: OpenThaiGPT
|
|
2352
|
+
access: open
|
|
2353
|
+
num_parameters: 7000000000
|
|
2354
|
+
release_date: 2024-04-08
|
|
2355
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2356
|
+
|
|
2357
|
+
- name: openthaigpt/openthaigpt-1.0.0-13b-chat
|
|
2358
|
+
display_name: OpenThaiGPT v1.0.0 (13B)
|
|
2359
|
+
description: OpenThaiGPT v1.0.0 (13B) is a Thai language chat model based on Llama 2 that has been specifically fine-tuned for Thai instructions and enhanced by incorporating over 10,000 of the most commonly used Thai words into the dictionary. ([blog post](https://openthaigpt.aieat.or.th/openthaigpt-1.0.0-less-than-8-apr-2024-greater-than))
|
|
2360
|
+
creator_organization_name: OpenThaiGPT
|
|
2361
|
+
access: open
|
|
2362
|
+
num_parameters: 13000000000
|
|
2363
|
+
release_date: 2024-04-08
|
|
2364
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2365
|
+
|
|
2366
|
+
- name: openthaigpt/openthaigpt-1.0.0-70b-chat
|
|
2367
|
+
display_name: OpenThaiGPT v1.0.0 (70B)
|
|
2368
|
+
description: OpenThaiGPT v1.0.0 (70B) is a Thai language chat model based on Llama 2 that has been specifically fine-tuned for Thai instructions and enhanced by incorporating over 10,000 of the most commonly used Thai words into the dictionary. ([blog post](https://openthaigpt.aieat.or.th/openthaigpt-1.0.0-less-than-8-apr-2024-greater-than))
|
|
2369
|
+
creator_organization_name: OpenThaiGPT
|
|
2370
|
+
access: open
|
|
2371
|
+
num_parameters: 70000000000
|
|
2372
|
+
release_date: 2024-04-08
|
|
2373
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2374
|
+
|
|
1952
2375
|
# Qwen
|
|
1953
2376
|
|
|
1954
2377
|
- name: qwen/qwen-7b
|
|
1955
2378
|
display_name: Qwen
|
|
1956
|
-
description: 7B-parameter version of the large language model series, Qwen (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen
|
|
2379
|
+
description: 7B-parameter version of the large language model series, Qwen (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/))
|
|
1957
2380
|
creator_organization_name: Qwen
|
|
1958
2381
|
access: open
|
|
1959
2382
|
release_date: 2024-02-05
|
|
@@ -1961,7 +2384,7 @@ models:
|
|
|
1961
2384
|
|
|
1962
2385
|
- name: qwen/qwen1.5-7b
|
|
1963
2386
|
display_name: Qwen1.5 (7B)
|
|
1964
|
-
description: 7B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen
|
|
2387
|
+
description: 7B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/))
|
|
1965
2388
|
creator_organization_name: Qwen
|
|
1966
2389
|
access: open
|
|
1967
2390
|
release_date: 2024-02-05
|
|
@@ -1969,7 +2392,7 @@ models:
|
|
|
1969
2392
|
|
|
1970
2393
|
- name: qwen/qwen1.5-14b
|
|
1971
2394
|
display_name: Qwen1.5 (14B)
|
|
1972
|
-
description: 14B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen
|
|
2395
|
+
description: 14B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/))
|
|
1973
2396
|
creator_organization_name: Qwen
|
|
1974
2397
|
access: open
|
|
1975
2398
|
release_date: 2024-02-05
|
|
@@ -1977,20 +2400,68 @@ models:
|
|
|
1977
2400
|
|
|
1978
2401
|
- name: qwen/qwen1.5-32b
|
|
1979
2402
|
display_name: Qwen1.5 (32B)
|
|
1980
|
-
description: 32B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen
|
|
2403
|
+
description: 32B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. The 32B version also includes grouped query attention (GQA). ([blog](https://qwenlm.github.io/blog/qwen1.5-32b/))
|
|
1981
2404
|
creator_organization_name: Qwen
|
|
1982
2405
|
access: open
|
|
1983
|
-
release_date: 2024-02
|
|
2406
|
+
release_date: 2024-04-02
|
|
1984
2407
|
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1985
2408
|
|
|
1986
2409
|
- name: qwen/qwen1.5-72b
|
|
1987
2410
|
display_name: Qwen1.5 (72B)
|
|
1988
|
-
description: 72B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen
|
|
2411
|
+
description: 72B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/))
|
|
1989
2412
|
creator_organization_name: Qwen
|
|
1990
2413
|
access: open
|
|
1991
2414
|
release_date: 2024-02-05
|
|
1992
2415
|
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1993
2416
|
|
|
2417
|
+
- name: qwen/qwen1.5-7b-chat
|
|
2418
|
+
display_name: Qwen1.5 Chat (7B)
|
|
2419
|
+
description: 7B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/))
|
|
2420
|
+
creator_organization_name: Qwen
|
|
2421
|
+
access: open
|
|
2422
|
+
release_date: 2024-02-05
|
|
2423
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2424
|
+
|
|
2425
|
+
- name: qwen/qwen1.5-14b-chat
|
|
2426
|
+
display_name: Qwen1.5 Chat (14B)
|
|
2427
|
+
description: 14B-parameter chat version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/))
|
|
2428
|
+
creator_organization_name: Qwen
|
|
2429
|
+
access: open
|
|
2430
|
+
release_date: 2024-02-05
|
|
2431
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2432
|
+
|
|
2433
|
+
- name: qwen/qwen1.5-32b-chat
|
|
2434
|
+
display_name: Qwen1.5 Chat (32B)
|
|
2435
|
+
description: 32B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. The 32B version also includes grouped query attention (GQA). ([blog](https://qwenlm.github.io/blog/qwen1.5-32b/))
|
|
2436
|
+
creator_organization_name: Qwen
|
|
2437
|
+
access: open
|
|
2438
|
+
release_date: 2024-04-02
|
|
2439
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2440
|
+
|
|
2441
|
+
- name: qwen/qwen1.5-72b-chat
|
|
2442
|
+
display_name: Qwen1.5 Chat (72B)
|
|
2443
|
+
description: 72B-parameter chat version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/))
|
|
2444
|
+
creator_organization_name: Qwen
|
|
2445
|
+
access: open
|
|
2446
|
+
release_date: 2024-02-05
|
|
2447
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2448
|
+
|
|
2449
|
+
- name: qwen/qwen1.5-110b-chat
|
|
2450
|
+
display_name: Qwen1.5 Chat (110B)
|
|
2451
|
+
description: 110B-parameter chat version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. The 110B version also includes grouped query attention (GQA). ([blog](https://qwenlm.github.io/blog/qwen1.5-110b/))
|
|
2452
|
+
creator_organization_name: Qwen
|
|
2453
|
+
access: open
|
|
2454
|
+
release_date: 2024-04-25
|
|
2455
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2456
|
+
|
|
2457
|
+
- name: qwen/qwen2-72b-instruct
|
|
2458
|
+
display_name: Qwen2 Instruct (72B)
|
|
2459
|
+
description: 72B-parameter chat version of the large language model series, Qwen2. Qwen2 uses Group Query Attention (GQA) and has extended context length support up to 128K tokens. ([blog](https://qwenlm.github.io/blog/qwen2/))
|
|
2460
|
+
creator_organization_name: Qwen
|
|
2461
|
+
access: open
|
|
2462
|
+
release_date: 2024-06-07
|
|
2463
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2464
|
+
|
|
1994
2465
|
- name: qwen/qwen-vl
|
|
1995
2466
|
display_name: Qwen-VL
|
|
1996
2467
|
description: Visual multimodal version of the Qwen large language model series ([paper](https://arxiv.org/abs/2308.12966)).
|
|
@@ -2007,6 +2478,43 @@ models:
|
|
|
2007
2478
|
release_date: 2023-08-24
|
|
2008
2479
|
tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
|
|
2009
2480
|
|
|
2481
|
+
# SAIL (Sea AI Lab)
|
|
2482
|
+
- name: sail/sailor-7b
|
|
2483
|
+
display_name: Sailor (7B)
|
|
2484
|
+
description: Sailor is a suite of Open Language Models tailored for South-East Asia, focusing on languages such as Indonesian, Thai, Vietnamese, Malay, and Lao. These models were continually pre-trained from Qwen1.5. ([paper](https://arxiv.org/abs/2404.03608))
|
|
2485
|
+
creator_organization_name: SAIL
|
|
2486
|
+
access: open
|
|
2487
|
+
num_parameters: 7000000000
|
|
2488
|
+
release_date: 2024-04-04
|
|
2489
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
2490
|
+
|
|
2491
|
+
- name: sail/sailor-7b-chat
|
|
2492
|
+
display_name: Sailor Chat (7B)
|
|
2493
|
+
description: Sailor is a suite of Open Language Models tailored for South-East Asia, focusing on languages such as Indonesian, Thai, Vietnamese, Malay, and Lao. These models were continually pre-trained from Qwen1.5. ([paper](https://arxiv.org/abs/2404.03608))
|
|
2494
|
+
creator_organization_name: SAIL
|
|
2495
|
+
access: open
|
|
2496
|
+
num_parameters: 7000000000
|
|
2497
|
+
release_date: 2024-04-04
|
|
2498
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2499
|
+
|
|
2500
|
+
- name: sail/sailor-14b
|
|
2501
|
+
display_name: Sailor (14B)
|
|
2502
|
+
description: Sailor is a suite of Open Language Models tailored for South-East Asia, focusing on languages such as Indonesian, Thai, Vietnamese, Malay, and Lao. These models were continually pre-trained from Qwen1.5. ([paper](https://arxiv.org/abs/2404.03608))
|
|
2503
|
+
creator_organization_name: SAIL
|
|
2504
|
+
access: open
|
|
2505
|
+
num_parameters: 14000000000
|
|
2506
|
+
release_date: 2024-04-04
|
|
2507
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
2508
|
+
|
|
2509
|
+
- name: sail/sailor-14b-chat
|
|
2510
|
+
display_name: Sailor Chat (14B)
|
|
2511
|
+
description: Sailor is a suite of Open Language Models tailored for South-East Asia, focusing on languages such as Indonesian, Thai, Vietnamese, Malay, and Lao. These models were continually pre-trained from Qwen1.5. ([paper](https://arxiv.org/abs/2404.03608))
|
|
2512
|
+
creator_organization_name: SAIL
|
|
2513
|
+
access: open
|
|
2514
|
+
num_parameters: 14000000000
|
|
2515
|
+
release_date: 2024-04-04
|
|
2516
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2517
|
+
|
|
2010
2518
|
# Salesforce
|
|
2011
2519
|
- name: salesforce/codegen # NOT SUPPORTED
|
|
2012
2520
|
display_name: CodeGen (16B)
|
|
@@ -2017,6 +2525,125 @@ models:
|
|
|
2017
2525
|
release_date: 2022-03-25
|
|
2018
2526
|
tags: [] # TODO: add tags
|
|
2019
2527
|
|
|
2528
|
+
# SambaNova
|
|
2529
|
+
- name: sambanova/sambalingo-thai-base
|
|
2530
|
+
display_name: SambaLingo-Thai-Base
|
|
2531
|
+
description: SambaLingo-Thai-Base is a pretrained bi-lingual Thai and English model that adapts Llama 2 (7B) to Thai by training on 38 billion tokens from the Thai split of the Cultura-X dataset. ([paper](https://arxiv.org/abs/2404.05829))
|
|
2532
|
+
creator_organization_name: SambaLingo
|
|
2533
|
+
access: open
|
|
2534
|
+
num_parameters: 7000000000
|
|
2535
|
+
release_date: 2024-04-08
|
|
2536
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
2537
|
+
|
|
2538
|
+
- name: sambanova/sambalingo-thai-chat
|
|
2539
|
+
display_name: SambaLingo-Thai-Chat
|
|
2540
|
+
description: SambaLingo-Thai-Chat is a chat model trained using direct preference optimization on SambaLingo-Thai-Base. SambaLingo-Thai-Base adapts Llama 2 (7B) to Thai by training on 38 billion tokens from the Thai split of the Cultura-X dataset. ([paper](https://arxiv.org/abs/2404.05829))
|
|
2541
|
+
creator_organization_name: SambaLingo
|
|
2542
|
+
access: open
|
|
2543
|
+
num_parameters: 7000000000
|
|
2544
|
+
release_date: 2024-04-08
|
|
2545
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2546
|
+
|
|
2547
|
+
- name: sambanova/sambalingo-thai-base-70b
|
|
2548
|
+
display_name: SambaLingo-Thai-Base-70B
|
|
2549
|
+
description: SambaLingo-Thai-Base-70B is a pretrained bi-lingual Thai and English model that adapts Llama 2 (70B) to Thai by training on 26 billion tokens from the Thai split of the Cultura-X dataset. ([paper](https://arxiv.org/abs/2404.05829))
|
|
2550
|
+
creator_organization_name: SambaLingo
|
|
2551
|
+
access: open
|
|
2552
|
+
num_parameters: 70000000000
|
|
2553
|
+
release_date: 2024-04-08
|
|
2554
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
2555
|
+
|
|
2556
|
+
- name: sambanova/sambalingo-thai-chat-70b
|
|
2557
|
+
display_name: SambaLingo-Thai-Chat-70B
|
|
2558
|
+
description: SambaLingo-Thai-Chat-70B is a chat model trained using direct preference optimization on SambaLingo-Thai-Base-70B. SambaLingo-Thai-Base-70B adapts Llama 2 (70B) to Thai by training on 26 billion tokens from the Thai split of the Cultura-X dataset. ([paper](https://arxiv.org/abs/2404.05829))
|
|
2559
|
+
creator_organization_name: SambaLingo
|
|
2560
|
+
access: open
|
|
2561
|
+
num_parameters: 70000000000
|
|
2562
|
+
release_date: 2024-04-08
|
|
2563
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2564
|
+
|
|
2565
|
+
# SCB10X
|
|
2566
|
+
- name: scb10x/typhoon-7b
|
|
2567
|
+
display_name: Typhoon (7B)
|
|
2568
|
+
description: Typhoon (7B) is a pretrained Thai large language model with 7 billion parameters based on Mistral 7B. ([paper](https://arxiv.org/abs/2312.13951))
|
|
2569
|
+
creator_organization_name: SCB10X
|
|
2570
|
+
access: open
|
|
2571
|
+
num_parameters: 7000000000
|
|
2572
|
+
release_date: 2023-12-21
|
|
2573
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
2574
|
+
|
|
2575
|
+
- name: scb10x/typhoon-v1.5-8b
|
|
2576
|
+
display_name: Typhoon v1.5 (8B)
|
|
2577
|
+
description: Typhoon v1.5 (8B) is a pretrained Thai large language model with 8 billion parameters based on Llama 3 8B. ([blog](https://blog.opentyphoon.ai/typhoon-1-5-release-a9364cb8e8d7))
|
|
2578
|
+
creator_organization_name: SCB10X
|
|
2579
|
+
access: open
|
|
2580
|
+
num_parameters: 8000000000
|
|
2581
|
+
release_date: 2024-05-08
|
|
2582
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
2583
|
+
|
|
2584
|
+
- name: scb10x/typhoon-v1.5-8b-instruct
|
|
2585
|
+
display_name: Typhoon v1.5 Instruct (8B)
|
|
2586
|
+
description: Typhoon v1.5 Instruct (8B) is an instruction-tuned Thai large language model with 8 billion parameters based on Llama 3 8B. ([blog](https://blog.opentyphoon.ai/typhoon-1-5-release-a9364cb8e8d7))
|
|
2587
|
+
creator_organization_name: SCB10X
|
|
2588
|
+
access: open
|
|
2589
|
+
num_parameters: 8000000000
|
|
2590
|
+
release_date: 2024-05-08
|
|
2591
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2592
|
+
|
|
2593
|
+
- name: scb10x/typhoon-v1.5-72b
|
|
2594
|
+
display_name: Typhoon v1.5 (72B)
|
|
2595
|
+
description: Typhoon v1.5 (72B) is a pretrained Thai large language model with 72 billion parameters based on Qwen1.5-72B. ([blog](https://blog.opentyphoon.ai/typhoon-1-5-release-a9364cb8e8d7))
|
|
2596
|
+
creator_organization_name: SCB10X
|
|
2597
|
+
access: open
|
|
2598
|
+
num_parameters: 72000000000
|
|
2599
|
+
release_date: 2024-05-08
|
|
2600
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2601
|
+
|
|
2602
|
+
- name: scb10x/typhoon-v1.5-72b-instruct
|
|
2603
|
+
display_name: Typhoon v1.5 Instruct (72B)
|
|
2604
|
+
description: Typhoon v1.5 Instruct (72B) is an instruction-tuned Thai large language model with 72 billion parameters based on Qwen1.5-72B. ([blog](https://blog.opentyphoon.ai/typhoon-1-5-release-a9364cb8e8d7))
|
|
2605
|
+
creator_organization_name: SCB10X
|
|
2606
|
+
access: open
|
|
2607
|
+
num_parameters: 72000000000
|
|
2608
|
+
release_date: 2024-05-08
|
|
2609
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2610
|
+
|
|
2611
|
+
- name: scb10x/llama-3-typhoon-v1.5x-8b-instruct
|
|
2612
|
+
display_name: Typhoon 1.5X Instruct (8B)
|
|
2613
|
+
description: Llama-3-Typhoon-1.5X-8B-instruct is an 8 billion parameter instruct model designed for the Thai language based on Llama 3 Instruct. It utilizes the task-arithmetic model editing technique. ([blog](https://blog.opentyphoon.ai/typhoon-1-5x-our-experiment-designed-for-application-use-cases-7b85d9e9845c))
|
|
2614
|
+
creator_organization_name: SCB10X
|
|
2615
|
+
access: open
|
|
2616
|
+
num_parameters: 8000000000
|
|
2617
|
+
release_date: 2024-05-29
|
|
2618
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2619
|
+
|
|
2620
|
+
- name: scb10x/llama-3-typhoon-v1.5x-70b-instruct
|
|
2621
|
+
display_name: Typhoon 1.5X Instruct (70B)
|
|
2622
|
+
description: Llama-3-Typhoon-1.5X-70B-instruct is a 70 billion parameter instruct model designed for the Thai language based on Llama 3 Instruct. It utilizes the task-arithmetic model editing technique. ([blog](https://blog.opentyphoon.ai/typhoon-1-5x-our-experiment-designed-for-application-use-cases-7b85d9e9845c))
|
|
2623
|
+
creator_organization_name: SCB10X
|
|
2624
|
+
access: open
|
|
2625
|
+
num_parameters: 70000000000
|
|
2626
|
+
release_date: 2024-05-29
|
|
2627
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2628
|
+
|
|
2629
|
+
# Alibaba DAMO Academy
|
|
2630
|
+
- name: damo/seallm-7b-v2
|
|
2631
|
+
display_name: SeaLLM v2 (7B)
|
|
2632
|
+
description: SeaLLM v2 is a multilingual LLM for Southeast Asian (SEA) languages trained from Mistral (7B). ([website](https://damo-nlp-sg.github.io/SeaLLMs/))
|
|
2633
|
+
creator_organization_name: Alibaba DAMO Academy
|
|
2634
|
+
access: open
|
|
2635
|
+
num_parameters: 7000000000
|
|
2636
|
+
release_date: 2024-02-02
|
|
2637
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
2638
|
+
|
|
2639
|
+
- name: damo/seallm-7b-v2.5
|
|
2640
|
+
display_name: SeaLLM v2.5 (7B)
|
|
2641
|
+
description: SeaLLM v2.5 is a multilingual LLM for Southeast Asian (SEA) languages trained from Gemma (7B). ([website](https://damo-nlp-sg.github.io/SeaLLMs/))
|
|
2642
|
+
creator_organization_name: Alibaba DAMO Academy
|
|
2643
|
+
access: open
|
|
2644
|
+
num_parameters: 7000000000
|
|
2645
|
+
release_date: 2024-04-12
|
|
2646
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
2020
2647
|
|
|
2021
2648
|
# Snowflake
|
|
2022
2649
|
- name: snowflake/snowflake-arctic-instruct
|
|
@@ -2187,7 +2814,7 @@ models:
|
|
|
2187
2814
|
release_date: 2022-08-04
|
|
2188
2815
|
# Inference with echo=True is not feasible -- in the prompt encoding phase, they use
|
|
2189
2816
|
# bidirectional attention and do not perform predictions on them.
|
|
2190
|
-
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, NO_NEWLINES_TAG]
|
|
2817
|
+
tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, NO_NEWLINES_TAG]
|
|
2191
2818
|
|
|
2192
2819
|
- name: tsinghua/codegeex # NOT SUPPORTED
|
|
2193
2820
|
display_name: CodeGeeX (13B)
|
|
@@ -2222,7 +2849,6 @@ models:
|
|
|
2222
2849
|
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
2223
2850
|
|
|
2224
2851
|
- name: writer/palmyra-instruct-30
|
|
2225
|
-
deprecated: true # Internal error
|
|
2226
2852
|
display_name: InstructPalmyra (30B)
|
|
2227
2853
|
description: InstructPalmyra (30B parameters) is trained using reinforcement learning techniques based on feedback from humans.
|
|
2228
2854
|
creator_organization_name: Writer
|
|
@@ -2230,10 +2856,9 @@ models:
|
|
|
2230
2856
|
num_parameters: 30000000000
|
|
2231
2857
|
release_date: 2023-02-16
|
|
2232
2858
|
# Does not support echo
|
|
2233
|
-
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
2859
|
+
tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
2234
2860
|
|
|
2235
2861
|
- name: writer/palmyra-e
|
|
2236
|
-
deprecated: true # Internal error
|
|
2237
2862
|
display_name: Palmyra E (30B)
|
|
2238
2863
|
description: Palmyra E (30B)
|
|
2239
2864
|
creator_organization_name: Writer
|
|
@@ -2241,7 +2866,7 @@ models:
|
|
|
2241
2866
|
num_parameters: 30000000000
|
|
2242
2867
|
release_date: 2023-03-03
|
|
2243
2868
|
# Does not support echo
|
|
2244
|
-
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
2869
|
+
tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
2245
2870
|
|
|
2246
2871
|
- name: writer/silk-road
|
|
2247
2872
|
display_name: Silk Road (35B)
|
|
@@ -2293,6 +2918,15 @@ models:
|
|
|
2293
2918
|
# Does not support echo
|
|
2294
2919
|
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
2295
2920
|
|
|
2921
|
+
- name: writer/palmyra-vision-003
|
|
2922
|
+
display_name: Palmyra Vision 003
|
|
2923
|
+
description: Palmyra Vision 003 (internal only)
|
|
2924
|
+
creator_organization_name: Writer
|
|
2925
|
+
access: limited
|
|
2926
|
+
num_parameters: 5000000000
|
|
2927
|
+
release_date: 2024-05-24
|
|
2928
|
+
# Does not support echo
|
|
2929
|
+
tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
|
|
2296
2930
|
|
|
2297
2931
|
|
|
2298
2932
|
# Yandex
|
|
@@ -2304,3 +2938,65 @@ models:
|
|
|
2304
2938
|
num_parameters: 100000000000
|
|
2305
2939
|
release_date: 2022-06-23
|
|
2306
2940
|
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG]
|
|
2941
|
+
|
|
2942
|
+
# Reka
|
|
2943
|
+
- name: reka/reka-core
|
|
2944
|
+
display_name: Reka-Core
|
|
2945
|
+
description: Reka-Core
|
|
2946
|
+
creator_organization_name: Reka AI
|
|
2947
|
+
access: limited
|
|
2948
|
+
release_date: 2024-04-18
|
|
2949
|
+
tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
2950
|
+
|
|
2951
|
+
- name: reka/reka-core-20240415
|
|
2952
|
+
display_name: Reka-Core-20240415
|
|
2953
|
+
description: Reka-Core-20240415
|
|
2954
|
+
creator_organization_name: Reka AI
|
|
2955
|
+
access: limited
|
|
2956
|
+
release_date: 2024-04-18
|
|
2957
|
+
tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
2958
|
+
|
|
2959
|
+
- name: reka/reka-core-20240501
|
|
2960
|
+
display_name: Reka-Core-20240501
|
|
2961
|
+
description: Reka-Core-20240501
|
|
2962
|
+
creator_organization_name: Reka AI
|
|
2963
|
+
access: limited
|
|
2964
|
+
release_date: 2024-05-01
|
|
2965
|
+
tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
2966
|
+
|
|
2967
|
+
- name: reka/reka-flash
|
|
2968
|
+
display_name: Reka-Flash (21B)
|
|
2969
|
+
description: Reka-Flash (21B)
|
|
2970
|
+
creator_organization_name: Reka AI
|
|
2971
|
+
access: limited
|
|
2972
|
+
num_parameters: 21000000000
|
|
2973
|
+
release_date: 2024-04-18
|
|
2974
|
+
tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
2975
|
+
|
|
2976
|
+
- name: reka/reka-flash-20240226
|
|
2977
|
+
display_name: Reka-Flash-20240226 (21B)
|
|
2978
|
+
description: Reka-Flash-20240226 (21B)
|
|
2979
|
+
creator_organization_name: Reka AI
|
|
2980
|
+
access: limited
|
|
2981
|
+
num_parameters: 21000000000
|
|
2982
|
+
release_date: 2024-04-18
|
|
2983
|
+
tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
2984
|
+
|
|
2985
|
+
- name: reka/reka-edge
|
|
2986
|
+
display_name: Reka-Edge (7B)
|
|
2987
|
+
description: Reka-Edge (7B)
|
|
2988
|
+
creator_organization_name: Reka AI
|
|
2989
|
+
access: limited
|
|
2990
|
+
num_parameters: 7000000000
|
|
2991
|
+
release_date: 2024-04-18
|
|
2992
|
+
tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
2993
|
+
|
|
2994
|
+
- name: reka/reka-edge-20240208
|
|
2995
|
+
display_name: Reka-Edge-20240208 (7B)
|
|
2996
|
+
description: Reka-Edge-20240208 (7B)
|
|
2997
|
+
creator_organization_name: Reka AI
|
|
2998
|
+
access: limited
|
|
2999
|
+
num_parameters: 7000000000
|
|
3000
|
+
release_date: 2024-04-18
|
|
3001
|
+
tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
3002
|
+
|
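The tag edits in this diff (for example, adding DEPRECATED_MODEL_TAG to the retired TNLGv2 and legacy OpenAI completions models) are what downstream tooling keys off. As a rough illustration only — the file name and loading code below are assumptions, not the package's own API — entries with the schema shown above (name, tags, release_date, and so on, collected under a top-level `models:` key) can be filtered with PyYAML:

```python
# Illustrative sketch: the path "model_metadata.yaml" is hypothetical; the
# schema (name, tags, ...) is taken from the entries shown in this diff.
import yaml

with open("model_metadata.yaml") as f:  # hypothetical path
    config = yaml.safe_load(f)

# Instruction-following models that have not been tagged as deprecated.
active_instruct = [
    entry["name"]
    for entry in config["models"]
    if "INSTRUCTION_FOLLOWING_MODEL_TAG" in entry.get("tags", [])
    and "DEPRECATED_MODEL_TAG" not in entry.get("tags", [])
]
print("\n".join(active_instruct))
```

Filtering on tags rather than on hard-coded model lists is what lets the deprecation changes above take effect without touching any other entry.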