crfm-helm 0.5.2__py3-none-any.whl → 0.5.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of crfm-helm might be problematic.
- {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/METADATA +81 -112
- {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/RECORD +165 -155
- {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/WHEEL +1 -1
- helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +12 -5
- helm/benchmark/adaptation/adapters/test_generation_adapter.py +12 -12
- helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +8 -8
- helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +77 -9
- helm/benchmark/adaptation/common_adapter_specs.py +2 -0
- helm/benchmark/annotation/anthropic_red_team_annotator.py +57 -0
- helm/benchmark/annotation/call_center_annotator.py +258 -0
- helm/benchmark/annotation/financebench_annotator.py +79 -0
- helm/benchmark/annotation/harm_bench_annotator.py +55 -0
- helm/benchmark/annotation/{image2structure → image2struct}/latex_compiler_annotator.py +2 -2
- helm/benchmark/annotation/{image2structure → image2struct}/lilypond_compiler_annotator.py +5 -3
- helm/benchmark/annotation/{image2structure → image2struct}/webpage_compiler_annotator.py +5 -5
- helm/benchmark/annotation/live_qa_annotator.py +37 -45
- helm/benchmark/annotation/medication_qa_annotator.py +36 -44
- helm/benchmark/annotation/model_as_judge.py +96 -0
- helm/benchmark/annotation/simple_safety_tests_annotator.py +50 -0
- helm/benchmark/annotation/xstest_annotator.py +100 -0
- helm/benchmark/metrics/annotation_metrics.py +108 -0
- helm/benchmark/metrics/bhasa_metrics.py +188 -0
- helm/benchmark/metrics/bhasa_metrics_specs.py +10 -0
- helm/benchmark/metrics/code_metrics_helper.py +11 -1
- helm/benchmark/metrics/safety_metrics.py +79 -0
- helm/benchmark/metrics/summac/model_summac.py +3 -3
- helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -2
- helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +4 -4
- helm/benchmark/metrics/unitxt_metrics.py +17 -3
- helm/benchmark/metrics/vision_language/image_metrics.py +7 -3
- helm/benchmark/metrics/vision_language/image_utils.py +1 -1
- helm/benchmark/model_metadata_registry.py +3 -3
- helm/benchmark/presentation/create_plots.py +1 -1
- helm/benchmark/presentation/schema.py +3 -0
- helm/benchmark/presentation/summarize.py +106 -256
- helm/benchmark/presentation/test_run_entry.py +1 -0
- helm/benchmark/presentation/test_summarize.py +145 -3
- helm/benchmark/run.py +15 -0
- helm/benchmark/run_expander.py +83 -30
- helm/benchmark/run_specs/bhasa_run_specs.py +652 -0
- helm/benchmark/run_specs/call_center_run_specs.py +152 -0
- helm/benchmark/run_specs/decodingtrust_run_specs.py +8 -8
- helm/benchmark/run_specs/experimental_run_specs.py +52 -0
- helm/benchmark/run_specs/finance_run_specs.py +82 -1
- helm/benchmark/run_specs/safety_run_specs.py +154 -0
- helm/benchmark/run_specs/vlm_run_specs.py +100 -24
- helm/benchmark/scenarios/anthropic_red_team_scenario.py +71 -0
- helm/benchmark/scenarios/banking77_scenario.py +51 -0
- helm/benchmark/scenarios/bhasa_scenario.py +1942 -0
- helm/benchmark/scenarios/call_center_scenario.py +84 -0
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +2 -1
- helm/benchmark/scenarios/ewok_scenario.py +116 -0
- helm/benchmark/scenarios/fin_qa_scenario.py +2 -0
- helm/benchmark/scenarios/financebench_scenario.py +53 -0
- helm/benchmark/scenarios/harm_bench_scenario.py +59 -0
- helm/benchmark/scenarios/raft_scenario.py +1 -1
- helm/benchmark/scenarios/scenario.py +1 -1
- helm/benchmark/scenarios/simple_safety_tests_scenario.py +33 -0
- helm/benchmark/scenarios/test_commonsense_scenario.py +21 -0
- helm/benchmark/scenarios/test_ewok_scenario.py +25 -0
- helm/benchmark/scenarios/test_financebench_scenario.py +26 -0
- helm/benchmark/scenarios/test_gsm_scenario.py +31 -0
- helm/benchmark/scenarios/test_legalbench_scenario.py +30 -0
- helm/benchmark/scenarios/test_math_scenario.py +2 -8
- helm/benchmark/scenarios/test_med_qa_scenario.py +30 -0
- helm/benchmark/scenarios/test_mmlu_scenario.py +33 -0
- helm/benchmark/scenarios/test_narrativeqa_scenario.py +73 -0
- helm/benchmark/scenarios/thai_exam_scenario.py +4 -4
- helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/bingo_scenario.py +2 -2
- helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py +2 -1
- helm/benchmark/scenarios/vision_language/exams_v_scenario.py +104 -0
- helm/benchmark/scenarios/vision_language/fair_face_scenario.py +136 -0
- helm/benchmark/scenarios/vision_language/flickr30k_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/gqa_scenario.py +2 -2
- helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/chart2csv_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/latex_scenario.py +3 -3
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/musicsheet_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/utils_latex.py +31 -39
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/driver.py +1 -1
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/utils.py +1 -1
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage_scenario.py +41 -12
- helm/benchmark/scenarios/vision_language/math_vista_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/mementos_scenario.py +3 -3
- helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py +2 -2
- helm/benchmark/scenarios/vision_language/mme_scenario.py +21 -18
- helm/benchmark/scenarios/vision_language/mmmu_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/pairs_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/pope_scenario.py +2 -1
- helm/benchmark/scenarios/vision_language/real_world_qa_scenario.py +57 -0
- helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +7 -5
- helm/benchmark/scenarios/vision_language/unicorn_scenario.py +2 -2
- helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py +6 -3
- helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/vqa_scenario.py +3 -1
- helm/benchmark/scenarios/xstest_scenario.py +35 -0
- helm/benchmark/server.py +1 -6
- helm/benchmark/static/schema_air_bench.yaml +750 -750
- helm/benchmark/static/schema_bhasa.yaml +709 -0
- helm/benchmark/static/schema_call_center.yaml +232 -0
- helm/benchmark/static/schema_cleva.yaml +768 -0
- helm/benchmark/static/schema_decodingtrust.yaml +444 -0
- helm/benchmark/static/schema_ewok.yaml +367 -0
- helm/benchmark/static/schema_finance.yaml +55 -9
- helm/benchmark/static/{schema_image2structure.yaml → schema_image2struct.yaml} +231 -90
- helm/benchmark/static/schema_legal.yaml +566 -0
- helm/benchmark/static/schema_safety.yaml +266 -0
- helm/benchmark/static/schema_tables.yaml +149 -8
- helm/benchmark/static/schema_thai.yaml +21 -0
- helm/benchmark/static/schema_vhelm.yaml +137 -101
- helm/benchmark/static_build/assets/accenture-6f97eeda.png +0 -0
- helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png +0 -0
- helm/benchmark/static_build/assets/cresta-9e22b983.png +0 -0
- helm/benchmark/static_build/assets/cuhk-8c5631e9.png +0 -0
- helm/benchmark/static_build/assets/index-05c76bb1.css +1 -0
- helm/benchmark/static_build/assets/index-3ee38b3d.js +10 -0
- helm/benchmark/static_build/assets/scb10x-204bd786.png +0 -0
- helm/benchmark/static_build/assets/vhelm-aspects-1437d673.png +0 -0
- helm/benchmark/static_build/assets/vhelm-framework-a1ca3f3f.png +0 -0
- helm/benchmark/static_build/assets/vhelm-model-8afb7616.png +0 -0
- helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png +0 -0
- helm/benchmark/static_build/index.html +2 -2
- helm/benchmark/window_services/test_openai_window_service.py +8 -8
- helm/benchmark/window_services/tokenizer_service.py +0 -5
- helm/clients/ai21_client.py +71 -1
- helm/clients/anthropic_client.py +7 -19
- helm/clients/huggingface_client.py +38 -37
- helm/clients/nvidia_nim_client.py +35 -0
- helm/clients/openai_client.py +18 -4
- helm/clients/palmyra_client.py +24 -0
- helm/clients/perspective_api_client.py +11 -6
- helm/clients/test_client.py +4 -6
- helm/clients/together_client.py +22 -0
- helm/clients/vision_language/open_flamingo_client.py +1 -2
- helm/clients/vision_language/palmyra_vision_client.py +28 -13
- helm/common/cache.py +8 -30
- helm/common/images_utils.py +6 -0
- helm/common/key_value_store.py +9 -9
- helm/common/mongo_key_value_store.py +5 -4
- helm/common/request.py +16 -0
- helm/common/test_cache.py +1 -48
- helm/common/tokenization_request.py +0 -9
- helm/config/model_deployments.yaml +444 -329
- helm/config/model_metadata.yaml +513 -111
- helm/config/tokenizer_configs.yaml +140 -11
- helm/proxy/example_queries.py +14 -21
- helm/proxy/server.py +0 -9
- helm/proxy/services/remote_service.py +0 -6
- helm/proxy/services/server_service.py +6 -20
- helm/proxy/services/service.py +0 -6
- helm/proxy/token_counters/test_auto_token_counter.py +2 -2
- helm/tokenizers/ai21_tokenizer.py +51 -59
- helm/tokenizers/cohere_tokenizer.py +0 -75
- helm/tokenizers/huggingface_tokenizer.py +0 -1
- helm/tokenizers/test_ai21_tokenizer.py +48 -0
- helm/benchmark/data_overlap/data_overlap_spec.py +0 -86
- helm/benchmark/data_overlap/export_scenario_text.py +0 -119
- helm/benchmark/data_overlap/light_scenario.py +0 -60
- helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
- helm/benchmark/static/benchmarking.css +0 -156
- helm/benchmark/static/benchmarking.js +0 -1705
- helm/benchmark/static/config.js +0 -3
- helm/benchmark/static/general.js +0 -122
- helm/benchmark/static/images/crfm-logo.png +0 -0
- helm/benchmark/static/images/helm-logo-simple.png +0 -0
- helm/benchmark/static/images/helm-logo.png +0 -0
- helm/benchmark/static/images/language-model-helm.png +0 -0
- helm/benchmark/static/images/organizations/ai21.png +0 -0
- helm/benchmark/static/images/organizations/anthropic.png +0 -0
- helm/benchmark/static/images/organizations/bigscience.png +0 -0
- helm/benchmark/static/images/organizations/cohere.png +0 -0
- helm/benchmark/static/images/organizations/eleutherai.png +0 -0
- helm/benchmark/static/images/organizations/google.png +0 -0
- helm/benchmark/static/images/organizations/meta.png +0 -0
- helm/benchmark/static/images/organizations/microsoft.png +0 -0
- helm/benchmark/static/images/organizations/nvidia.png +0 -0
- helm/benchmark/static/images/organizations/openai.png +0 -0
- helm/benchmark/static/images/organizations/together.png +0 -0
- helm/benchmark/static/images/organizations/tsinghua-keg.png +0 -0
- helm/benchmark/static/images/organizations/yandex.png +0 -0
- helm/benchmark/static/images/scenarios-by-metrics.png +0 -0
- helm/benchmark/static/images/taxonomy-scenarios.png +0 -0
- helm/benchmark/static/index.html +0 -68
- helm/benchmark/static/info-icon.png +0 -0
- helm/benchmark/static/json-urls.js +0 -69
- helm/benchmark/static/plot-captions.js +0 -27
- helm/benchmark/static/utils.js +0 -285
- helm/benchmark/static_build/assets/index-30dbceba.js +0 -10
- helm/benchmark/static_build/assets/index-66b02d40.css +0 -1
- helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
- helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
- helm/benchmark/window_services/ai21_window_service.py +0 -247
- helm/benchmark/window_services/cohere_window_service.py +0 -101
- helm/benchmark/window_services/test_ai21_window_service.py +0 -163
- helm/benchmark/window_services/test_cohere_window_service.py +0 -75
- helm/benchmark/window_services/test_cohere_window_service_utils.py +0 -8328
- helm/benchmark/window_services/test_ice_window_service.py +0 -327
- helm/tokenizers/ice_tokenizer.py +0 -30
- helm/tokenizers/test_ice_tokenizer.py +0 -57
- {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/LICENSE +0 -0
- {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/top_level.txt +0 -0
- /helm/benchmark/annotation/{image2structure → image2struct}/__init__.py +0 -0
- /helm/benchmark/annotation/{image2structure → image2struct}/image_compiler_annotator.py +0 -0
- /helm/benchmark/{data_overlap → scenarios/vision_language/image2struct}/__init__.py +0 -0
- /helm/benchmark/scenarios/vision_language/{image2structure/image2structure_scenario.py → image2struct/image2struct_scenario.py} +0 -0
- /helm/benchmark/scenarios/vision_language/{image2structure → image2struct/webpage}/__init__.py +0 -0
- /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/jekyll_server.py +0 -0
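The largest single change is to helm/config/model_metadata.yaml, which registers every model HELM knows about as one YAML mapping under the top-level `models:` key; the recurring edits in this release are prepending DEPRECATED_MODEL_TAG to the `tags` list of retired models and adding new entries. As a reading aid, here is a minimal sketch of the entry shape, assembled from fields and values that appear in the diff below (the entry shown is one of the models added in this release, not a template defined by the package):

models:
  - name: anthropic/claude-3-5-sonnet-20240620   # <creator organization>/<model id>
    display_name: Claude 3.5 Sonnet (20240620)
    description: Claude 3.5 Sonnet is a Claude 3 family model which outperforms Claude 3 Opus while operating faster and at a lower cost. ([blog](https://www.anthropic.com/news/claude-3-5-sonnet))
    creator_organization_name: Anthropic
    access: limited                              # entries in this diff use open, limited, or closed
    release_date: 2024-06-20                     # num_parameters appears in most entries but is omitted here
    tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

Deprecating a model in this release amounts to prepending DEPRECATED_MODEL_TAG to that `tags` list, the pattern repeated across the AI21, Anthropic, Cohere, Microsoft, and OpenAI entries in the hunks that follow.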
helm/config/model_metadata.yaml
CHANGED
@@ -31,50 +31,41 @@ models:
 
 
   # AI21 Labs
-  - name: ai21/j1-jumbo
+  - name: ai21/j1-jumbo
     display_name: J1-Jumbo v1 (178B)
     description: Jurassic-1 Jumbo (178B parameters) ([docs](https://studio.ai21.com/docs/jurassic1-language-models/), [tech report](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf)).
     creator_organization_name: AI21 Labs
     access: limited
     num_parameters: 178000000000
     release_date: 2021-08-11
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
-  - name: ai21/j1-large
+  - name: ai21/j1-large
     display_name: J1-Large v1 (7.5B)
     description: Jurassic-1 Large (7.5B parameters) ([docs](https://studio.ai21.com/docs/jurassic1-language-models/), [tech report](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf)).
     creator_organization_name: AI21 Labs
     access: limited
     num_parameters: 7500000000
     release_date: 2021-08-11
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
-  - name: ai21/j1-grande
+  - name: ai21/j1-grande
     display_name: J1-Grande v1 (17B)
     description: Jurassic-1 Grande (17B parameters) with a "few tweaks" to the training process ([docs](https://studio.ai21.com/docs/jurassic1-language-models/), [tech report](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf)).
     creator_organization_name: AI21 Labs
     access: limited
     num_parameters: 17000000000
     release_date: 2022-05-03
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
-  - name: ai21/j1-grande-v2-beta
+  - name: ai21/j1-grande-v2-beta
     display_name: J1-Grande v2 beta (17B)
     description: Jurassic-1 Grande v2 beta (17B parameters)
     creator_organization_name: AI21 Labs
     access: limited
     num_parameters: 17000000000
     release_date: 2022-10-28
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-  - name: ai21/j2-jumbo
-    display_name: Jurassic-2 Jumbo (178B)
-    description: Jurassic-2 Jumbo (178B parameters) ([docs](https://www.ai21.com/blog/introducing-j2))
-    creator_organization_name: AI21 Labs
-    access: limited
-    num_parameters: 178000000000
-    release_date: 2023-03-09
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
   - name: ai21/j2-large
     display_name: Jurassic-2 Large (7.5B)
@@ -83,7 +74,7 @@ models:
     access: limited
     num_parameters: 7500000000
     release_date: 2023-03-09
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: ai21/j2-grande
     display_name: Jurassic-2 Grande (17B)
@@ -92,13 +83,48 @@ models:
     access: limited
     num_parameters: 17000000000
     release_date: 2023-03-09
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ai21/j2-jumbo
+    display_name: Jurassic-2 Jumbo (178B)
+    description: Jurassic-2 Jumbo (178B parameters) ([docs](https://www.ai21.com/blog/introducing-j2))
+    creator_organization_name: AI21 Labs
+    access: limited
+    num_parameters: 178000000000
+    release_date: 2023-03-09
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   # TODO(1524): Change AI21 model names
   # - j2-jumbo -> j2-ultra
   # - j2-grande -> j2-mid
   # - j2-large -> j2-light
 
+  - name: ai21/jamba-instruct
+    display_name: Jamba Instruct
+    description: Jamba Instruct is an instruction tuned version of Jamba, which uses a hybrid Transformer-Mamba mixture-of-experts (MoE) architecture that interleaves blocks of Transformer and Mamba layers. ([blog](https://www.ai21.com/blog/announcing-jamba-instruct))
+    creator_organization_name: AI21 Labs
+    access: limited
+    num_parameters: 52000000000
+    release_date: 2024-05-02
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ai21/jamba-1.5-mini
+    display_name: Jamba 1.5 Mini
+    description: Jamba 1.5 Mini is a long-context, hybrid SSM-Transformer instruction following foundation model that is optimized for function calling, structured output, and grounded generation. ([blog](https://www.ai21.com/blog/announcing-jamba-model-family))
+    creator_organization_name: AI21 Labs
+    access: open
+    num_parameters: 51600000000
+    release_date: 2024-08-22
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ai21/jamba-1.5-large
+    display_name: Jamba 1.5 Large
+    description: Jamba 1.5 Large is a long-context, hybrid SSM-Transformer instruction following foundation model that is optimized for function calling, structured output, and grounded generation. ([blog](https://www.ai21.com/blog/announcing-jamba-model-family))
+    creator_organization_name: AI21 Labs
+    access: open
+    num_parameters: 399000000000
+    release_date: 2024-08-22
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   # AI Singapore
   - name: aisingapore/sea-lion-7b
@@ -117,8 +143,25 @@ models:
     access: open
     num_parameters: 7000000000
     release_date: 2023-02-24
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
+  - name: aisingapore/llama3-8b-cpt-sea-lionv2-base
+    display_name: Llama 3 CPT SEA-Lion v2 (8B)
+    description: Llama 3 CPT SEA-Lion v2 (8B) is a multilingual model which was continued pre-trained on 48B additional tokens, including tokens in Southeast Asian languages.
+    creator_organization_name: AI Singapore
+    access: open
+    num_parameters: 80300000000
+    release_date: 2024-07-31
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: aisingapore/llama3-8b-cpt-sea-lionv2.1-instruct
+    display_name: Llama 3 CPT SEA-Lion v2.1 Instruct (8B)
+    description: Llama 3 CPT SEA-Lion v2.1 Instruct (8B) is a multilingual model which has been fine-tuned with around 100,000 English instruction-completion pairs alongside a smaller pool of around 50,000 instruction-completion pairs from other Southeast Asian languages, such as Indonesian, Thai and Vietnamese.
+    creator_organization_name: AI Singapore
+    access: open
+    num_parameters: 80300000000
+    release_date: 2024-08-21
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   # Aleph Alpha
   # Aleph Alpha's Luminous models: https://docs.aleph-alpha.com/docs/introduction/luminous
@@ -272,7 +315,14 @@ models:
     release_date: 2024-03-04 # https://www.anthropic.com/news/claude-3-family
     tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
-
+  - name: anthropic/claude-3-5-sonnet-20240620
+    display_name: Claude 3.5 Sonnet (20240620)
+    description: Claude 3.5 Sonnet is a Claude 3 family model which outperforms Claude 3 Opus while operating faster and at a lower cost. ([blog](https://www.anthropic.com/news/claude-3-5-sonnet))
+    creator_organization_name: Anthropic
+    access: limited
+    release_date: 2024-06-20
+    tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: anthropic/stanford-online-all-v4-s3
     display_name: Anthropic-LM v4-s3 (52B)
     description: A 52B parameter language model, trained using reinforcement learning from human feedback [paper](https://arxiv.org/pdf/2204.05862.pdf).
@@ -280,7 +330,7 @@ models:
     access: closed
     num_parameters: 52000000000
     release_date: 2021-12-01
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG]
 
 
 
@@ -401,16 +451,16 @@ models:
     access: limited
     num_parameters: 52400000000
     release_date: 2022-06-09
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
-  - name: cohere/large-20220720
+  - name: cohere/large-20220720
     display_name: Cohere large v20220720 (13.1B)
     description: Cohere large v20220720 (13.1B parameters), which is deprecated by Cohere as of December 2, 2022.
     creator_organization_name: Cohere
     access: limited
     num_parameters: 13100000000
     release_date: 2022-07-20
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
   - name: cohere/medium-20220720
     display_name: Cohere medium v20220720 (6.1B)
@@ -419,16 +469,16 @@ models:
     access: limited
     num_parameters: 6100000000
     release_date: 2022-07-20
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
-  - name: cohere/small-20220720
+  - name: cohere/small-20220720
     display_name: Cohere small v20220720 (410M)
     description: Cohere small v20220720 (410M parameters), which is deprecated by Cohere as of December 2, 2022.
     creator_organization_name: Cohere
     access: limited
     num_parameters: 410000000
     release_date: 2022-07-20
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
   - name: cohere/xlarge-20221108
     display_name: Cohere xlarge v20221108 (52.4B)
@@ -437,34 +487,34 @@ models:
     access: limited
     num_parameters: 52400000000
     release_date: 2022-11-08
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
-  - name: cohere/medium-20221108
+  - name: cohere/medium-20221108
     display_name: Cohere medium v20221108 (6.1B)
     description: Cohere medium v20221108 (6.1B parameters)
     creator_organization_name: Cohere
     access: limited
     num_parameters: 6100000000
     release_date: 2022-11-08
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
-  - name: cohere/command-medium-beta
+  - name: cohere/command-medium-beta
     display_name: Command beta (6.1B)
-    description:
+    description: Command beta (6.1B parameters) is fine-tuned from the medium model to respond well with instruction-like prompts ([details](https://docs.cohere.ai/docs/command-beta)).
     creator_organization_name: Cohere
     access: limited
     num_parameters: 6100000000
     release_date: 2022-11-08
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
-  - name: cohere/command-xlarge-beta
+  - name: cohere/command-xlarge-beta
     display_name: Command beta (52.4B)
-    description:
+    description: Command beta (52.4B parameters) is fine-tuned from the XL model to respond well with instruction-like prompts ([details](https://docs.cohere.ai/docs/command-beta)).
     creator_organization_name: Cohere
     access: limited
     num_parameters: 52400000000
     release_date: 2022-11-08
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: cohere/command
     display_name: Command
@@ -837,8 +887,7 @@ models:
 
   - name: google/gemma-2b
     display_name: Gemma (2B)
-
-    description: TBD
+    description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/gemma-open-models/))
     creator_organization_name: Google
     access: open
     release_date: 2024-02-21
@@ -846,8 +895,7 @@ models:
 
   - name: google/gemma-2b-it
     display_name: Gemma Instruct (2B)
-
-    description: TBD
+    description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/gemma-open-models/))
     creator_organization_name: Google
     access: open
     release_date: 2024-02-21
@@ -855,8 +903,7 @@ models:
 
   - name: google/gemma-7b
     display_name: Gemma (7B)
-
-    description: TBD
+    description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/gemma-open-models/))
     creator_organization_name: Google
     access: open
     release_date: 2024-02-21
@@ -864,12 +911,42 @@ models:
 
   - name: google/gemma-7b-it
     display_name: Gemma Instruct (7B)
-
-    description: TBD
+    description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/gemma-open-models/))
     creator_organization_name: Google
     access: open
     release_date: 2024-02-21
-
+    tags: [TEXT_MODEL_TAG, GOOGLE_GEMMA_INSTRUCT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemma-2-9b
+    display_name: Gemma 2 (9B)
+    description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/google-gemma-2/))
+    creator_organization_name: Google
+    access: open
+    release_date: 2024-06-27
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: google/gemma-2-9b-it
+    display_name: Gemma 2 Instruct (9B)
+    description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/google-gemma-2/))
+    creator_organization_name: Google
+    access: open
+    release_date: 2024-06-27
+    tags: [TEXT_MODEL_TAG, GOOGLE_GEMMA_INSTRUCT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemma-2-27b
+    display_name: Gemma 2 (27B)
+    description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/google-gemma-2/))
+    creator_organization_name: Google
+    access: open
+    release_date: 2024-06-27
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: google/gemma-2-27b-it
+    display_name: Gemma 2 Instruct (27B)
+    description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/google-gemma-2/))
+    creator_organization_name: Google
+    access: open
+    release_date: 2024-06-27
     tags: [TEXT_MODEL_TAG, GOOGLE_GEMMA_INSTRUCT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: google/paligemma-3b-mix-224
@@ -1361,25 +1438,115 @@ models:
 
   - name: meta/llama-3-8b
     display_name: Llama 3 (8B)
-    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability.
+    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)
     creator_organization_name: Meta
     access: open
     num_parameters: 8000000000
     release_date: 2024-04-18
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: meta/llama-3-8b-instruct-turbo
+    display_name: Llama 3 Instruct Turbo (8B)
+    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/) Turbo is Together's implementation, providing fast FP8 performance while maintaining quality, closely matching FP16 reference models. ([blog](https://www.together.ai/blog/together-inference-engine-2))
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 8000000000
+    release_date: 2024-07-18
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: meta/llama-3-8b-instruct-lite
+    display_name: Llama 3 Instruct Lite (8B)
+    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/) Lite is Together's implementation, it leverages a number of optimizations including INT4 quantization, provides the most cost-efficient and scalable Llama 3 models available anywhere, while maintaining excellent quality relative to full precision reference implementations ([blog](https://www.together.ai/blog/together-inference-engine-2))
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 8000000000
+    release_date: 2024-07-18
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: meta/llama-3-70b
     display_name: Llama 3 (70B)
-    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability.
+    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)
     creator_organization_name: Meta
     access: open
     num_parameters: 70000000000
     release_date: 2024-04-18
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: meta/llama-3-70b-instruct-turbo
+    display_name: Llama 3 Instruct Turbo (70B)
+    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/) Turbo is Together's implementation, providing fast FP8 performance while maintaining quality, closely matching FP16 reference models. ([blog](https://www.together.ai/blog/together-inference-engine-2))
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 70000000000
+    release_date: 2024-07-18
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: meta/llama-3-70b-instruct-lite
+    display_name: Llama 3 Instruct Lite (70B)
+    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/) Lite is Together's implementation, it leverages a number of optimizations including INT4 quantization, provides the most cost-efficient and scalable Llama 3 models available anywhere, while maintaining excellent quality relative to full precision reference implementations ([blog](https://www.together.ai/blog/together-inference-engine-2))
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 70000000000
+    release_date: 2024-07-18
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: meta/llama-3.1-8b-instruct-turbo
+    display_name: Llama 3.1 Instruct Turbo (8B)
+    description: Llama 3.1 (8B) is part of the Llama 3 family of dense Transformer models that that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/), [blog](https://ai.meta.com/blog/meta-llama-3-1/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 8000000000
+    release_date: 2024-07-23
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: meta/llama-3.1-70b-instruct-turbo
+    display_name: Llama 3.1 Instruct Turbo (70B)
+    description: Llama 3.1 (70B) is part of the Llama 3 family of dense Transformer models that that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/), [blog](https://ai.meta.com/blog/meta-llama-3-1/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 70000000000
+    release_date: 2024-07-23
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: meta/llama-3.1-405b-instruct-turbo
+    display_name: Llama 3.1 Instruct Turbo (405B)
+    description: Llama 3.1 (405B) is part of the Llama 3 family of dense Transformer models that that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/), [blog](https://ai.meta.com/blog/meta-llama-3-1/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 405000000000
+    release_date: 2024-07-23
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: meta/llama-3.2-3b-instruct-turbo
+    display_name: Llama 3.2 Instruct Turbo (3B)
+    description: The Meta Llama 3.2 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned text-only generative models in 1B and 3B sizes. ([blog](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 3210000000
+    release_date: 2024-09-25
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: meta/llama-3.2-11b-vision-instruct-turbo
+    display_name: Llama 3.2 Vision Instruct Turbo (11B)
+    description: The Llama 3.2 Vision collection of multimodal large language models (LLMs) is a collection of pretrained and instruction-tuned image reasoning generative models in 11B and 90B sizes. ([blog](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 10700000000
+    release_date: 2024-09-25
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG. LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: meta/llama-3.2-90b-vision-instruct-turbo
+    display_name: Llama 3.2 Vision Instruct Turbo (90B)
+    description: The Llama 3.2 Vision collection of multimodal large language models (LLMs) is a collection of pretrained and instruction-tuned image reasoning generative models in 11B and 90B sizes. ([blog](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 88600000000
+    release_date: 2024-09-25
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG. LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: meta/llama-3-8b-chat
     display_name: Llama 3 Instruct (8B)
-    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. It used SFT, rejection sampling, PPO and DPO for post-training.
+    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. It used SFT, rejection sampling, PPO and DPO for post-training. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)
     creator_organization_name: Meta
     access: open
     num_parameters: 8000000000
@@ -1388,7 +1555,7 @@ models:
 
   - name: meta/llama-3-70b-chat
     display_name: Llama 3 Instruct (70B)
-    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. It used SFT, rejection sampling, PPO and DPO for post-training.
+    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. It used SFT, rejection sampling, PPO and DPO for post-training. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)
     creator_organization_name: Meta
     access: open
     num_parameters: 70000000000
@@ -1413,7 +1580,14 @@ models:
     release_date: 2024-04-18
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
-
+  - name: meta/llama-guard-3-8b
+    display_name: Llama Guard 3 (8B)
+    description: Llama Guard 3 is an 8B parameter Llama 3.1-based LLM safeguard model. Similar to Llama Guard, it can be used for classifying content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM – it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 8000000000
+    release_date: 2024-07-23
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
 
   # Microsoft/NVIDIA
@@ -1424,7 +1598,7 @@ models:
     access: closed
     num_parameters: 530000000000
     release_date: 2022-01-28
-    tags: [
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
 
   - name: microsoft/TNLGv2_7B
     display_name: TNLG v2 (6.7B)
@@ -1433,7 +1607,7 @@ models:
     access: closed
     num_parameters: 6700000000
     release_date: 2022-01-28
-    tags: [
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
 
   - name: microsoft/llava-1.5-7b-hf
     display_name: LLaVA 1.5 (7B)
@@ -1507,6 +1681,24 @@ models:
     release_date: 2023-10-05
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
 
+  - name: microsoft/phi-3-small-8k-instruct
+    display_name: Phi-3 (7B)
+    description: Phi-3-Small-8K-Instruct is a lightweight model trained with synthetic data and filtered publicly available website data with a focus on high-quality and reasoning dense properties. ([paper](https://arxiv.org/abs/2404.14219), [blog](https://azure.microsoft.com/en-us/blog/new-models-added-to-the-phi-3-family-available-on-microsoft-azure/))
+    creator_organization_name: Microsoft
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-05-21
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: microsoft/phi-3-medium-4k-instruct
+    display_name: Phi-3 (14B)
+    description: Phi-3-Medium-4K-Instruct is a lightweight model trained with synthetic data and filtered publicly available website data with a focus on high-quality and reasoning dense properties. ([paper](https://arxiv.org/abs/2404.14219), [blog](https://azure.microsoft.com/en-us/blog/new-models-added-to-the-phi-3-family-available-on-microsoft-azure/))
+    creator_organization_name: Microsoft
+    access: open
+    num_parameters: 14000000000
+    release_date: 2024-05-21
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   # KAIST AI
   - name: kaistai/prometheus-vision-13b-v1.0-hf
     display_name: LLaVA + Vicuna-v1.5 (13B)
@@ -1663,7 +1855,7 @@ models:
     num_parameters: 46700000000
     # Blog post: https://mistral.ai/news/mixtral-of-experts/
     release_date: 2023-12-11
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: mistralai/mixtral-8x22b
     display_name: Mixtral (8x22B)
@@ -1694,13 +1886,11 @@ models:
 
   - name: mistralai/mistral-small-2402
     display_name: Mistral Small (2402)
-
-    description: TBD
+    description: Mistral Small is a multilingual model with a 32K tokens context window and function-calling capabilities. ([blog](https://mistral.ai/news/mistral-large/))
     creator_organization_name: Mistral AI
     access: limited
-    # Blog post: https://mistral.ai/news/mistral-large/
     release_date: 2023-02-26
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: mistralai/mistral-medium-2312
     display_name: Mistral Medium (2312)
@@ -1708,18 +1898,32 @@ models:
     creator_organization_name: Mistral AI
     access: limited
     release_date: 2023-12-11
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: mistralai/mistral-large-2402
     display_name: Mistral Large (2402)
-
-    description: TBD
+    description: Mistral Large is a multilingual model with a 32K tokens context window and function-calling capabilities. ([blog](https://mistral.ai/news/mistral-large/))
     creator_organization_name: Mistral AI
     access: limited
-    # Blog post: https://mistral.ai/news/mistral-large/
     release_date: 2023-02-26
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
+  - name: mistralai/mistral-large-2407
+    display_name: Mistral Large 2 (2407)
+    description: Mistral Large 2 is a 123 billion parameter model that has a 128k context window and supports dozens of languages and 80+ coding languages. ([blog](https://mistral.ai/news/mistral-large-2407/))
+    creator_organization_name: Mistral AI
+    access: open
+    num_parameters: 123000000000
+    release_date: 2023-07-24
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: mistralai/open-mistral-nemo-2407
+    display_name: Mistral NeMo (2402)
+    description: Mistral NeMo is a multilingual 12B model with a large context window of 128K tokens. ([blog](https://mistral.ai/news/mistral-nemo/))
+    creator_organization_name: Mistral AI
+    access: open
+    release_date: 2024-07-18
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   # MosaicML
   - name: mosaicml/mpt-7b
@@ -1798,7 +2002,13 @@ models:
     release_date: 2019-09-17 # paper date
     tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, BUGGY_TEMP_0_TAG]
 
-
+  - name: nvidia/nemotron-4-340b-instruct
+    display_name: Nemotron-4 Instruct (340B)
+    description: Nemotron-4 Instruct (340B) is an open weights model sized to fit on a single DGX H100 with 8 GPUs when deployed in FP8 precision. 98% of the data used for model alignment was synthetically generated ([paper](https://arxiv.org/abs/2406.11704)).
+    creator_organization_name: NVIDIA
+    access: open
+    release_date: 2024-06-17
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   # OpenAI
 
@@ -1836,97 +2046,95 @@ models:
 
   # DEPRECATED: Announced on July 06 2023 that these models will be shut down on January 04 2024.
 
-  - name: openai/davinci
+  - name: openai/davinci
     display_name: davinci (175B)
     description: Original GPT-3 (175B parameters) autoregressive language model ([paper](https://arxiv.org/pdf/2005.14165.pdf), [docs](https://beta.openai.com/docs/model-index-for-researchers)).
     creator_organization_name: OpenAI
     access: limited
     num_parameters: 175000000000
     release_date: 2020-05-28
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
-  - name: openai/curie
+  - name: openai/curie
     display_name: curie (6.7B)
     description: Original GPT-3 (6.7B parameters) autoregressive language model ([paper](https://arxiv.org/pdf/2005.14165.pdf), [docs](https://beta.openai.com/docs/model-index-for-researchers)).
     creator_organization_name: OpenAI
     access: limited
     num_parameters: 6700000000
     release_date: 2020-05-28
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
-  - name: openai/babbage
+  - name: openai/babbage
     display_name: babbage (1.3B)
     description: Original GPT-3 (1.3B parameters) autoregressive language model ([paper](https://arxiv.org/pdf/2005.14165.pdf), [docs](https://beta.openai.com/docs/model-index-for-researchers)).
     creator_organization_name: OpenAI
     access: limited
     num_parameters: 1300000000
     release_date: 2020-05-28
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
-  - name: openai/ada
+  - name: openai/ada
     display_name: ada (350M)
     description: Original GPT-3 (350M parameters) autoregressive language model ([paper](https://arxiv.org/pdf/2005.14165.pdf), [docs](https://beta.openai.com/docs/model-index-for-researchers)).
     creator_organization_name: OpenAI
     access: limited
     num_parameters: 350000000
     release_date: 2020-05-28
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
-  - name: openai/text-davinci-003
+  - name: openai/text-davinci-003
     display_name: GPT-3.5 (text-davinci-003)
     description: text-davinci-003 model that involves reinforcement learning (PPO) with reward models. Derived from text-davinci-002 ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
     creator_organization_name: OpenAI
     access: limited
     num_parameters: 175000000000
     release_date: 2022-11-28
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
-
-  # https://github.com/stanford-crfm/benchmarking/issues/359
-  - name: openai/text-davinci-002 # DEPRECATED
+  - name: openai/text-davinci-002
     display_name: GPT-3.5 (text-davinci-002)
     description: text-davinci-002 model that involves supervised fine-tuning on human-written demonstrations. Derived from code-davinci-002 ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
     creator_organization_name: OpenAI
     access: limited
     num_parameters: 175000000000
     release_date: 2022-01-27
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
-  - name: openai/text-davinci-001
+  - name: openai/text-davinci-001
     display_name: GPT-3.5 (text-davinci-001)
     description: text-davinci-001 model that involves supervised fine-tuning on human-written demonstrations ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
     creator_organization_name: OpenAI
     access: limited
     num_parameters: 175000000000
     release_date: 2022-01-27
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
-  - name: openai/text-curie-001
+  - name: openai/text-curie-001
     display_name: text-curie-001
     description: text-curie-001 model that involves supervised fine-tuning on human-written demonstrations ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
     creator_organization_name: OpenAI
     access: limited
     num_parameters: 6700000000
     release_date: 2022-01-27
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
-  - name: openai/text-babbage-001
+  - name: openai/text-babbage-001
     display_name: text-babbage-001
     description: text-babbage-001 model that involves supervised fine-tuning on human-written demonstrations ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
     creator_organization_name: OpenAI
     access: limited
     num_parameters: 1300000000
     release_date: 2022-01-27
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
-  - name: openai/text-ada-001
+  - name: openai/text-ada-001
     display_name: text-ada-001
     description: text-ada-001 model that involves supervised fine-tuning on human-written demonstrations ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
     creator_organization_name: OpenAI
     access: limited
     num_parameters: 350000000
     release_date: 2022-01-27
|
1929
|
-
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
2137
|
+
tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1930
2138
|
|
|
1931
2139
|
|
|
1932
2140
|
## GPT 3.5 Turbo Models
|
|
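The hunk above retires the first-generation GPT-3 and GPT-3.5 completion models by prepending DEPRECATED_MODEL_TAG to each entry's tags list rather than deleting the entries. As a minimal sketch of what one of these entries reads like after the change (field values copied from the hunk; the two-space YAML indentation under the top-level models: key is assumed):

  - name: openai/davinci
    display_name: davinci (175B)
    creator_organization_name: OpenAI
    access: limited
    num_parameters: 175000000000
    release_date: 2020-05-28
    # Deprecation is expressed as an extra tag; the other fields are left unchanged.
    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]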
@@ -2044,16 +2252,32 @@ models:
    creator_organization_name: OpenAI
    access: limited
    release_date: 2024-04-09
-    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

  - name: openai/gpt-4o-2024-05-13
    display_name: GPT-4o (2024-05-13)
-    description: GPT-4o (2024-05-13) is a large multimodal model that accepts as input any combination of text, audio, and image and generates any combination of text, audio, and image outputs.
+    description: GPT-4o (2024-05-13) is a large multimodal model that accepts as input any combination of text, audio, and image and generates any combination of text, audio, and image outputs. ([blog](https://openai.com/index/hello-gpt-4o/))
    creator_organization_name: OpenAI
    access: limited
    release_date: 2024-04-09
    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+  - name: openai/gpt-4o-2024-08-06
+    display_name: GPT-4o (2024-08-06)
+    description: GPT-4o (2024-08-06) is a large multimodal model that accepts as input any combination of text, audio, and image and generates any combination of text, audio, and image outputs. ([blog](https://openai.com/index/introducing-structured-outputs-in-the-api/))
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2024-08-06
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-4o-mini-2024-07-18
+    display_name: GPT-4o mini (2024-07-18)
+    description: GPT-4o mini (2024-07-18) is a multimodal model with a context window of 128K tokens and improved handling of non-English text. ([blog](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/))
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2024-07-18
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
  - name: openai/gpt-4-vision-preview
    # According to https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4, this model has pointed gpt-4-1106-vision-preview.
    display_name: GPT-4V (1106 preview)
@@ -2071,33 +2295,50 @@ models:
    release_date: 2023-11-06
    tags: [VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]

+  ## o1 Models
+  - name: openai/o1-preview-2024-09-12
+    display_name: o1-preview (2024-09-12)
+    description: o1-preview is a language model trained with reinforcement learning to perform complex reasoning that can produce a long internal chain of thought before responding to the user. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/))
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2024-09-12
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/o1-mini-2024-09-12
+    display_name: o1-mini (2024-09-12)
+    description: o1-mini is a cost-effective reasoning model for applications that require reasoning without broad world knowledge. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/openai-o1-mini-advancing-cost-efficient-reasoning/))
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2024-09-12
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
  ## Codex Models
  # DEPRECATED: Codex models have been shut down on March 23 2023.

-  - name: openai/code-davinci-002
+  - name: openai/code-davinci-002
    display_name: code-davinci-002
    description: Codex-style model that is designed for pure code-completion tasks ([docs](https://beta.openai.com/docs/models/codex)).
    creator_organization_name: OpenAI
    access: limited
    release_date: 2021-07-01 # TODO: Find correct date (this is for v1)
-    tags: [CODE_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, CODE_MODEL_TAG]

-  - name: openai/code-davinci-001
+  - name: openai/code-davinci-001
    display_name: code-davinci-001
    description: code-davinci-001 model
    creator_organization_name: OpenAI
    access: limited
    release_date: 2021-07-01 # Paper date
-    tags: [CODE_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, CODE_MODEL_TAG]

-  - name: openai/code-cushman-001
+  - name: openai/code-cushman-001
    display_name: code-cushman-001 (12B)
    description: Codex-style model that is a stronger, multilingual version of the Codex (12B) model in the [Codex paper](https://arxiv.org/pdf/2107.03374.pdf).
    creator_organization_name: OpenAI
    access: limited
    num_parameters: 12000000000
    release_date: 2021-07-01 # Paper date
-    tags: [CODE_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, CODE_MODEL_TAG]


  ## Text Similarity Models
@@ -2107,41 +2348,41 @@ models:
  # DEPRECATED: Announced on July 06 2023 that first generation embeddings models
  # will be shut down on January 04 2024.

-  - name: openai/text-similarity-davinci-001
+  - name: openai/text-similarity-davinci-001
    display_name: text-similarity-davinci-001
    description: Embedding model that is designed for text similarity tasks ([docs](https://openai.com/blog/introducing-text-and-code-embeddings)).
    creator_organization_name: OpenAI
    access: limited
    num_parameters: 175000000000
    release_date: 2022-01-25 # Blog post date
-    tags: [TEXT_SIMILARITY_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_SIMILARITY_MODEL_TAG]

-  - name: openai/text-similarity-curie-001
+  - name: openai/text-similarity-curie-001
    display_name: text-similarity-curie-001
    description: Embedding model that is designed for text similarity tasks ([docs](https://openai.com/blog/introducing-text-and-code-embeddings)).
    creator_organization_name: OpenAI
    access: limited
    num_parameters: 6700000000
    release_date: 2022-01-25 # Blog post date
-    tags: [TEXT_SIMILARITY_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_SIMILARITY_MODEL_TAG]

-  - name: openai/text-similarity-babbage-001
+  - name: openai/text-similarity-babbage-001
    display_name: text-similarity-babbage-001
    description: Embedding model that is designed for text similarity tasks ([docs](https://openai.com/blog/introducing-text-and-code-embeddings)).
    creator_organization_name: OpenAI
    access: limited
    num_parameters: 1300000000
    release_date: 2022-01-25 # Blog post date
-    tags: [TEXT_SIMILARITY_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_SIMILARITY_MODEL_TAG]

-  - name: openai/text-similarity-ada-001
+  - name: openai/text-similarity-ada-001
    display_name: text-similarity-ada-001
    description: Embedding model that is designed for text similarity tasks ([docs](https://openai.com/blog/introducing-text-and-code-embeddings)).
    creator_organization_name: OpenAI
    access: limited
    num_parameters: 350000000
    release_date: 2022-01-25 # Blog post date
-    tags: [TEXT_SIMILARITY_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_SIMILARITY_MODEL_TAG]

  - name: openai/text-embedding-ada-002
    display_name: text-embedding-ada-002
@@ -2197,6 +2438,34 @@ models:
    release_date: 2023-11-06
    tags: [TEXT_TO_IMAGE_MODEL_TAG]

+  # OpenThaiGPT
+  - name: openthaigpt/openthaigpt-1.0.0-7b-chat
+    display_name: OpenThaiGPT v1.0.0 (7B)
+    description: OpenThaiGPT v1.0.0 (7B) is a Thai language chat model based on Llama 2 that has been specifically fine-tuned for Thai instructions and enhanced by incorporating over 10,000 of the most commonly used Thai words into the dictionary. ([blog post](https://openthaigpt.aieat.or.th/openthaigpt-1.0.0-less-than-8-apr-2024-greater-than))
+    creator_organization_name: OpenThaiGPT
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-04-08
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openthaigpt/openthaigpt-1.0.0-13b-chat
+    display_name: OpenThaiGPT v1.0.0 (13B)
+    description: OpenThaiGPT v1.0.0 (13B) is a Thai language chat model based on Llama 2 that has been specifically fine-tuned for Thai instructions and enhanced by incorporating over 10,000 of the most commonly used Thai words into the dictionary. ([blog post](https://openthaigpt.aieat.or.th/openthaigpt-1.0.0-less-than-8-apr-2024-greater-than))
+    creator_organization_name: OpenThaiGPT
+    access: open
+    num_parameters: 13000000000
+    release_date: 2024-04-08
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openthaigpt/openthaigpt-1.0.0-70b-chat
+    display_name: OpenThaiGPT v1.0.0 (70B)
+    description: OpenThaiGPT v1.0.0 (70B) is a Thai language chat model based on Llama 2 that has been specifically fine-tuned for Thai instructions and enhanced by incorporating over 10,000 of the most commonly used Thai words into the dictionary. ([blog post](https://openthaigpt.aieat.or.th/openthaigpt-1.0.0-less-than-8-apr-2024-greater-than))
+    creator_organization_name: OpenThaiGPT
+    access: open
+    num_parameters: 70000000000
+    release_date: 2024-04-08
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
  # Qwen

  - name: qwen/qwen-7b
@@ -2311,7 +2580,7 @@ models:
    access: open
    num_parameters: 7000000000
    release_date: 2024-04-04
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]

  - name: sail/sailor-7b-chat
    display_name: Sailor Chat (7B)
@@ -2329,7 +2598,7 @@ models:
    access: open
    num_parameters: 14000000000
    release_date: 2024-04-04
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]

  - name: sail/sailor-14b-chat
    display_name: Sailor Chat (14B)
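The two one-line hunks above also repair a syntax problem carried over from 0.5.2: the tags values for sail/sailor-7b-chat and sail/sailor-14b-chat were missing their closing bracket, leaving an unterminated YAML flow sequence that a parser would try to continue onto the following lines. The corrected form, as it appears in 0.5.4:

    # a complete flow sequence; the closing ] was missing in 0.5.2
    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]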
@@ -2350,10 +2619,74 @@ models:
    release_date: 2022-03-25
    tags: [] # TODO: add tags

+  # SambaNova
+  - name: sambanova/sambalingo-thai-base
+    display_name: SambaLingo-Thai-Base
+    description: SambaLingo-Thai-Base is a pretrained bi-lingual Thai and English model that adapts Llama 2 (7B) to Thai by training on 38 billion tokens from the Thai split of the Cultura-X dataset. ([paper](https://arxiv.org/abs/2404.05829))
+    creator_organization_name: SambaLingo
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-04-08
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: sambanova/sambalingo-thai-chat
+    display_name: SambaLingo-Thai-Chat
+    description: SambaLingo-Thai-Chat is a chat model trained using direct preference optimization on SambaLingo-Thai-Base. SambaLingo-Thai-Base adapts Llama 2 (7B) to Thai by training on 38 billion tokens from the Thai split of the Cultura-X dataset. ([paper](https://arxiv.org/abs/2404.05829))
+    creator_organization_name: SambaLingo
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-04-08
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: sambanova/sambalingo-thai-base-70b
+    display_name: SambaLingo-Thai-Base-70B
+    description: SambaLingo-Thai-Base-70B is a pretrained bi-lingual Thai and English model that adapts Llama 2 (70B) to Thai by training on 26 billion tokens from the Thai split of the Cultura-X dataset. ([paper](https://arxiv.org/abs/2404.05829))
+    creator_organization_name: SambaLingo
+    access: open
+    num_parameters: 70000000000
+    release_date: 2024-04-08
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: sambanova/sambalingo-thai-chat-70b
+    display_name: SambaLingo-Thai-Chat-70B
+    description: SambaLingo-Thai-Chat-70B is a chat model trained using direct preference optimization on SambaLingo-Thai-Base-70B. SambaLingo-Thai-Base-70B adapts Llama 2 (7B) to Thai by training on 26 billion tokens from the Thai split of the Cultura-X dataset. ([paper](https://arxiv.org/abs/2404.05829))
+    creator_organization_name: SambaLingo
+    access: open
+    num_parameters: 70000000000
+    release_date: 2024-04-08
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
  # SCB10X
+  - name: scb10x/typhoon-7b
+    display_name: Typhoon (7B)
+    description: Typhoon (7B) is pretrained Thai large language model with 7 billion parameters based on Mistral 7B. ([paper](https://arxiv.org/abs/2312.13951))
+    creator_organization_name: SCB10X
+    access: open
+    num_parameters: 7000000000
+    release_date: 2023-12-21
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: scb10x/typhoon-v1.5-8b
+    display_name: Typhoon v1.5 (8B)
+    description: Typhoon v1.5 (8B) is a pretrained Thai large language model with 8 billion parameters based on Llama 3 8B. ([blog](https://blog.opentyphoon.ai/typhoon-1-5-release-a9364cb8e8d7))
+    creator_organization_name: SCB10X
+    access: open
+    num_parameters: 8000000000
+    release_date: 2024-05-08
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: scb10x/typhoon-v1.5-8b-instruct
+    display_name: Typhoon v1.5 Instruct (8B)
+    description: Typhoon v1.5 Instruct (8B) is a pretrained Thai large language model with 8 billion parameters based on Llama 3 8B. ([blog](https://blog.opentyphoon.ai/typhoon-1-5-release-a9364cb8e8d7))
+    creator_organization_name: SCB10X
+    access: open
+    num_parameters: 8000000000
+    release_date: 2024-05-08
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
  - name: scb10x/typhoon-v1.5-72b
    display_name: Typhoon v1.5 (72B)
-    description: Typhoon v1.5 (72B) is pretrained Thai large language model with 72 billion parameters based on Qwen1.5-72B. ([blog](https://blog.opentyphoon.ai/typhoon-1-5-release-a9364cb8e8d7))
+    description: Typhoon v1.5 (72B) is a pretrained Thai large language model with 72 billion parameters based on Qwen1.5-72B. ([blog](https://blog.opentyphoon.ai/typhoon-1-5-release-a9364cb8e8d7))
    creator_organization_name: SCB10X
    access: open
    num_parameters: 72000000000
@@ -2362,13 +2695,50 @@ models:

  - name: scb10x/typhoon-v1.5-72b-instruct
    display_name: Typhoon v1.5 Instruct (72B)
-    description: Typhoon v1.5 Instruct (72B) is pretrained Thai large language model with 72 billion parameters based on Qwen1.5-72B. ([blog](https://blog.opentyphoon.ai/typhoon-1-5-release-a9364cb8e8d7))
+    description: Typhoon v1.5 Instruct (72B) is a pretrained Thai large language model with 72 billion parameters based on Qwen1.5-72B. ([blog](https://blog.opentyphoon.ai/typhoon-1-5-release-a9364cb8e8d7))
    creator_organization_name: SCB10X
    access: open
    num_parameters: 72000000000
    release_date: 2024-05-08
    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+  - name: scb10x/llama-3-typhoon-v1.5x-8b-instruct
+    display_name: Typhoon 1.5X instruct (8B)
+    description: Llama-3-Typhoon-1.5X-8B-instruct is a 8 billion parameter instruct model designed for the Thai language based on Llama 3 Instruct. It utilizes the task-arithmetic model editing technique. ([blog](https://blog.opentyphoon.ai/typhoon-1-5x-our-experiment-designed-for-application-use-cases-7b85d9e9845c))
+    creator_organization_name: SCB10X
+    access: open
+    num_parameters: 8000000000
+    release_date: 2024-05-29
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: scb10x/llama-3-typhoon-v1.5x-70b-instruct
+    display_name: Typhoon 1.5X instruct (70B)
+    description: Llama-3-Typhoon-1.5X-70B-instruct is a 70 billion parameter instruct model designed for the Thai language based on Llama 3 Instruct. It utilizes the task-arithmetic model editing technique. ([blog](https://blog.opentyphoon.ai/typhoon-1-5x-our-experiment-designed-for-application-use-cases-7b85d9e9845c))
+    creator_organization_name: SCB10X
+    access: open
+    num_parameters: 70000000000
+    release_date: 2024-05-29
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  # Alibaba DAMO Academy
+  - name: damo/seallm-7b-v2
+    display_name: SeaLLM v2 (7B)
+    description: SeaLLM v2 is a multilingual LLM for Southeast Asian (SEA) languages trained from Mistral (7B). ([website](https://damo-nlp-sg.github.io/SeaLLMs/))
+    creator_organization_name: Alibaba DAMO Academy
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-02-02
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: damo/seallm-7b-v2.5
+    display_name: SeaLLM v2.5 (7B)
+    description: SeaLLM is a multilingual LLM for Southeast Asian (SEA) languages trained from Gemma (7B). ([website](https://damo-nlp-sg.github.io/SeaLLMs/))
+    creator_organization_name: Alibaba DAMO Academy
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-04-12
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
  # Snowflake
  - name: snowflake/snowflake-arctic-instruct
    display_name: Arctic Instruct
@@ -2538,7 +2908,7 @@ models:
    release_date: 2022-08-04
    # Inference with echo=True is not feasible -- in the prompt encoding phase, they use
    # bidirectional attention and do not perform predictions on them.
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, NO_NEWLINES_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, NO_NEWLINES_TAG]

  - name: tsinghua/codegeex # NOT SUPPORTED
    display_name: CodeGeeX (13B)
@@ -2573,7 +2943,6 @@ models:
    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]

  - name: writer/palmyra-instruct-30
-    deprecated: true # Internal error
    display_name: InstructPalmyra (30B)
    description: InstructPalmyra (30B parameters) is trained using reinforcement learning techniques based on feedback from humans.
    creator_organization_name: Writer
@@ -2581,10 +2950,9 @@ models:
    num_parameters: 30000000000
    release_date: 2023-02-16
    # Does not support echo
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]

  - name: writer/palmyra-e
-    deprecated: true # Internal error
    display_name: Palmyra E (30B)
    description: Palmyra E (30B)
    creator_organization_name: Writer
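The two hunks above change how the Writer Palmyra entries record deprecation: the standalone deprecated: true field (annotated "Internal error") is dropped, and DEPRECATED_MODEL_TAG is added to the tags list, matching the convention applied to the OpenAI models earlier in this diff. A before/after sketch assembled from the removed and added lines (indentation assumed):

  # 0.5.2: deprecation as a dedicated field
  - name: writer/palmyra-instruct-30
    deprecated: true # Internal error
    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]

  # 0.5.4: deprecation folded into the tags list
  - name: writer/palmyra-instruct-30
    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]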
@@ -2592,7 +2960,7 @@ models:
    num_parameters: 30000000000
    release_date: 2023-03-03
    # Does not support echo
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]

  - name: writer/silk-road
    display_name: Silk Road (35B)
@@ -2654,6 +3022,40 @@ models:
    # Does not support echo
    tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]

+  - name: writer/palmyra-x-004
+    display_name: Palmyra-X-004
+    description: Palmyra-X-004 language model with a large context window of up to 128,000 tokens that excels in processing and understanding complex tasks.
+    creator_organization_name: Writer
+    access: limited
+    release_date: 2024-09-12
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: writer/palmyra-med-32k
+    display_name: Palmyra-Med 32K (70B)
+    description: Palmyra-Med 32K (70B) is a model finetuned from Palmyra-X-003 intended for medical applications.
+    creator_organization_name: Writer
+    access: open
+    num_parameters: 70600000000
+    release_date: 2024-07-31
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: writer/palmyra-med
+    display_name: Palmyra-Med (70B)
+    description: Palmyra-Med (70B) is a model finetuned from Palmyra-X-003 intended for medical applications.
+    creator_organization_name: Writer
+    access: open
+    num_parameters: 70600000000
+    release_date: 2024-07-31
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: writer/palmyra-fin-32k
+    display_name: Palmyra-Fin 32K (70B)
+    description: Palmyra-Fin 32K (70B) is a model finetuned from Palmyra-X-003 intended for financial applications.
+    creator_organization_name: Writer
+    access: open
+    num_parameters: 70600000000
+    release_date: 2024-07-31
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

  # Yandex
  - name: yandex/yalm