mteb 2.4.1__py3-none-any.whl → 2.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. mteb/benchmarks/benchmark.py +31 -13
  2. mteb/benchmarks/benchmarks/benchmarks.py +2 -2
  3. mteb/cache.py +36 -7
  4. mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json +54 -0
  5. mteb/descriptive_stats/Retrieval/SQuADKorV1Retrieval.json +30 -0
  6. mteb/models/model_implementations/andersborges.py +12 -0
  7. mteb/models/model_implementations/bge_models.py +43 -0
  8. mteb/models/model_implementations/codefuse_models.py +144 -0
  9. mteb/models/model_implementations/dino_models.py +152 -0
  10. mteb/models/model_implementations/emillykkejensen_models.py +18 -0
  11. mteb/models/model_implementations/euler_models.py +6 -0
  12. mteb/models/model_implementations/fa_models.py +50 -0
  13. mteb/models/model_implementations/facebookai.py +44 -0
  14. mteb/models/model_implementations/gte_models.py +69 -0
  15. mteb/models/model_implementations/kalm_models.py +38 -0
  16. mteb/models/model_implementations/kblab.py +6 -0
  17. mteb/models/model_implementations/kowshik24_models.py +9 -0
  18. mteb/models/model_implementations/misc_models.py +293 -0
  19. mteb/models/model_implementations/mod_models.py +10 -23
  20. mteb/models/model_implementations/mxbai_models.py +6 -0
  21. mteb/models/model_implementations/nomic_models.py +8 -0
  22. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +5 -3
  23. mteb/models/model_implementations/pylate_models.py +33 -0
  24. mteb/models/model_implementations/ru_sentence_models.py +22 -0
  25. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +658 -0
  26. mteb/models/model_implementations/sentence_transformers_models.py +39 -0
  27. mteb/models/model_implementations/spartan8806_atles_champion.py +7 -0
  28. mteb/models/model_implementations/ua_sentence_models.py +9 -0
  29. mteb/models/model_implementations/vi_vn_models.py +33 -0
  30. mteb/results/benchmark_results.py +22 -4
  31. mteb/tasks/classification/tur/__init__.py +4 -0
  32. mteb/tasks/classification/tur/turkish_constitutional_court.py +41 -0
  33. mteb/tasks/retrieval/kor/__init__.py +2 -1
  34. mteb/tasks/retrieval/kor/squad_kor_v1_retrieval.py +47 -0
  35. {mteb-2.4.1.dist-info → mteb-2.5.0.dist-info}/METADATA +1 -1
  36. {mteb-2.4.1.dist-info → mteb-2.5.0.dist-info}/RECORD +40 -35
  37. {mteb-2.4.1.dist-info → mteb-2.5.0.dist-info}/WHEEL +0 -0
  38. {mteb-2.4.1.dist-info → mteb-2.5.0.dist-info}/entry_points.txt +0 -0
  39. {mteb-2.4.1.dist-info → mteb-2.5.0.dist-info}/licenses/LICENSE +0 -0
  40. {mteb-2.4.1.dist-info → mteb-2.5.0.dist-info}/top_level.txt +0 -0
mteb/benchmarks/benchmark.py CHANGED
@@ -1,22 +1,16 @@
+from __future__ import annotations
+
 from collections.abc import Iterable, Sequence
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Literal
 
 import pandas as pd
 
-from mteb.benchmarks._create_table import (
-    _create_per_language_table_from_benchmark_results,
-    _create_per_task_table_from_benchmark_results,
-    _create_summary_table_from_benchmark_results,
-    _create_summary_table_mean_public_private,
-    _create_summary_table_mean_subset,
-    _create_summary_table_mean_task_type,
-)
-from mteb.results import BenchmarkResults
+from mteb.abstasks.abstask import AbsTask
 from mteb.types import StrURL
 
 if TYPE_CHECKING:
-    from mteb.abstasks import AbsTask
+    from mteb.results import BenchmarkResults
 
 
 @dataclass
@@ -43,7 +37,7 @@ class Benchmark:
     """
 
     name: str
-    tasks: Sequence["AbsTask"]
+    tasks: Sequence[AbsTask]
     description: str | None = None
     reference: StrURL | None = None
     citation: str | None = None
@@ -53,13 +47,13 @@ class Benchmark:
     display_name: str | None = None
     language_view: list[str] | Literal["all"] = field(default_factory=list)
 
-    def __iter__(self) -> Iterable["AbsTask"]:
+    def __iter__(self) -> Iterable[AbsTask]:
         return iter(self.tasks)
 
     def __len__(self) -> int:
         return len(self.tasks)
 
-    def __getitem__(self, index: int) -> "AbsTask":
+    def __getitem__(self, index: int) -> AbsTask:
         return self.tasks[index]
 
     def _create_summary_table(
@@ -70,6 +64,10 @@ class Benchmark:
         Returns:
             A pandas DataFrame representing the summary results.
         """
+        from mteb.benchmarks._create_table import (
+            _create_summary_table_from_benchmark_results,
+        )
+
         return _create_summary_table_from_benchmark_results(benchmark_results)
 
     def _create_per_task_table(
@@ -80,6 +78,10 @@ class Benchmark:
        Returns:
            A pandas DataFrame representing the per-task results.
        """
+        from mteb.benchmarks._create_table import (
+            _create_per_task_table_from_benchmark_results,
+        )
+
        return _create_per_task_table_from_benchmark_results(benchmark_results)
 
    def _create_per_language_table(
@@ -90,6 +92,10 @@ class Benchmark:
        Returns:
            A pandas DataFrame representing the per-language results.
        """
+        from mteb.benchmarks._create_table import (
+            _create_per_language_table_from_benchmark_results,
+        )
+
        if self.language_view == "all" or len(self.language_view) > 0:
            return _create_per_language_table_from_benchmark_results(
                benchmark_results, self.language_view
@@ -111,6 +117,10 @@ class RtebBenchmark(Benchmark):
    def _create_summary_table(
        self, benchmark_results: BenchmarkResults
    ) -> pd.DataFrame:
+        from mteb.benchmarks._create_table import (
+            _create_summary_table_mean_public_private,
+        )
+
        joint_table = _create_summary_table_mean_public_private(benchmark_results)
        # For RTEB: all tasks are Retrieval type, so Retrieval column = Mean (Task)
        joint_table = joint_table.rename(columns={"Retrieval": "Mean (Task)"})
@@ -123,6 +133,8 @@ class HUMEBenchmark(Benchmark):
    def _create_summary_table(
        self, benchmark_results: BenchmarkResults
    ) -> pd.DataFrame:
+        from mteb.benchmarks._create_table import _create_summary_table_mean_subset
+
        return _create_summary_table_mean_subset(benchmark_results)
 
 
@@ -132,6 +144,8 @@ class MIEBBenchmark(Benchmark):
    def _create_summary_table(
        self, benchmark_results: BenchmarkResults
    ) -> pd.DataFrame:
+        from mteb.benchmarks._create_table import _create_summary_table_mean_task_type
+
        return _create_summary_table_mean_task_type(benchmark_results)
 
 
@@ -141,6 +155,10 @@ class VidoreBenchmark(Benchmark):
    def _create_summary_table(
        self, benchmark_results: BenchmarkResults
    ) -> pd.DataFrame:
+        from mteb.benchmarks._create_table import (
+            _create_summary_table_mean_public_private,
+        )
+
        joint_table = _create_summary_table_mean_public_private(benchmark_results)
        # For ViDoRe (V1, V2, V3): all tasks are Document Understanding type, so Document Understanding column = Mean (Task)
        joint_table = joint_table.rename(
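The benchmark.py hunks above follow one pattern: the `_create_table` helpers move from module-level imports into the method bodies that use them, `AbsTask` becomes a real runtime import, and `BenchmarkResults` moves behind `TYPE_CHECKING` (made practical by `from __future__ import annotations`). That is a common recipe for breaking a circular import, which is the likely motivation here. A minimal, runnable sketch of the same pattern; the `summarize` function and its use of pandas are illustrative, not part of mteb:

```python
from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Type-checking-only import: never executed at runtime, so it cannot
    # contribute to an import cycle. The annotation below still works because
    # `from __future__ import annotations` keeps annotations as strings.
    import pandas as pd


def summarize(rows: list[dict]) -> pd.DataFrame:
    # Deferred (function-local) import: the module loads on first call rather
    # than at module import time, mirroring how _create_summary_table now
    # imports the _create_table helpers.
    import pandas as pd

    return pd.DataFrame(rows)


print(summarize([{"task": "CosQA", "score": 0.5}]))
```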
mteb/benchmarks/benchmarks/benchmarks.py CHANGED
@@ -435,7 +435,7 @@ MTEB_RETRIEVAL_MEDICAL = Benchmark(
         ],
     ),
     description="A curated set of MTEB tasks designed to evaluate systems in the context of medical information retrieval.",
-    reference="",
+    reference=None,
     citation=None,
 )
 
@@ -2589,7 +2589,7 @@ HUME = HUMEBenchmark(
         ],
     ),
     description="The HUME benchmark is designed to evaluate the performance of text embedding models and humans on a comparable set of tasks. This captures areas where models perform better than human annotators and the reverse. In the paper, we go further into the analysis and what conclusions can be drawn.",
-    reference="Coming soon (in review)",
+    reference=None,
     citation=None,
     contacts=["AdnanElAssadi56", "KennethEnevoldsen", "isaac-chung", "Samoed"],
 )
mteb/cache.py CHANGED
@@ -8,7 +8,9 @@ from collections.abc import Sequence
 from pathlib import Path
 from typing import cast
 
+import mteb
 from mteb.abstasks import AbsTask
+from mteb.benchmarks.benchmark import Benchmark
 from mteb.models import ModelMeta
 from mteb.results import BenchmarkResults, ModelResult, TaskResult
 from mteb.types import ModelName, Revision
@@ -195,12 +197,14 @@ class ResultCache:
         self,
         remote: str = "https://github.com/embeddings-benchmark/results",
         download_latest: bool = True,
+        revision: str | None = None,
     ) -> Path:
         """Downloads the latest version of the results repository from GitHub to a local cache directory. Required git to be installed.
 
         Args:
             remote: The URL of the results repository on GitHub.
             download_latest: If True it will download the latest version of the repository, otherwise it will only update the existing repository.
+            revision: If specified, it will checkout the given revision after cloning or pulling the repository.
 
         Returns:
             The path to the local cache directory.
@@ -228,14 +232,27 @@ class ResultCache:
                 )
                 raise ValueError(msg)
 
-            if download_latest:
+            if revision or download_latest:
                 logger.info(
-                    f"remote repository already exists in {results_directory}, updating it using git pull"
+                    f"remote repository already exists in {results_directory}, fetching updates"
+                )
+                subprocess.run(
+                    ["git", "fetch", "--all", "--tags"],
+                    cwd=results_directory,
+                    check=True,
                 )
-                subprocess.run(["git", "pull"], cwd=results_directory)
             else:
                 logger.debug(
-                    f"Results repository already exists in {results_directory}, skipping update, set download_latest=True to update it"
+                    f"Results repository already exists in {results_directory}, skipping update, "
+                    f"set download_latest=True to update it"
+                )
+
+            if revision:
+                logger.info(f"Checking out revision '{revision}'")
+                subprocess.run(
+                    ["git", "checkout", revision],
+                    cwd=results_directory,
+                    check=True,
                 )
             return results_directory
 
@@ -243,8 +260,15 @@ class ResultCache:
             f"No results repository found in {results_directory}, cloning it from {remote}"
         )
 
+        clone_cmd = ["git", "clone", "--depth", "1"]
+
+        if revision:
+            logger.info(f"Cloning repository at revision '{revision}'")
+            clone_cmd.append(f"--revision={revision}")
+        clone_cmd.extend([remote, "remote"])
+
         subprocess.run(
-            ["git", "clone", "--depth", "1", remote, "remote"],
+            clone_cmd,
             cwd=self.cache_path,
             check=True,
         )
@@ -443,7 +467,7 @@ class ResultCache:
     def load_results(
         self,
         models: Sequence[str] | Sequence[ModelMeta] | None = None,
-        tasks: Sequence[str] | Sequence[AbsTask] | None = None,
+        tasks: Sequence[str] | Sequence[AbsTask] | Benchmark | str | None = None,
         require_model_meta: bool = True,
         include_remote: bool = True,
         validate_and_filter: bool = False,
@@ -453,7 +477,8 @@ class ResultCache:
 
         Args:
             models: A list of model names to load the results for. If None it will load the results for all models.
-            tasks: A list of task names to load the results for. If None it will load the results for all tasks.
+            tasks: A list of task names to load the results for. If str is passed, then benchmark will be loaded.
+                If None it will load the results for all tasks.
             require_model_meta: If True it will ignore results that do not have a model_meta.json file. If false it attempt to
                 extract the model name and revision from the path.
             include_remote: If True, it will include results from the remote repository.
@@ -475,6 +500,9 @@ class ResultCache:
         ...     require_model_meta=True,
         ... )
         """
+        if isinstance(tasks, str):
+            tasks = mteb.get_benchmark(tasks)
+
         paths = self.get_cache_paths(
             models=models,
             tasks=tasks,
@@ -524,6 +552,7 @@ class ResultCache:
 
         benchmark_results = BenchmarkResults(
             model_results=models_results,
+            benchmark=tasks if isinstance(tasks, Benchmark) else None,
         )
 
         return benchmark_results
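Taken together, `load_results` now also accepts a single benchmark name, resolving it through `mteb.get_benchmark`, and the returned `BenchmarkResults` remembers which `Benchmark` it was filtered by. A hedged usage sketch; the benchmark name is illustrative and the import path for `ResultCache` is assumed from the file path above:

```python
import mteb
from mteb.cache import ResultCache  # assumed import path, matching mteb/cache.py

cache = ResultCache()

# A plain string is now resolved via mteb.get_benchmark() before filtering;
# "MTEB(Multilingual, v2)" is an illustrative benchmark name.
results = cache.load_results(
    tasks="MTEB(Multilingual, v2)",
    require_model_meta=True,
)

# The BenchmarkResults constructed above now carries the resolved Benchmark.
print(results.benchmark)
```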
mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json ADDED
@@ -0,0 +1,54 @@
+{
+  "test": {
+    "num_samples": 193,
+    "number_texts_intersect_with_train": 0,
+    "text_statistics": {
+      "total_text_length": 1543015,
+      "min_text_length": 492,
+      "average_text_length": 7994.896373056995,
+      "max_text_length": 49510,
+      "unique_texts": 193
+    },
+    "image_statistics": null,
+    "label_statistics": {
+      "min_labels_per_text": 1,
+      "average_label_per_text": 1.0,
+      "max_labels_per_text": 1,
+      "unique_labels": 2,
+      "labels": {
+        "1": {
+          "count": 177
+        },
+        "0": {
+          "count": 16
+        }
+      }
+    }
+  },
+  "train": {
+    "num_samples": 870,
+    "number_texts_intersect_with_train": null,
+    "text_statistics": {
+      "total_text_length": 6968132,
+      "min_text_length": 259,
+      "average_text_length": 8009.347126436782,
+      "max_text_length": 74490,
+      "unique_texts": 870
+    },
+    "image_statistics": null,
+    "label_statistics": {
+      "min_labels_per_text": 1,
+      "average_label_per_text": 1.0,
+      "max_labels_per_text": 1,
+      "unique_labels": 2,
+      "labels": {
+        "1": {
+          "count": 755
+        },
+        "0": {
+          "count": 115
+        }
+      }
+    }
+  }
+}
mteb/descriptive_stats/Retrieval/SQuADKorV1Retrieval.json ADDED
@@ -0,0 +1,30 @@
+{
+  "test": {
+    "num_samples": 6734,
+    "number_of_characters": 718835,
+    "documents_text_statistics": {
+      "total_text_length": 523388,
+      "min_text_length": 352,
+      "average_text_length": 545.1958333333333,
+      "max_text_length": 2952,
+      "unique_texts": 960
+    },
+    "documents_image_statistics": null,
+    "queries_text_statistics": {
+      "total_text_length": 195447,
+      "min_text_length": 5,
+      "average_text_length": 33.84949774852788,
+      "max_text_length": 110,
+      "unique_texts": 5764
+    },
+    "queries_image_statistics": null,
+    "relevant_docs_statistics": {
+      "num_relevant_docs": 5774,
+      "min_relevant_docs_per_query": 1,
+      "average_relevant_docs_per_query": 1.0,
+      "max_relevant_docs_per_query": 1,
+      "unique_relevant_docs": 960
+    },
+    "top_ranked_statistics": null
+  }
+}
mteb/models/model_implementations/andersborges.py CHANGED
@@ -24,6 +24,12 @@ model2vecdk = ModelMeta(
     training_datasets=set(),  # distilled
     public_training_code="https://github.com/andersborges/dkmodel2vec",
     public_training_data="https://huggingface.co/datasets/DDSC/nordic-embedding-training-data",
+    citation="""@article{minishlab2024model2vec,
+    author = {Tulkens, Stephan and {van Dongen}, Thomas},
+    title = {Model2Vec: Fast State-of-the-Art Static Embeddings},
+    year = {2024},
+    url = {https://github.com/MinishLab/model2vec}
+    }""",
 )
 
 
@@ -48,4 +54,10 @@ model2vecdk_stem = ModelMeta(
     training_datasets=set(),  # distilled
     public_training_code="https://github.com/andersborges/dkmodel2vec",
     public_training_data="https://huggingface.co/datasets/DDSC/nordic-embedding-training-data",
+    citation="""@article{minishlab2024model2vec,
+    author = {Tulkens, Stephan and {van Dongen}, Thomas},
+    title = {Model2Vec: Fast State-of-the-Art Static Embeddings},
+    year = {2024},
+    url = {https://github.com/MinishLab/model2vec}
+    }""",
 )
mteb/models/model_implementations/bge_models.py CHANGED
@@ -411,6 +411,7 @@ bge_small_zh = ModelMeta(
     public_training_data=None,
     training_datasets=bge_chinese_training_data,
     superseded_by="BAAI/bge-small-zh-v1.5",
+    citation=BGE_15_CITATION,
 )
 
 bge_base_zh = ModelMeta(
@@ -436,6 +437,7 @@ bge_base_zh = ModelMeta(
     public_training_data=None,
     training_datasets=bge_chinese_training_data,
     superseded_by="BAAI/bge-base-zh-v1.5",
+    citation=BGE_15_CITATION,
 )
 
 bge_large_zh = ModelMeta(
@@ -461,6 +463,7 @@ bge_large_zh = ModelMeta(
     public_training_data=None,
     training_datasets=bge_chinese_training_data,
     superseded_by="BAAI/bge-large-zh-v1.5",
+    citation=BGE_15_CITATION,
 )
 
 bge_small_en = ModelMeta(
@@ -486,6 +489,7 @@ bge_small_en = ModelMeta(
     public_training_data="https://data.baai.ac.cn/details/BAAI-MTP",
     training_datasets=bge_training_data,
     superseded_by="BAAI/bge-small-en-v1.5",
+    citation=BGE_15_CITATION,
 )
 
 bge_base_en = ModelMeta(
@@ -511,6 +515,7 @@ bge_base_en = ModelMeta(
     public_training_data="https://data.baai.ac.cn/details/BAAI-MTP",
     training_datasets=bge_training_data,
     superseded_by="BAAI/bge-base-en-v1.5",
+    citation=BGE_15_CITATION,
 )
 
 bge_large_en = ModelMeta(
@@ -536,6 +541,7 @@ bge_large_en = ModelMeta(
     public_training_data="https://data.baai.ac.cn/details/BAAI-MTP",
     training_datasets=bge_training_data,
     superseded_by="BAAI/bge-large-en-v1.5",
+    citation=BGE_15_CITATION,
 )
 
 
@@ -561,6 +567,7 @@ bge_small_zh_v1_5 = ModelMeta(
     public_training_code=None,
     public_training_data=None,
     training_datasets=bge_chinese_training_data,
+    citation=BGE_15_CITATION,
 )
 
 bge_base_zh_v1_5 = ModelMeta(
@@ -585,6 +592,7 @@ bge_base_zh_v1_5 = ModelMeta(
     public_training_code=None,
     public_training_data=None,
     training_datasets=bge_chinese_training_data,
+    citation=BGE_15_CITATION,
 )
 
 bge_large_zh_v1_5 = ModelMeta(
@@ -609,6 +617,7 @@ bge_large_zh_v1_5 = ModelMeta(
     public_training_code=None,
     public_training_data=None,
     training_datasets=bge_chinese_training_data,
+    citation=BGE_15_CITATION,
 )
 
 bge_m3 = ModelMeta(
@@ -630,6 +639,14 @@ bge_m3 = ModelMeta(
     public_training_code=None,
     public_training_data="https://huggingface.co/datasets/cfli/bge-full-data",
     training_datasets=bge_m3_training_data,
+    citation="""@misc{bge-m3,
+    title={BGE M3-Embedding: Multi-Lingual, Multi-Functionality, Multi-Granularity Text Embeddings Through Self-Knowledge Distillation},
+    author={Jianlv Chen and Shitao Xiao and Peitian Zhang and Kun Luo and Defu Lian and Zheng Liu},
+    year={2024},
+    eprint={2402.03216},
+    archivePrefix={arXiv},
+    primaryClass={cs.CL}
+    }""",
 )
 
 # Contents of cfli/bge-full-data
@@ -722,6 +739,24 @@ bge_multilingual_gemma2 = ModelMeta(
     }
     | bge_full_data
     | bge_m3_training_data,
+    citation="""@misc{bge-m3,
+    title={BGE M3-Embedding: Multi-Lingual, Multi-Functionality, Multi-Granularity Text Embeddings Through Self-Knowledge Distillation},
+    author={Jianlv Chen and Shitao Xiao and Peitian Zhang and Kun Luo and Defu Lian and Zheng Liu},
+    year={2024},
+    eprint={2402.03216},
+    archivePrefix={arXiv},
+    primaryClass={cs.CL}
+    }
+
+
+    @misc{bge_embedding,
+    title={C-Pack: Packaged Resources To Advance General Chinese Embedding},
+    author={Shitao Xiao and Zheng Liu and Peitian Zhang and Niklas Muennighoff},
+    year={2023},
+    eprint={2309.07597},
+    archivePrefix={arXiv},
+    primaryClass={cs.CL}
+    }""",
 )
 
 bge_en_icl = ModelMeta(
@@ -778,6 +813,14 @@ bge_m3_unsupervised = ModelMeta(
     public_training_code="https://github.com/FlagOpen/FlagEmbedding",
     public_training_data="https://huggingface.co/datasets/cfli/bge-full-data",
     training_datasets=bge_m3_training_data,
+    citation="""@misc{bge-m3,
+    title={BGE M3-Embedding: Multi-Lingual, Multi-Functionality, Multi-Granularity Text Embeddings Through Self-Knowledge Distillation},
+    author={Jianlv Chen and Shitao Xiao and Peitian Zhang and Kun Luo and Defu Lian and Zheng Liu},
+    year={2024},
+    eprint={2402.03216},
+    archivePrefix={arXiv},
+    primaryClass={cs.CL}
+    }""",
 )
 
 manu__bge_m3_custom_fr = ModelMeta(
mteb/models/model_implementations/codefuse_models.py CHANGED
@@ -1,5 +1,6 @@
 from mteb.models import ModelMeta
 from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
+from mteb.models.model_meta import ScoringFunction
 from mteb.types import PromptType
 
 F2LLM_CITATION = """@article{2025F2LLM,
@@ -74,6 +75,22 @@ training_datasets = {
     "TwentyNewsgroupsClustering",
 }
 
+c2llm_training_datasets = {
+    "CodeSearchNet",
+    "CodeSearchNetRetrieval",
+    "CodeSearchNetCCRetrieval",
+    "CodeEditSearchRetrieval",
+    "CodeFeedbackMT",
+    "CodeFeedbackST",
+    "CodeTransOceanContest",
+    "CodeTransOceanDL",
+    "COIRCodeSearchNetRetrieval",
+    "CosQA",
+    "StackOverflowQA",
+    "SyntheticText2SQL",
+    "AdvTrain",
+}
+
 prompts_dict = {
     "AmazonCounterfactualClassification": "Classify a given Amazon customer review text as either counterfactual or not counterfactual.",
     "Banking77Classification": "Given an online banking query, find the corresponding intents.",
@@ -119,6 +136,77 @@ prompts_dict = {
 }
 
 
+c2llm_prompts_dict = {
+    "CodeEditSearchRetrieval": {
+        "query": "Retrieve the diff code that relevant the following query:\n",
+        "document": "Retrieved Answer:",
+    },
+    "CodeSearchNetRetrieval": {
+        "query": "Retrieve the code that solves the following query:\n",
+        "document": "Retrieved Answer:",
+    },
+    "AppsRetrieval": {
+        "query": "Given a problem description from a programming contest, retrieve code examples that can assist in solving it.\n",
+        "document": "Retrieved Answer:",
+    },
+    "CodeFeedbackMT": {
+        "query": "Given a multi-turn conversation history that includes both text and code, retrieve relevant multi-modal answers composed of text and code that address the ongoing discussion.\n",
+        "document": "Retrieved Answer:",
+    },
+    "CodeFeedbackST": {
+        "query": "Given a single-turn question composed of text and code, retrieve suitable answers that also mix text and code to provide helpful feedback.\n",
+        "document": "Retrieved Answer:",
+    },
+    "CodeSearchNetCCRetrieval": {
+        "query": "Given an initial code segment, retrieve the subsequent segment that continues the code.\n",
+        "document": "Retrieved Answer:",
+    },
+    "CodeTransOceanContest": {
+        "query": "Given a Python code snippet, retrieve its semantically equivalent version written in C++.\n",
+        "document": "Retrieved Answer:",
+    },
+    "CodeTransOceanDL": {
+        "query": "Given a Python code snippet, retrieve its semantically equivalent version written in C++.\n",
+        "document": "Retrieved Answer:",
+    },
+    "COIRCodeSearchNetRetrieval": {
+        "query": "Given a code snippet, retrieve its corresponding document string that summarizes its functionality.\n",
+        "document": "Retrieved Answer:",
+    },
+    "CosQA": {
+        "query": "Given a query from a web search, retrieve code that is helpful in addressing the query.\n",
+        "document": "Retrieved Answer:",
+    },
+    "StackOverflowQA": {
+        "query": "Given a question combining text and code, retrieve relevant answers that also contain both text and code snippets and can address the question.\n",
+        "document": "Retrieved Answer:",
+    },
+    "SyntheticText2SQL": {
+        "query": "Given a natural language question, retrieve SQL queries that serve as appropriate responses.\n",
+        "document": "Retrieved Answer:",
+    },
+}
+
+c2llm_languages = [
+    "eng-Latn",
+    "zho-Hans",
+    "python-Code",
+    "javascript-Code",
+    "go-Code",
+    "ruby-Code",
+    "java-Code",
+    "php-Code",
+]
+
+c2llm_loader_kwargs = dict(
+    trust_remote_code=True,
+    prompts_dict=c2llm_prompts_dict,
+    apply_instruction_to_passages=True,
+    max_seq_length=2048,
+    padding_side="left",
+)
+
+
 def instruction_template(
     instruction: str, prompt_type: PromptType | None = None
 ) -> str:
@@ -218,3 +306,59 @@ F2LLM_4B = ModelMeta(
     training_datasets=training_datasets,
     citation=F2LLM_CITATION,
 )
+
+C2LLM_0B5 = ModelMeta(
+    loader=InstructSentenceTransformerModel,
+    loader_kwargs=c2llm_loader_kwargs,
+    name="codefuse-ai/C2LLM-0.5B",
+    revision="f08c18be03de42c6e388948a1804d4b271a953a2",
+    release_date="2025-12-22",
+    languages=c2llm_languages,
+    n_parameters=497252096,
+    memory_usage_mb=948.0,
+    max_tokens=32768,
+    embed_dim=896,
+    license="apache-2.0",
+    open_weights=True,
+    public_training_code=None,
+    public_training_data=None,
+    framework=["PyTorch", "Sentence Transformers"],
+    reference="https://huggingface.co/codefuse-ai/C2LLM-0.5B",
+    similarity_fn_name=ScoringFunction.COSINE,
+    use_instructions=True,
+    training_datasets=c2llm_training_datasets,
+    adapted_from=None,
+    superseded_by=None,
+    modalities=["text"],
+    is_cross_encoder=None,
+    citation=None,
+    contacts=None,
+)
+
+C2LLM_7B = ModelMeta(
+    loader=InstructSentenceTransformerModel,
+    loader_kwargs=c2llm_loader_kwargs,
+    name="codefuse-ai/C2LLM-7B",
+    revision="c1dc16d6d64eb962c783bfb36a6d9c2f24a86dca",
+    release_date="2025-12-22",
+    languages=c2llm_languages,
+    n_parameters=7667028992,
+    memory_usage_mb=14624.0,
+    max_tokens=32768,
+    embed_dim=3584,
+    license="apache-2.0",
+    open_weights=True,
+    public_training_code=None,
+    public_training_data=None,
+    framework=["PyTorch", "Sentence Transformers"],
+    reference="https://huggingface.co/codefuse-ai/C2LLM-7B",
+    similarity_fn_name=ScoringFunction.COSINE,
+    use_instructions=True,
+    training_datasets=c2llm_training_datasets,
+    adapted_from=None,
+    superseded_by=None,
+    modalities=["text"],
+    is_cross_encoder=None,
+    citation=None,
+    contacts=None,
+)
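The two `ModelMeta` entries above register the C2LLM checkpoints so they can be resolved by name from mteb's model registry. A hedged sketch of how a registered model is typically loaded and run; the task choice is illustrative, and `mteb.evaluate` is assumed to be the v2 entry point:

```python
import mteb

# Resolve the newly registered model by its registry name.
model = mteb.get_model("codefuse-ai/C2LLM-0.5B")

# CosQA appears in c2llm_training_datasets above; any retrieval task works.
tasks = mteb.get_tasks(tasks=["CosQA"])

results = mteb.evaluate(model, tasks)
print(results)
```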