evalscope 0.5.3__py3-none-any.whl → 0.5.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of evalscope has been flagged as a potentially problematic release.

Files changed (48)
  1. evalscope/backend/__init__.py +0 -3
  2. evalscope/backend/opencompass/backend_manager.py +2 -0
  3. evalscope/backend/opencompass/tasks/eval_datasets.py +2 -2
  4. evalscope/backend/rag_eval/__init__.py +3 -0
  5. evalscope/backend/rag_eval/backend_manager.py +68 -0
  6. evalscope/backend/rag_eval/cmteb/__init__.py +4 -0
  7. evalscope/backend/rag_eval/cmteb/arguments.py +59 -0
  8. evalscope/backend/rag_eval/cmteb/base.py +89 -0
  9. evalscope/backend/rag_eval/cmteb/task_template.py +83 -0
  10. evalscope/backend/rag_eval/cmteb/tasks/Classification.py +302 -0
  11. evalscope/backend/rag_eval/cmteb/tasks/Clustering.py +252 -0
  12. evalscope/backend/rag_eval/cmteb/tasks/PairClassification.py +113 -0
  13. evalscope/backend/rag_eval/cmteb/tasks/Reranking.py +153 -0
  14. evalscope/backend/rag_eval/cmteb/tasks/Retrieval.py +345 -0
  15. evalscope/backend/rag_eval/cmteb/tasks/STS.py +302 -0
  16. evalscope/backend/rag_eval/cmteb/tasks/__init__.py +64 -0
  17. evalscope/backend/rag_eval/ragas/__init__.py +2 -0
  18. evalscope/backend/rag_eval/ragas/arguments.py +37 -0
  19. evalscope/backend/rag_eval/ragas/task_template.py +117 -0
  20. evalscope/backend/vlm_eval_kit/backend_manager.py +1 -2
  21. evalscope/backend/vlm_eval_kit/custom_dataset.py +1 -1
  22. evalscope/benchmarks/benchmark.py +1 -1
  23. evalscope/evaluator/evaluator.py +4 -3
  24. evalscope/metrics/bundled_rouge_score/rouge_scorer.py +19 -0
  25. evalscope/models/api/__init__.py +3 -0
  26. evalscope/models/api/openai_api.py +228 -0
  27. evalscope/perf/http_client.py +5 -5
  28. evalscope/run.py +4 -0
  29. evalscope/third_party/longbench_write/__init__.py +3 -0
  30. evalscope/third_party/longbench_write/eval.py +284 -0
  31. evalscope/third_party/longbench_write/infer.py +217 -0
  32. evalscope/third_party/longbench_write/longbench_write.py +88 -0
  33. evalscope/third_party/longbench_write/resources/__init__.py +1 -0
  34. evalscope/third_party/longbench_write/resources/judge.txt +31 -0
  35. evalscope/third_party/longbench_write/resources/longbench_write.jsonl +120 -0
  36. evalscope/third_party/longbench_write/resources/longbench_write_en.jsonl +60 -0
  37. evalscope/third_party/longbench_write/resources/longwrite_ruler.jsonl +48 -0
  38. evalscope/third_party/longbench_write/tools/__init__.py +1 -0
  39. evalscope/third_party/longbench_write/tools/data_etl.py +155 -0
  40. evalscope/third_party/longbench_write/utils.py +37 -0
  41. evalscope/utils/logger.py +44 -14
  42. evalscope/utils/task_utils.py +3 -0
  43. evalscope/version.py +2 -2
  44. {evalscope-0.5.3.dist-info → evalscope-0.5.5.dist-info}/METADATA +46 -60
  45. {evalscope-0.5.3.dist-info → evalscope-0.5.5.dist-info}/RECORD +48 -18
  46. {evalscope-0.5.3.dist-info → evalscope-0.5.5.dist-info}/WHEEL +0 -0
  47. {evalscope-0.5.3.dist-info → evalscope-0.5.5.dist-info}/entry_points.txt +0 -0
  48. {evalscope-0.5.3.dist-info → evalscope-0.5.5.dist-info}/top_level.txt +0 -0
evalscope/backend/rag_eval/cmteb/tasks/Clustering.py
@@ -0,0 +1,252 @@
+import itertools
+
+from datasets import Dataset, DatasetDict
+
+from mteb.abstasks.AbsTaskClustering import AbsTaskClustering
+from mteb.abstasks.AbsTaskClusteringFast import (
+    AbsTaskClusteringFast,
+    check_label_distribution,
+)
+from mteb.abstasks.TaskMetadata import TaskMetadata
+
+NUM_SAMPLES = 2048
+
+
+class CLSClusteringFastS2S(AbsTaskClusteringFast):
+    max_document_to_embed = NUM_SAMPLES
+    max_fraction_of_documents_to_embed = None
+
+    metadata = TaskMetadata(
+        name="CLSClusteringS2S",
+        description="Clustering of titles from CLS dataset. Clustering of 13 sets on the main category.",
+        reference="https://arxiv.org/abs/2209.05034",
+        dataset={
+            "path": "C-MTEB/CLSClusteringS2S",
+            "revision": "e458b3f5414b62b7f9f83499ac1f5497ae2e869f",
+        },
+        type="Clustering",
+        category="s2s",
+        modalities=["text"],
+        eval_splits=["test"],
+        eval_langs=["cmn-Hans"],
+        main_score="v_measure",
+        date=("2022-01-01", "2022-09-12"),
+        domains=["Academic", "Written"],
+        task_subtypes=["Thematic clustering", "Topic classification"],
+        license="Apache-2.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        bibtex_citation="""@misc{li2022csl,
+title={CSL: A Large-scale Chinese Scientific Literature Dataset},
+author={Yudong Li and Yuqing Zhang and Zhe Zhao and Linlin Shen and Weijie Liu and Weiquan Mao and Hui Zhang},
+year={2022},
+eprint={2209.05034},
+archivePrefix={arXiv},
+primaryClass={cs.CL}
+}""",
+        descriptive_stats={
+            "n_samples": {"test": NUM_SAMPLES},
+            "avg_character_length": {},
+        },
+    )
+
+    def dataset_transform(self):
+        ds = {}
+        for split in self.metadata.eval_splits:
+            labels = list(itertools.chain.from_iterable(self.dataset[split]["labels"]))
+            sentences = list(
+                itertools.chain.from_iterable(self.dataset[split]["sentences"])
+            )
+
+            check_label_distribution(self.dataset[split])
+
+            ds[split] = Dataset.from_dict({"labels": labels, "sentences": sentences})
+        self.dataset = DatasetDict(ds)
+        self.dataset = self.stratified_subsampling(
+            self.dataset,
+            self.seed,
+            self.metadata.eval_splits,
+            label="labels",
+            n_samples=NUM_SAMPLES,
+        )
+
+
+class CLSClusteringFastP2P(AbsTaskClusteringFast):
+    max_document_to_embed = NUM_SAMPLES
+    max_fraction_of_documents_to_embed = None
+
+    metadata = TaskMetadata(
+        name="CLSClusteringP2P",
+        description="Clustering of titles + abstract from CLS dataset. Clustering of 13 sets on the main category.",
+        reference="https://arxiv.org/abs/2209.05034",
+        dataset={
+            "path": "C-MTEB/CLSClusteringP2P",
+            "revision": "4b6227591c6c1a73bc76b1055f3b7f3588e72476",
+        },
+        type="Clustering",
+        category="p2p",
+        modalities=["text"],
+        eval_splits=["test"],
+        eval_langs=["cmn-Hans"],
+        main_score="v_measure",
+        date=("2022-01-01", "2022-09-12"),
+        domains=["Academic", "Written"],
+        task_subtypes=["Thematic clustering", "Topic classification"],
+        license="Apache-2.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        bibtex_citation="""@misc{li2022csl,
+title={CSL: A Large-scale Chinese Scientific Literature Dataset},
+author={Yudong Li and Yuqing Zhang and Zhe Zhao and Linlin Shen and Weijie Liu and Weiquan Mao and Hui Zhang},
+year={2022},
+eprint={2209.05034},
+archivePrefix={arXiv},
+primaryClass={cs.CL}
+}""",
+        descriptive_stats={
+            "n_samples": {"test": NUM_SAMPLES},
+            "avg_character_length": {},
+        },
+    )
+
+    def dataset_transform(self):
+        ds = {}
+        for split in self.metadata.eval_splits:
+            labels = list(itertools.chain.from_iterable(self.dataset[split]["labels"]))
+            sentences = list(
+                itertools.chain.from_iterable(self.dataset[split]["sentences"])
+            )
+
+            check_label_distribution(self.dataset[split])
+
+            ds[split] = Dataset.from_dict({"labels": labels, "sentences": sentences})
+        self.dataset = DatasetDict(ds)
+        self.dataset = self.stratified_subsampling(
+            self.dataset,
+            self.seed,
+            self.metadata.eval_splits,
+            label="labels",
+            n_samples=NUM_SAMPLES,
+        )
+
+
+class ThuNewsClusteringFastS2S(AbsTaskClusteringFast):
+    max_document_to_embed = NUM_SAMPLES
+    max_fraction_of_documents_to_embed = None
+
+    metadata = TaskMetadata(
+        name="ThuNewsClusteringS2S",
+        dataset={
+            "path": "C-MTEB/ThuNewsClusteringS2S",
+            "revision": "8a8b2caeda43f39e13c4bc5bea0f8a667896e10d",
+        },
+        description="Clustering of titles from the THUCNews dataset",
+        reference="http://thuctc.thunlp.org/",
+        type="Clustering",
+        category="s2s",
+        modalities=["text"],
+        eval_splits=["test"],
+        eval_langs=["cmn-Hans"],
+        main_score="v_measure",
+        date=("2006-01-01", "2007-01-01"),
+        domains=["News", "Written"],
+        task_subtypes=["Thematic clustering", "Topic classification"],
+        license="Not specified",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        bibtex_citation="""@software{THUCTC,
+author = {Sun, M. and Li, J. and Guo, Z. and Yu, Z. and Zheng, Y. and Si, X. and Liu, Z.},
+title = {THUCTC: An Efficient Chinese Text Classifier},
+year = {2016},
+note = {THU Chinese Text Classification Toolkit},
+publisher = {THU Natural Language Processing Lab},
+url = {https://github.com/thunlp/THUCTC}
+}""",
+        descriptive_stats={
+            "n_samples": {"test": NUM_SAMPLES},
+            "avg_character_length": {},
+        },
+    )
+
+    def dataset_transform(self):
+        ds = {}
+        for split in self.metadata.eval_splits:
+            labels = list(itertools.chain.from_iterable(self.dataset[split]["labels"]))
+            sentences = list(
+                itertools.chain.from_iterable(self.dataset[split]["sentences"])
+            )
+
+            check_label_distribution(self.dataset[split])
+
+            ds[split] = Dataset.from_dict({"labels": labels, "sentences": sentences})
+        self.dataset = DatasetDict(ds)
+        self.dataset = self.stratified_subsampling(
+            self.dataset,
+            self.seed,
+            self.metadata.eval_splits,
+            label="labels",
+            n_samples=NUM_SAMPLES,
+        )
+
+
+class ThuNewsClusteringFastP2P(AbsTaskClusteringFast):
+    max_document_to_embed = NUM_SAMPLES
+    max_fraction_of_documents_to_embed = None
+
+    metadata = TaskMetadata(
+        name="ThuNewsClusteringP2P",
+        dataset={
+            "path": "C-MTEB/ThuNewsClusteringP2P",
+            "revision": "5798586b105c0434e4f0fe5e767abe619442cf93",
+        },
+        description="Clustering of titles + abstracts from the THUCNews dataset",
+        reference="http://thuctc.thunlp.org/",
+        type="Clustering",
+        category="p2p",
+        modalities=["text"],
+        eval_splits=["test"],
+        eval_langs=["cmn-Hans"],
+        main_score="v_measure",
+        date=("2006-01-01", "2007-01-01"),
+        domains=["News", "Written"],
+        task_subtypes=["Thematic clustering", "Topic classification"],
+        license="Not specified",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        bibtex_citation="""@software{THUCTC,
+author = {Sun, M. and Li, J. and Guo, Z. and Yu, Z. and Zheng, Y. and Si, X. and Liu, Z.},
+title = {THUCTC: An Efficient Chinese Text Classifier},
+year = {2016},
+note = {THU Chinese Text Classification Toolkit},
+publisher = {THU Natural Language Processing Lab},
+url = {https://github.com/thunlp/THUCTC}
+}""",
+        descriptive_stats={
+            "n_samples": {"test": NUM_SAMPLES},
+            "avg_character_length": {},
+        },
+    )
+
+    def dataset_transform(self):
+        ds = {}
+        for split in self.metadata.eval_splits:
+            labels = list(itertools.chain.from_iterable(self.dataset[split]["labels"]))
+            sentences = list(
+                itertools.chain.from_iterable(self.dataset[split]["sentences"])
+            )
+
+            check_label_distribution(self.dataset[split])
+
+            ds[split] = Dataset.from_dict({"labels": labels, "sentences": sentences})
+        self.dataset = DatasetDict(ds)
+        self.dataset = self.stratified_subsampling(
+            self.dataset,
+            self.seed,
+            self.metadata.eval_splits,
+            label="labels",
+            n_samples=NUM_SAMPLES,
+        )
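
These classes follow the standard mteb task interface, so they can be scored with mteb's MTEB runner and any sentence-embedding model. A minimal sketch, assuming mteb and sentence-transformers are installed, that CLSClusteringFastS2S from the file above is in scope, and using a placeholder model name not prescribed by this release:

# Sketch only: the model name below is a placeholder, not something evalscope mandates.
from mteb import MTEB
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("BAAI/bge-small-zh-v1.5")   # any text-embedding model works
evaluation = MTEB(tasks=[CLSClusteringFastS2S()])       # task class defined in Clustering.py above
results = evaluation.run(model, output_folder="outputs/cmteb")
# The task's main_score is v_measure, reported for its single "test" split.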
evalscope/backend/rag_eval/cmteb/tasks/PairClassification.py
@@ -0,0 +1,113 @@
+from mteb.abstasks.AbsTaskPairClassification import AbsTaskPairClassification
+from mteb.abstasks.TaskMetadata import TaskMetadata
+
+
+class Ocnli(AbsTaskPairClassification):
+    metadata = TaskMetadata(
+        name="Ocnli",
+        description="Original Chinese Natural Language Inference dataset",
+        reference="https://arxiv.org/abs/2010.05444",
+        dataset={
+            "path": "C-MTEB/OCNLI",
+            "revision": "66e76a618a34d6d565d5538088562851e6daa7ec",
+        },
+        type="PairClassification",
+        category="s2s",
+        modalities=["text"],
+        eval_splits=["validation"],
+        eval_langs=["cmn-Hans"],
+        main_score="max_accuracy",
+        date=None,
+        domains=None,
+        task_subtypes=None,
+        license=None,
+        annotations_creators=None,
+        dialect=None,
+        sample_creation=None,
+        bibtex_citation="""@misc{hu2020ocnli,
+title={OCNLI: Original Chinese Natural Language Inference},
+author={Hai Hu and Kyle Richardson and Liang Xu and Lu Li and Sandra Kuebler and Lawrence S. Moss},
+year={2020},
+eprint={2010.05444},
+archivePrefix={arXiv},
+primaryClass={cs.CL}
+}""",
+        descriptive_stats={"n_samples": None, "avg_character_length": None},
+    )
+
+    def dataset_transform(self):
+        self.dataset = self.dataset.rename_column("sent1", "sentence1")
+        self.dataset = self.dataset.rename_column("sent2", "sentence2")
+
+
+class Cmnli(AbsTaskPairClassification):
+    metadata = TaskMetadata(
+        name="Cmnli",
+        description="Chinese Multi-Genre NLI",
+        reference="https://huggingface.co/datasets/clue/viewer/cmnli",
+        dataset={
+            "path": "C-MTEB/CMNLI",
+            "revision": "41bc36f332156f7adc9e38f53777c959b2ae9766",
+        },
+        type="PairClassification",
+        category="s2s",
+        modalities=["text"],
+        eval_splits=["validation", "test"],
+        eval_langs=["cmn-Hans"],
+        main_score="max_accuracy",
+        date=None,
+        domains=None,
+        task_subtypes=None,
+        license=None,
+        annotations_creators=None,
+        dialect=None,
+        sample_creation=None,
+        bibtex_citation="""@inproceedings{xu-etal-2020-clue,
+title = "{CLUE}: A {C}hinese Language Understanding Evaluation Benchmark",
+author = "Xu, Liang and
+Hu, Hai and
+Zhang, Xuanwei and
+Li, Lu and
+Cao, Chenjie and
+Li, Yudong and
+Xu, Yechen and
+Sun, Kai and
+Yu, Dian and
+Yu, Cong and
+Tian, Yin and
+Dong, Qianqian and
+Liu, Weitang and
+Shi, Bo and
+Cui, Yiming and
+Li, Junyi and
+Zeng, Jun and
+Wang, Rongzhao and
+Xie, Weijian and
+Li, Yanting and
+Patterson, Yina and
+Tian, Zuoyu and
+Zhang, Yiwen and
+Zhou, He and
+Liu, Shaoweihua and
+Zhao, Zhe and
+Zhao, Qipeng and
+Yue, Cong and
+Zhang, Xinrui and
+Yang, Zhengliang and
+Richardson, Kyle and
+Lan, Zhenzhong",
+booktitle = "Proceedings of the 28th International Conference on Computational Linguistics",
+month = dec,
+year = "2020",
+address = "Barcelona, Spain (Online)",
+publisher = "International Committee on Computational Linguistics",
+url = "https://aclanthology.org/2020.coling-main.419",
+doi = "10.18653/v1/2020.coling-main.419",
+pages = "4762--4772",
+}""",
+        descriptive_stats={"n_samples": None, "avg_character_length": None},
+    )
+
+    def dataset_transform(self):
+        self.dataset = self.dataset.rename_column("sent1", "sentence1")
+        self.dataset = self.dataset.rename_column("sent2", "sentence2")
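
The only per-task customization here is dataset_transform, which renames the C-MTEB columns sent1/sent2 to the sentence1/sentence2 names that mteb's pair-classification evaluation presumably expects. A toy illustration of that rename on made-up two-row data (hypothetical values, not the real OCNLI schema):

# Toy data only; illustrates the rename_column calls used in dataset_transform above.
from datasets import Dataset, DatasetDict

raw = DatasetDict({
    "validation": Dataset.from_dict({
        "sent1": ["A man is eating.", "A woman is reading."],
        "sent2": ["Someone is eating.", "The woman is asleep."],
        "labels": [1, 0],
    })
})
renamed = raw.rename_column("sent1", "sentence1").rename_column("sent2", "sentence2")
print(renamed["validation"].column_names)  # ['sentence1', 'sentence2', 'labels']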
evalscope/backend/rag_eval/cmteb/tasks/Reranking.py
@@ -0,0 +1,153 @@
+from typing import Any
+from mteb.abstasks.AbsTask import ScoresDict
+from mteb.abstasks.AbsTaskReranking import AbsTaskReranking
+from mteb.abstasks.TaskMetadata import HFSubset, TaskMetadata
+from mteb.encoder_interface import Encoder, EncoderWithQueryCorpusEncode
+
+
+class T2Reranking(AbsTaskReranking):
+    metadata = TaskMetadata(
+        name="T2Reranking",
+        description="T2Ranking: A large-scale Chinese Benchmark for Passage Ranking",
+        reference="https://arxiv.org/abs/2304.03679",
+        dataset={
+            "path": "C-MTEB/T2Reranking",
+            "revision": "76631901a18387f85eaa53e5450019b87ad58ef9",
+        },
+        type="Reranking",
+        category="s2s",
+        modalities=["text"],
+        eval_splits=["dev"],
+        eval_langs=["cmn-Hans"],
+        main_score="map",
+        date=None,
+        form=None,
+        domains=None,
+        task_subtypes=None,
+        license=None,
+        annotations_creators=None,
+        dialect=None,
+        sample_creation=None,
+        bibtex_citation="""@misc{xie2023t2ranking,
+title={T2Ranking: A large-scale Chinese Benchmark for Passage Ranking},
+author={Xiaohui Xie and Qian Dong and Bingning Wang and Feiyang Lv and Ting Yao and Weinan Gan and Zhijing Wu and Xiangsheng Li and Haitao Li and Yiqun Liu and Jin Ma},
+year={2023},
+eprint={2304.03679},
+archivePrefix={arXiv},
+primaryClass={cs.IR}
+}""",
+        descriptive_stats={"n_samples": None, "avg_character_length": None},
+    )
+
+
+class MMarcoReranking(AbsTaskReranking):
+    metadata = TaskMetadata(
+        name="MMarcoReranking",
+        description="mMARCO is a multilingual version of the MS MARCO passage ranking dataset",
+        reference="https://github.com/unicamp-dl/mMARCO",
+        dataset={
+            "path": "C-MTEB/Mmarco-reranking",
+            "revision": "8e0c766dbe9e16e1d221116a3f36795fbade07f6",
+        },
+        type="Reranking",
+        category="s2s",
+        modalities=["text"],
+        eval_splits=["dev"],
+        eval_langs=["cmn-Hans"],
+        main_score="map",
+        date=None,
+        form=None,
+        domains=None,
+        task_subtypes=None,
+        license=None,
+        annotations_creators=None,
+        dialect=None,
+        sample_creation=None,
+        bibtex_citation="""@misc{bonifacio2021mmarco,
+title={mMARCO: A Multilingual Version of MS MARCO Passage Ranking Dataset},
+author={Luiz Henrique Bonifacio and Vitor Jeronymo and Hugo Queiroz Abonizio and Israel Campiotti and Marzieh Fadaee and and Roberto Lotufo and Rodrigo Nogueira},
+year={2021},
+eprint={2108.13897},
+archivePrefix={arXiv},
+primaryClass={cs.CL}
+}""",
+        descriptive_stats={"n_samples": None, "avg_character_length": None},
+    )
+
+
+class CMedQAv1(AbsTaskReranking):
+    metadata = TaskMetadata(
+        name="CMedQAv1",
+        description="Chinese community medical question answering",
+        reference="https://github.com/zhangsheng93/cMedQA",
+        dataset={
+            "path": "C-MTEB/CMedQAv1-reranking",
+            "revision": "8d7f1e942507dac42dc58017c1a001c3717da7df",
+        },
+        type="Reranking",
+        category="s2s",
+        modalities=["text"],
+        eval_splits=["test"],
+        eval_langs=["cmn-Hans"],
+        main_score="map",
+        date=("2017-01-01", "2017-07-26"),
+        domains=["Medical", "Written"],
+        task_subtypes=[],
+        license="Not specified",
+        annotations_creators="expert-annotated",
+        dialect=[],
+        sample_creation="found",
+        bibtex_citation="""@article{zhang2017chinese,
+title={Chinese Medical Question Answer Matching Using End-to-End Character-Level Multi-Scale CNNs},
+author={Zhang, Sheng and Zhang, Xin and Wang, Hui and Cheng, Jiajun and Li, Pei and Ding, Zhaoyun},
+journal={Applied Sciences},
+volume={7},
+number={8},
+pages={767},
+year={2017},
+publisher={Multidisciplinary Digital Publishing Institute}
+}""",
+        descriptive_stats={
+            "n_samples": {"test": 2000},
+            "avg_character_length": {"test": 165},
+        },
+    )
+
+
+class CMedQAv2(AbsTaskReranking):
+    metadata = TaskMetadata(
+        name="CMedQAv2",
+        description="Chinese community medical question answering",
+        reference="https://github.com/zhangsheng93/cMedQA2",
+        dataset={
+            "path": "C-MTEB/CMedQAv2-reranking",
+            "revision": "23d186750531a14a0357ca22cd92d712fd512ea0",
+        },
+        type="Reranking",
+        category="s2s",
+        modalities=["text"],
+        eval_splits=["test"],
+        eval_langs=["cmn-Hans"],
+        main_score="map",
+        date=None,
+        form=None,
+        domains=None,
+        task_subtypes=None,
+        license=None,
+        annotations_creators=None,
+        dialect=None,
+        sample_creation=None,
+        bibtex_citation="""@ARTICLE{8548603,
+author={S. Zhang and X. Zhang and H. Wang and L. Guo and S. Liu},
+journal={IEEE Access},
+title={Multi-Scale Attentive Interaction Networks for Chinese Medical Question Answer Selection},
+year={2018},
+volume={6},
+number={},
+pages={74061-74071},
+keywords={Biomedical imaging;Data mining;Semantics;Medical services;Feature extraction;Knowledge discovery;Medical question answering;interactive attention;deep learning;deep neural networks},
+doi={10.1109/ACCESS.2018.2883637},
+ISSN={2169-3536},
+month={},}""",
+        descriptive_stats={"n_samples": None, "avg_character_length": None},
+    )
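
Like the clustering and pair-classification tasks above, these reranking tasks (main score: map) plug into the same mteb harness, so several of the newly added tasks can be scored in one pass. A sketch under the same assumptions as before: placeholder model name, and the task classes from the three files assumed importable (for example via the new tasks/__init__.py added in this release):

# Sketch only: batch several of the C-MTEB tasks added in this release into a single run.
from mteb import MTEB
from sentence_transformers import SentenceTransformer

tasks = [T2Reranking(), CMedQAv1(), Ocnli(), CLSClusteringFastS2S()]  # classes defined above
model = SentenceTransformer("BAAI/bge-small-zh-v1.5")                 # placeholder model
MTEB(tasks=tasks).run(model, output_folder="outputs/cmteb")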