evalscope 0.7.1__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between these package versions as they appear in their respective public registries.
Potentially problematic release: this version of evalscope might be problematic.
- evalscope/__init__.py +1 -1
- evalscope/arguments.py +73 -0
- evalscope/backend/base.py +5 -1
- evalscope/backend/opencompass/api_meta_template.py +8 -14
- evalscope/backend/opencompass/backend_manager.py +24 -15
- evalscope/backend/opencompass/tasks/eval_api.py +1 -6
- evalscope/backend/opencompass/tasks/eval_datasets.py +26 -28
- evalscope/backend/rag_eval/__init__.py +3 -3
- evalscope/backend/rag_eval/backend_manager.py +21 -25
- evalscope/backend/rag_eval/clip_benchmark/__init__.py +1 -1
- evalscope/backend/rag_eval/clip_benchmark/arguments.py +6 -6
- evalscope/backend/rag_eval/clip_benchmark/dataset_builder.py +62 -79
- evalscope/backend/rag_eval/clip_benchmark/task_template.py +29 -43
- evalscope/backend/rag_eval/clip_benchmark/tasks/image_caption.py +20 -22
- evalscope/backend/rag_eval/clip_benchmark/tasks/zeroshot_classification.py +16 -23
- evalscope/backend/rag_eval/clip_benchmark/tasks/zeroshot_retrieval.py +14 -35
- evalscope/backend/rag_eval/clip_benchmark/utils/webdataset_convert.py +69 -90
- evalscope/backend/rag_eval/cmteb/__init__.py +3 -3
- evalscope/backend/rag_eval/cmteb/arguments.py +25 -27
- evalscope/backend/rag_eval/cmteb/base.py +22 -23
- evalscope/backend/rag_eval/cmteb/task_template.py +15 -17
- evalscope/backend/rag_eval/cmteb/tasks/Classification.py +98 -79
- evalscope/backend/rag_eval/cmteb/tasks/Clustering.py +17 -22
- evalscope/backend/rag_eval/cmteb/tasks/CustomTask.py +17 -19
- evalscope/backend/rag_eval/cmteb/tasks/PairClassification.py +35 -29
- evalscope/backend/rag_eval/cmteb/tasks/Reranking.py +18 -5
- evalscope/backend/rag_eval/cmteb/tasks/Retrieval.py +163 -163
- evalscope/backend/rag_eval/cmteb/tasks/STS.py +126 -104
- evalscope/backend/rag_eval/cmteb/tasks/__init__.py +33 -34
- evalscope/backend/rag_eval/ragas/__init__.py +2 -2
- evalscope/backend/rag_eval/ragas/arguments.py +3 -8
- evalscope/backend/rag_eval/ragas/prompts/chinese/AnswerCorrectness/correctness_prompt_chinese.json +9 -9
- evalscope/backend/rag_eval/ragas/prompts/chinese/AnswerCorrectness/long_form_answer_prompt_chinese.json +2 -2
- evalscope/backend/rag_eval/ragas/prompts/chinese/AnswerRelevancy/question_generation_chinese.json +3 -3
- evalscope/backend/rag_eval/ragas/prompts/chinese/ContextPrecision/context_precision_prompt_chinese.json +5 -5
- evalscope/backend/rag_eval/ragas/prompts/chinese/CustomNodeFilter/scoring_prompt_chinese.json +7 -0
- evalscope/backend/rag_eval/ragas/prompts/chinese/Faithfulness/nli_statements_message_chinese.json +8 -8
- evalscope/backend/rag_eval/ragas/prompts/chinese/Faithfulness/statement_prompt_chinese.json +5 -5
- evalscope/backend/rag_eval/ragas/prompts/chinese/HeadlinesExtractor/prompt_chinese.json +7 -5
- evalscope/backend/rag_eval/ragas/prompts/chinese/MultiHopAbstractQuerySynthesizer/concept_combination_prompt_chinese.json +2 -2
- evalscope/backend/rag_eval/ragas/prompts/chinese/MultiHopAbstractQuerySynthesizer/generate_query_reference_prompt_chinese.json +27 -4
- evalscope/backend/rag_eval/ragas/prompts/chinese/MultiHopAbstractQuerySynthesizer/theme_persona_matching_prompt_chinese.json +2 -2
- evalscope/backend/rag_eval/ragas/prompts/chinese/MultiHopSpecificQuerySynthesizer/generate_query_reference_prompt_chinese.json +27 -4
- evalscope/backend/rag_eval/ragas/prompts/chinese/MultiHopSpecificQuerySynthesizer/theme_persona_matching_prompt_chinese.json +2 -2
- evalscope/backend/rag_eval/ragas/prompts/chinese/MultiModalFaithfulness/faithfulness_prompt_chinese.json +2 -2
- evalscope/backend/rag_eval/ragas/prompts/chinese/MultiModalRelevance/relevance_prompt_chinese.json +5 -5
- evalscope/backend/rag_eval/ragas/prompts/chinese/NERExtractor/prompt_chinese.json +3 -3
- evalscope/backend/rag_eval/ragas/prompts/chinese/SingleHopSpecificQuerySynthesizer/generate_query_reference_prompt_chinese.json +21 -4
- evalscope/backend/rag_eval/ragas/prompts/chinese/SingleHopSpecificQuerySynthesizer/theme_persona_matching_prompt_chinese.json +3 -3
- evalscope/backend/rag_eval/ragas/prompts/chinese/SummaryExtractor/prompt_chinese.json +4 -4
- evalscope/backend/rag_eval/ragas/prompts/chinese/ThemesExtractor/prompt_chinese.json +2 -2
- evalscope/backend/rag_eval/ragas/prompts/persona_prompt.py +0 -1
- evalscope/backend/rag_eval/ragas/task_template.py +10 -15
- evalscope/backend/rag_eval/ragas/tasks/__init__.py +1 -1
- evalscope/backend/rag_eval/ragas/tasks/build_distribution.py +45 -0
- evalscope/backend/rag_eval/ragas/tasks/build_transform.py +135 -0
- evalscope/backend/rag_eval/ragas/tasks/testset_generation.py +17 -133
- evalscope/backend/rag_eval/ragas/tasks/translate_prompt.py +8 -18
- evalscope/backend/rag_eval/utils/clip.py +46 -50
- evalscope/backend/rag_eval/utils/embedding.py +12 -11
- evalscope/backend/rag_eval/utils/llm.py +8 -6
- evalscope/backend/rag_eval/utils/tools.py +12 -11
- evalscope/backend/vlm_eval_kit/__init__.py +1 -1
- evalscope/backend/vlm_eval_kit/custom_dataset.py +7 -8
- evalscope/benchmarks/arc/__init__.py +3 -2
- evalscope/benchmarks/arc/ai2_arc.py +19 -16
- evalscope/benchmarks/arc/arc_adapter.py +32 -24
- evalscope/benchmarks/bbh/__init__.py +1 -2
- evalscope/benchmarks/bbh/bbh_adapter.py +28 -25
- evalscope/benchmarks/bbh/cot_prompts/boolean_expressions.txt +1 -1
- evalscope/benchmarks/bbh/cot_prompts/causal_judgement.txt +1 -1
- evalscope/benchmarks/bbh/cot_prompts/date_understanding.txt +1 -1
- evalscope/benchmarks/bbh/cot_prompts/disambiguation_qa.txt +1 -1
- evalscope/benchmarks/bbh/cot_prompts/dyck_languages.txt +1 -1
- evalscope/benchmarks/bbh/cot_prompts/formal_fallacies.txt +1 -1
- evalscope/benchmarks/bbh/cot_prompts/geometric_shapes.txt +1 -1
- evalscope/benchmarks/bbh/cot_prompts/hyperbaton.txt +1 -1
- evalscope/benchmarks/bbh/cot_prompts/logical_deduction_five_objects.txt +1 -1
- evalscope/benchmarks/bbh/cot_prompts/logical_deduction_seven_objects.txt +1 -1
- evalscope/benchmarks/bbh/cot_prompts/logical_deduction_three_objects.txt +1 -1
- evalscope/benchmarks/bbh/cot_prompts/movie_recommendation.txt +1 -1
- evalscope/benchmarks/bbh/cot_prompts/multistep_arithmetic_two.txt +1 -1
- evalscope/benchmarks/bbh/cot_prompts/navigate.txt +1 -1
- evalscope/benchmarks/bbh/cot_prompts/object_counting.txt +1 -1
- evalscope/benchmarks/bbh/cot_prompts/penguins_in_a_table.txt +1 -1
- evalscope/benchmarks/bbh/cot_prompts/reasoning_about_colored_objects.txt +1 -1
- evalscope/benchmarks/bbh/cot_prompts/ruin_names.txt +1 -1
- evalscope/benchmarks/bbh/cot_prompts/salient_translation_error_detection.txt +1 -1
- evalscope/benchmarks/bbh/cot_prompts/snarks.txt +1 -1
- evalscope/benchmarks/bbh/cot_prompts/sports_understanding.txt +1 -1
- evalscope/benchmarks/bbh/cot_prompts/temporal_sequences.txt +1 -1
- evalscope/benchmarks/bbh/cot_prompts/tracking_shuffled_objects_five_objects.txt +1 -1
- evalscope/benchmarks/bbh/cot_prompts/tracking_shuffled_objects_seven_objects.txt +1 -1
- evalscope/benchmarks/bbh/cot_prompts/tracking_shuffled_objects_three_objects.txt +1 -1
- evalscope/benchmarks/bbh/cot_prompts/web_of_lies.txt +1 -1
- evalscope/benchmarks/bbh/cot_prompts/word_sorting.txt +1 -1
- evalscope/benchmarks/benchmark.py +16 -16
- evalscope/benchmarks/ceval/__init__.py +3 -2
- evalscope/benchmarks/ceval/ceval_adapter.py +80 -69
- evalscope/benchmarks/ceval/ceval_exam.py +18 -31
- evalscope/benchmarks/cmmlu/__init__.py +3 -2
- evalscope/benchmarks/cmmlu/cmmlu.py +87 -92
- evalscope/benchmarks/cmmlu/cmmlu_adapter.py +109 -155
- evalscope/benchmarks/cmmlu/samples.jsonl +1 -1
- evalscope/benchmarks/competition_math/__init__.py +3 -2
- evalscope/benchmarks/competition_math/competition_math.py +7 -16
- evalscope/benchmarks/competition_math/competition_math_adapter.py +32 -34
- evalscope/benchmarks/data_adapter.py +24 -24
- evalscope/benchmarks/general_qa/__init__.py +3 -2
- evalscope/benchmarks/general_qa/general_qa_adapter.py +34 -38
- evalscope/benchmarks/gsm8k/__init__.py +1 -1
- evalscope/benchmarks/gsm8k/gsm8k.py +6 -12
- evalscope/benchmarks/gsm8k/gsm8k_adapter.py +26 -24
- evalscope/benchmarks/hellaswag/__init__.py +3 -2
- evalscope/benchmarks/hellaswag/hellaswag.py +15 -19
- evalscope/benchmarks/hellaswag/hellaswag_adapter.py +27 -23
- evalscope/benchmarks/humaneval/__init__.py +1 -1
- evalscope/benchmarks/humaneval/humaneval.py +15 -18
- evalscope/benchmarks/humaneval/humaneval_adapter.py +0 -1
- evalscope/benchmarks/mmlu/__init__.py +3 -2
- evalscope/benchmarks/mmlu/mmlu.py +15 -29
- evalscope/benchmarks/mmlu/mmlu_adapter.py +85 -77
- evalscope/benchmarks/race/__init__.py +3 -2
- evalscope/benchmarks/race/race.py +21 -35
- evalscope/benchmarks/race/race_adapter.py +32 -29
- evalscope/benchmarks/race/samples.jsonl +1 -1
- evalscope/benchmarks/trivia_qa/__init__.py +3 -2
- evalscope/benchmarks/trivia_qa/samples.jsonl +1 -1
- evalscope/benchmarks/trivia_qa/trivia_qa.py +19 -34
- evalscope/benchmarks/trivia_qa/trivia_qa_adapter.py +27 -22
- evalscope/benchmarks/truthful_qa/__init__.py +3 -2
- evalscope/benchmarks/truthful_qa/truthful_qa.py +25 -29
- evalscope/benchmarks/truthful_qa/truthful_qa_adapter.py +36 -37
- evalscope/cli/cli.py +6 -5
- evalscope/cli/start_eval.py +31 -0
- evalscope/cli/start_perf.py +0 -3
- evalscope/cli/start_server.py +27 -41
- evalscope/config.py +119 -95
- evalscope/constants.py +61 -29
- evalscope/evaluator/__init__.py +1 -0
- evalscope/evaluator/evaluator.py +96 -377
- evalscope/evaluator/humaneval_evaluator.py +158 -0
- evalscope/evaluator/rating_eval.py +12 -33
- evalscope/evaluator/reviewer/auto_reviewer.py +47 -76
- evalscope/metrics/bundled_rouge_score/rouge_scorer.py +10 -20
- evalscope/metrics/code_metric.py +3 -9
- evalscope/metrics/math_accuracy.py +3 -6
- evalscope/metrics/metrics.py +21 -21
- evalscope/metrics/rouge_metric.py +11 -25
- evalscope/models/__init__.py +1 -2
- evalscope/models/api/openai_api.py +40 -29
- evalscope/models/custom/__init__.py +0 -1
- evalscope/models/custom/custom_model.py +3 -3
- evalscope/models/dummy_chat_model.py +7 -8
- evalscope/models/model_adapter.py +89 -156
- evalscope/models/openai_model.py +20 -20
- evalscope/perf/arguments.py +15 -3
- evalscope/perf/benchmark.py +7 -9
- evalscope/perf/http_client.py +3 -8
- evalscope/perf/main.py +10 -0
- evalscope/perf/plugin/api/custom_api.py +1 -2
- evalscope/perf/plugin/api/dashscope_api.py +1 -2
- evalscope/perf/plugin/api/openai_api.py +3 -4
- evalscope/perf/plugin/datasets/base.py +1 -2
- evalscope/perf/plugin/datasets/flickr8k.py +1 -2
- evalscope/perf/plugin/datasets/longalpaca.py +1 -2
- evalscope/perf/plugin/datasets/openqa.py +1 -2
- evalscope/perf/utils/analysis_result.py +1 -2
- evalscope/perf/utils/benchmark_util.py +1 -2
- evalscope/perf/utils/db_util.py +11 -8
- evalscope/perf/utils/local_server.py +19 -13
- evalscope/registry/config/cfg_arena_zhihu.yaml +1 -1
- evalscope/registry/tasks/arc.yaml +2 -3
- evalscope/registry/tasks/bbh.yaml +3 -4
- evalscope/registry/tasks/bbh_mini.yaml +3 -4
- evalscope/registry/tasks/ceval.yaml +3 -3
- evalscope/registry/tasks/ceval_mini.yaml +3 -4
- evalscope/registry/tasks/cmmlu.yaml +3 -3
- evalscope/registry/tasks/eval_qwen-7b-chat_v100.yaml +1 -1
- evalscope/registry/tasks/general_qa.yaml +1 -1
- evalscope/registry/tasks/gsm8k.yaml +2 -2
- evalscope/registry/tasks/mmlu.yaml +3 -3
- evalscope/registry/tasks/mmlu_mini.yaml +3 -3
- evalscope/run.py +184 -375
- evalscope/run_arena.py +20 -25
- evalscope/summarizer.py +16 -17
- evalscope/third_party/longbench_write/README.md +99 -42
- evalscope/third_party/longbench_write/default_task.json +1 -1
- evalscope/third_party/longbench_write/default_task.yaml +8 -7
- evalscope/third_party/longbench_write/eval.py +29 -28
- evalscope/third_party/longbench_write/infer.py +16 -104
- evalscope/third_party/longbench_write/longbench_write.py +5 -5
- evalscope/third_party/longbench_write/resources/judge.txt +1 -1
- evalscope/third_party/longbench_write/tools/data_etl.py +4 -5
- evalscope/third_party/longbench_write/utils.py +0 -1
- evalscope/third_party/toolbench_static/eval.py +14 -15
- evalscope/third_party/toolbench_static/infer.py +48 -69
- evalscope/third_party/toolbench_static/llm/swift_infer.py +4 -12
- evalscope/third_party/toolbench_static/requirements.txt +1 -1
- evalscope/third_party/toolbench_static/toolbench_static.py +3 -3
- evalscope/tools/combine_reports.py +25 -30
- evalscope/tools/rewrite_eval_results.py +14 -46
- evalscope/utils/__init__.py +0 -1
- evalscope/utils/arena_utils.py +18 -48
- evalscope/{perf/utils → utils}/chat_service.py +3 -4
- evalscope/utils/completion_parsers.py +3 -8
- evalscope/utils/logger.py +9 -7
- evalscope/utils/model_utils.py +11 -0
- evalscope/utils/utils.py +12 -138
- evalscope/version.py +2 -2
- {evalscope-0.7.1.dist-info → evalscope-0.8.0.dist-info}/METADATA +125 -120
- evalscope-0.8.0.dist-info/RECORD +285 -0
- tests/cli/test_run.py +54 -15
- tests/perf/test_perf.py +4 -0
- tests/rag/test_clip_benchmark.py +38 -38
- tests/rag/test_mteb.py +3 -2
- tests/rag/test_ragas.py +5 -5
- tests/swift/test_run_swift_eval.py +2 -3
- tests/swift/test_run_swift_vlm_eval.py +2 -3
- tests/swift/test_run_swift_vlm_jugde_eval.py +2 -3
- evalscope/backend/rag_eval/ragas/metrics/__init__.py +0 -2
- evalscope/backend/rag_eval/ragas/metrics/multi_modal_faithfulness.py +0 -91
- evalscope/backend/rag_eval/ragas/metrics/multi_modal_relevance.py +0 -99
- evalscope/cache.py +0 -98
- evalscope/models/template.py +0 -1446
- evalscope/run_ms.py +0 -140
- evalscope/utils/task_cfg_parser.py +0 -10
- evalscope/utils/task_utils.py +0 -22
- evalscope-0.7.1.dist-info/RECORD +0 -286
- {evalscope-0.7.1.dist-info → evalscope-0.8.0.dist-info}/LICENSE +0 -0
- {evalscope-0.7.1.dist-info → evalscope-0.8.0.dist-info}/WHEEL +0 -0
- {evalscope-0.7.1.dist-info → evalscope-0.8.0.dist-info}/entry_points.txt +0 -0
- {evalscope-0.7.1.dist-info → evalscope-0.8.0.dist-info}/top_level.txt +0 -0
evalscope/backend/rag_eval/cmteb/tasks/Classification.py

@@ -4,19 +4,19 @@ from mteb.abstasks.TaskMetadata import TaskMetadata
 
 class TNews(AbsTaskClassification):
     metadata = TaskMetadata(
-        name=
-        description=
-        reference=
+        name='TNews',
+        description='Short Text Classification for News',
+        reference='https://www.cluebenchmarks.com/introduce.html',
         dataset={
-
-
+            'path': 'C-MTEB/TNews-classification',
+            'revision': '317f262bf1e6126357bbe89e875451e4b0938fe4',
         },
-        type=
-        category=
-        modalities=[
-        eval_splits=[
-        eval_langs=[
-        main_score=
+        type='Classification',
+        category='s2s',
+        modalities=['text'],
+        eval_splits=['validation'],
+        eval_langs=['cmn-Hans'],
+        main_score='accuracy',
         date=None,
         domains=None,
         task_subtypes=None,
@@ -67,31 +67,34 @@ class TNews(AbsTaskClassification):
         doi = "10.18653/v1/2020.coling-main.419",
         pages = "4762--4772",
         }""",
-        descriptive_stats={
+        descriptive_stats={
+            'n_samples': None,
+            'avg_character_length': None
+        },
     )
 
     @property
     def metadata_dict(self) -> dict[str, str]:
         metadata_dict = super().metadata_dict
-        metadata_dict[
+        metadata_dict['samples_per_label'] = 32
         return metadata_dict
 
 
 class IFlyTek(AbsTaskClassification):
     metadata = TaskMetadata(
-        name=
-        description=
-        reference=
+        name='IFlyTek',
+        description='Long Text classification for the description of Apps',
+        reference='https://www.cluebenchmarks.com/introduce.html',
         dataset={
-
-
+            'path': 'C-MTEB/IFlyTek-classification',
+            'revision': '421605374b29664c5fc098418fe20ada9bd55f8a',
         },
-        type=
-        category=
-        modalities=[
-        eval_splits=[
-        eval_langs=[
-        main_score=
+        type='Classification',
+        category='s2s',
+        modalities=['text'],
+        eval_splits=['validation'],
+        eval_langs=['cmn-Hans'],
+        main_score='accuracy',
         date=None,
         domains=None,
         task_subtypes=None,
@@ -143,32 +146,36 @@ class IFlyTek(AbsTaskClassification):
         pages = "4762--4772",
         abstract = "The advent of natural language understanding (NLU) benchmarks for English, such as GLUE and SuperGLUE allows new NLU models to be evaluated across a diverse set of tasks. These comprehensive benchmarks have facilitated a broad range of research and applications in natural language processing (NLP). The problem, however, is that most such benchmarks are limited to English, which has made it difficult to replicate many of the successes in English NLU for other languages. To help remedy this issue, we introduce the first large-scale Chinese Language Understanding Evaluation (CLUE) benchmark. CLUE is an open-ended, community-driven project that brings together 9 tasks spanning several well-established single-sentence/sentence-pair classification tasks, as well as machine reading comprehension, all on original Chinese text. To establish results on these tasks, we report scores using an exhaustive set of current state-of-the-art pre-trained Chinese models (9 in total). We also introduce a number of supplementary datasets and additional tools to help facilitate further progress on Chinese NLU. Our benchmark is released at https://www.cluebenchmarks.com",
         }""",
-        descriptive_stats={
+        descriptive_stats={
+            'n_samples': None,
+            'avg_character_length': None
+        },
     )
 
     @property
     def metadata_dict(self) -> dict[str, str]:
         metadata_dict = super().metadata_dict
-        metadata_dict[
-        metadata_dict[
+        metadata_dict['samples_per_label'] = 32
+        metadata_dict['n_experiments'] = 5
         return metadata_dict
 
 
 class MultilingualSentiment(AbsTaskClassification):
     metadata = TaskMetadata(
-        name=
-        description=
-
+        name='MultilingualSentiment',
+        description=
+        'A collection of multilingual sentiments datasets grouped into 3 classes -- positive, neutral, negative',
+        reference='https://github.com/tyqiangz/multilingual-sentiment-datasets',
         dataset={
-
-
+            'path': 'C-MTEB/MultilingualSentiment-classification',
+            'revision': '46958b007a63fdbf239b7672c25d0bea67b5ea1a',
        },
-        type=
-        category=
-        modalities=[
-        eval_splits=[
-        eval_langs=[
-        main_score=
+        type='Classification',
+        category='s2s',
+        modalities=['text'],
+        eval_splits=['validation', 'test'],
+        eval_langs=['cmn-Hans'],
+        main_score='accuracy',
         date=None,
         domains=None,
         task_subtypes=None,
@@ -177,31 +184,34 @@ class MultilingualSentiment(AbsTaskClassification):
         dialect=None,
         sample_creation=None,
         bibtex_citation=None,
-        descriptive_stats={
+        descriptive_stats={
+            'n_samples': None,
+            'avg_character_length': None
+        },
     )
 
     @property
     def metadata_dict(self) -> dict[str, str]:
         metadata_dict = super().metadata_dict
-        metadata_dict[
+        metadata_dict['samples_per_label'] = 32
        return metadata_dict
 
 
 class JDReview(AbsTaskClassification):
     metadata = TaskMetadata(
-        name=
-        description=
-        reference=
+        name='JDReview',
+        description='review for iphone',
+        reference='https://aclanthology.org/2023.nodalida-1.20/',
         dataset={
-
-
+            'path': 'C-MTEB/JDReview-classification',
+            'revision': 'b7c64bd89eb87f8ded463478346f76731f07bf8b',
         },
-        type=
-        category=
-        modalities=[
-        eval_splits=[
-        eval_langs=[
-        main_score=
+        type='Classification',
+        category='s2s',
+        modalities=['text'],
+        eval_splits=['test'],
+        eval_langs=['cmn-Hans'],
+        main_score='accuracy',
         date=None,
         domains=None,
         task_subtypes=None,
@@ -215,31 +225,34 @@ class JDReview(AbsTaskClassification):
         journal={arXiv preprint arXiv:2309.07597},
         year={2023}
         }""",
-        descriptive_stats={
+        descriptive_stats={
+            'n_samples': None,
+            'avg_character_length': None
+        },
     )
 
     @property
     def metadata_dict(self) -> dict[str, str]:
         metadata_dict = super().metadata_dict
-        metadata_dict[
+        metadata_dict['samples_per_label'] = 32
         return metadata_dict
 
 
 class OnlineShopping(AbsTaskClassification):
     metadata = TaskMetadata(
-        name=
-        description=
-        reference=
+        name='OnlineShopping',
+        description='Sentiment Analysis of User Reviews on Online Shopping Websites',
+        reference='https://aclanthology.org/2023.nodalida-1.20/',
         dataset={
-
-
+            'path': 'C-MTEB/OnlineShopping-classification',
+            'revision': 'e610f2ebd179a8fda30ae534c3878750a96db120',
         },
-        type=
-        category=
-        modalities=[
-        eval_splits=[
-        eval_langs=[
-        main_score=
+        type='Classification',
+        category='s2s',
+        modalities=['text'],
+        eval_splits=['test'],
+        eval_langs=['cmn-Hans'],
+        main_score='accuracy',
         date=None,
         domains=None,
         task_subtypes=None,
@@ -253,31 +266,34 @@ class OnlineShopping(AbsTaskClassification):
         journal={arXiv preprint arXiv:2309.07597},
         year={2023}
         }""",
-        descriptive_stats={
+        descriptive_stats={
+            'n_samples': None,
+            'avg_character_length': None
+        },
     )
 
     @property
     def metadata_dict(self) -> dict[str, str]:
         metadata_dict = super().metadata_dict
-        metadata_dict[
+        metadata_dict['samples_per_label'] = 32
         return metadata_dict
 
 
 class Waimai(AbsTaskClassification):
     metadata = TaskMetadata(
-        name=
-        description=
-        reference=
+        name='Waimai',
+        description='Sentiment Analysis of user reviews on takeaway platforms',
+        reference='https://aclanthology.org/2023.nodalida-1.20/',
         dataset={
-
-
+            'path': 'C-MTEB/waimai-classification',
+            'revision': '339287def212450dcaa9df8c22bf93e9980c7023',
         },
-        type=
-        category=
-        modalities=[
-        eval_splits=[
-        eval_langs=[
-        main_score=
+        type='Classification',
+        category='s2s',
+        modalities=['text'],
+        eval_splits=['test'],
+        eval_langs=['cmn-Hans'],
+        main_score='accuracy',
         date=None,
         domains=None,
         task_subtypes=None,
@@ -291,12 +307,15 @@ class Waimai(AbsTaskClassification):
         journal={arXiv preprint arXiv:2309.07597},
         year={2023}
         }""",
-        descriptive_stats={
+        descriptive_stats={
+            'n_samples': None,
+            'avg_character_length': None
+        },
     )
 
     @property
     def metadata_dict(self) -> dict[str, str]:
         metadata_dict = super().metadata_dict
-        metadata_dict[
+        metadata_dict['samples_per_label'] = 32
 
         return metadata_dict
evalscope/backend/rag_eval/cmteb/tasks/Clustering.py

@@ -1,12 +1,7 @@
 import itertools
-
 from datasets import Dataset, DatasetDict
-
 from mteb.abstasks.AbsTaskClustering import AbsTaskClustering
-from mteb.abstasks.AbsTaskClusteringFast import (
-    AbsTaskClusteringFast,
-    check_label_distribution,
-)
+from mteb.abstasks.AbsTaskClusteringFast import AbsTaskClusteringFast, check_label_distribution
 from mteb.abstasks.TaskMetadata import TaskMetadata
 
 NUM_SAMPLES = 2048
@@ -46,7 +41,9 @@ class CLSClusteringFastS2S(AbsTaskClusteringFast):
         primaryClass={cs.CL}
         }""", # noqa
         descriptive_stats={
-            'n_samples': {
+            'n_samples': {
+                'test': NUM_SAMPLES
+            },
             'avg_character_length': {},
         },
     )
@@ -55,9 +52,7 @@ class CLSClusteringFastS2S(AbsTaskClusteringFast):
         ds = {}
         for split in self.metadata.eval_splits:
             labels = list(itertools.chain.from_iterable(self.dataset[split]['labels']))
-            sentences = list(
-                itertools.chain.from_iterable(self.dataset[split]['sentences'])
-            )
+            sentences = list(itertools.chain.from_iterable(self.dataset[split]['sentences']))
 
             check_label_distribution(self.dataset[split])
 
@@ -106,7 +101,9 @@ class CLSClusteringFastP2P(AbsTaskClusteringFast):
         primaryClass={cs.CL}
         }""", # noqa
         descriptive_stats={
-            'n_samples': {
+            'n_samples': {
+                'test': NUM_SAMPLES
+            },
             'avg_character_length': {},
         },
     )
@@ -115,9 +112,7 @@ class CLSClusteringFastP2P(AbsTaskClusteringFast):
         ds = {}
         for split in self.metadata.eval_splits:
             labels = list(itertools.chain.from_iterable(self.dataset[split]['labels']))
-            sentences = list(
-                itertools.chain.from_iterable(self.dataset[split]['sentences'])
-            )
+            sentences = list(itertools.chain.from_iterable(self.dataset[split]['sentences']))
 
             check_label_distribution(self.dataset[split])
 
@@ -166,7 +161,9 @@ class ThuNewsClusteringFastS2S(AbsTaskClusteringFast):
         url = {https://github.com/thunlp/THUCTC}
         }""",
         descriptive_stats={
-            'n_samples': {
+            'n_samples': {
+                'test': NUM_SAMPLES
+            },
             'avg_character_length': {},
         },
     )
@@ -175,9 +172,7 @@ class ThuNewsClusteringFastS2S(AbsTaskClusteringFast):
         ds = {}
         for split in self.metadata.eval_splits:
             labels = list(itertools.chain.from_iterable(self.dataset[split]['labels']))
-            sentences = list(
-                itertools.chain.from_iterable(self.dataset[split]['sentences'])
-            )
+            sentences = list(itertools.chain.from_iterable(self.dataset[split]['sentences']))
 
             check_label_distribution(self.dataset[split])
 
@@ -226,7 +221,9 @@ class ThuNewsClusteringFastP2P(AbsTaskClusteringFast):
         url = {https://github.com/thunlp/THUCTC}
         }""",
         descriptive_stats={
-            'n_samples': {
+            'n_samples': {
+                'test': NUM_SAMPLES
+            },
             'avg_character_length': {},
         },
     )
@@ -235,9 +232,7 @@ class ThuNewsClusteringFastP2P(AbsTaskClusteringFast):
         ds = {}
         for split in self.metadata.eval_splits:
             labels = list(itertools.chain.from_iterable(self.dataset[split]['labels']))
-            sentences = list(
-                itertools.chain.from_iterable(self.dataset[split]['sentences'])
-            )
+            sentences = list(itertools.chain.from_iterable(self.dataset[split]['sentences']))
 
             check_label_distribution(self.dataset[split])
 
evalscope/backend/rag_eval/cmteb/tasks/CustomTask.py

@@ -1,31 +1,29 @@
-from typing import Optional
 from mteb import AbsTaskRetrieval
 from mteb import HFDataLoader as CustomDataLoader
 from mteb.abstasks.TaskMetadata import TaskMetadata
+from typing import Optional
 
 
 class CustomRetrieval(AbsTaskRetrieval):
     metadata: TaskMetadata
     ignore_identical_ids: bool = True
 
-    def __init__(
-        self, dataset_path: Optional[str] = "custom_eval/text/retrieval", **kwargs
-    ):
+    def __init__(self, dataset_path: Optional[str] = 'custom_eval/text/retrieval', **kwargs):
         super().__init__(**kwargs)
         self.metadata = TaskMetadata(
-            name=
-            description=
+            name='CustomRetrieval',
+            description='CustomRetrieval Task',
             reference=None,
             dataset={
-
-
+                'path': dataset_path,
+                'revision': 'v1',
             },
-            type=
-            category=
-            modalities=[
-            eval_splits=[
-            eval_langs=[
-            main_score=
+            type='Retrieval',
+            category='s2p',
+            modalities=['text'],
+            eval_splits=['test'],
+            eval_langs=['cmn-Hans'],
+            main_score='recall_at_5',
             date=None,
             domains=None,
             task_subtypes=None,
@@ -33,7 +31,7 @@ class CustomRetrieval(AbsTaskRetrieval):
             annotations_creators=None,
             dialect=None,
             sample_creation=None,
-            bibtex_citation=
+            bibtex_citation='',
             descriptive_stats={},
         )
 
@@ -41,17 +39,17 @@ class CustomRetrieval(AbsTaskRetrieval):
         if self.data_loaded:
             return
         self.corpus, self.queries, self.relevant_docs = {}, {}, {}
-        dataset_path = self.metadata_dict[
+        dataset_path = self.metadata_dict['dataset']['path']
 
-        for split in kwargs.get(
+        for split in kwargs.get('eval_splits', self.metadata_dict['eval_splits']):
             corpus, queries, qrels = CustomDataLoader(
                 data_folder=dataset_path,
                 streaming=False,
                 keep_in_memory=False,
             ).load(split=split)
             # Conversion from DataSet
-            queries = {query[
-            corpus = {doc[
+            queries = {query['id']: query['text'] for query in queries}
+            corpus = {doc['id']: {'text': doc['text']} for doc in corpus}
             self.corpus[split], self.queries[split], self.relevant_docs[split] = (
                 corpus,
                 queries,
evalscope/backend/rag_eval/cmteb/tasks/PairClassification.py

@@ -4,19 +4,19 @@ from mteb.abstasks.TaskMetadata import TaskMetadata
 
 class Ocnli(AbsTaskPairClassification):
     metadata = TaskMetadata(
-        name=
-        description=
-        reference=
+        name='Ocnli',
+        description='Original Chinese Natural Language Inference dataset',
+        reference='https://arxiv.org/abs/2010.05444',
         dataset={
-
-
+            'path': 'C-MTEB/OCNLI',
+            'revision': '66e76a618a34d6d565d5538088562851e6daa7ec',
         },
-        type=
-        category=
-        modalities=[
-        eval_splits=[
-        eval_langs=[
-        main_score=
+        type='PairClassification',
+        category='s2s',
+        modalities=['text'],
+        eval_splits=['validation'],
+        eval_langs=['cmn-Hans'],
+        main_score='max_accuracy',
         date=None,
         domains=None,
         task_subtypes=None,
@@ -25,36 +25,39 @@ class Ocnli(AbsTaskPairClassification):
         dialect=None,
         sample_creation=None,
         bibtex_citation="""@misc{hu2020ocnli,
-        title={OCNLI: Original Chinese Natural Language Inference},
+        title={OCNLI: Original Chinese Natural Language Inference},
         author={Hai Hu and Kyle Richardson and Liang Xu and Lu Li and Sandra Kuebler and Lawrence S. Moss},
         year={2020},
         eprint={2010.05444},
         archivePrefix={arXiv},
         primaryClass={cs.CL}
         }""",
-        descriptive_stats={
+        descriptive_stats={
+            'n_samples': None,
+            'avg_character_length': None
+        },
     )
 
     def dataset_transform(self):
-        self.dataset = self.dataset.rename_column(
-        self.dataset = self.dataset.rename_column(
+        self.dataset = self.dataset.rename_column('sent1', 'sentence1')
+        self.dataset = self.dataset.rename_column('sent2', 'sentence2')
 
 
 class Cmnli(AbsTaskPairClassification):
     metadata = TaskMetadata(
-        name=
-        description=
-        reference=
+        name='Cmnli',
+        description='Chinese Multi-Genre NLI',
+        reference='https://huggingface.co/datasets/clue/viewer/cmnli',
         dataset={
-
-
+            'path': 'C-MTEB/CMNLI',
+            'revision': '41bc36f332156f7adc9e38f53777c959b2ae9766',
         },
-        type=
-        category=
-        modalities=[
-        eval_splits=[
-        eval_langs=[
-        main_score=
+        type='PairClassification',
+        category='s2s',
+        modalities=['text'],
+        eval_splits=['validation', 'test'],
+        eval_langs=['cmn-Hans'],
+        main_score='max_accuracy',
         date=None,
         domains=None,
         task_subtypes=None,
@@ -105,9 +108,12 @@ class Cmnli(AbsTaskPairClassification):
         doi = "10.18653/v1/2020.coling-main.419",
         pages = "4762--4772",
         }""",
-        descriptive_stats={
+        descriptive_stats={
+            'n_samples': None,
+            'avg_character_length': None
+        },
     )
 
     def dataset_transform(self):
-        self.dataset = self.dataset.rename_column(
-        self.dataset = self.dataset.rename_column(
+        self.dataset = self.dataset.rename_column('sent1', 'sentence1')
+        self.dataset = self.dataset.rename_column('sent2', 'sentence2')
evalscope/backend/rag_eval/cmteb/tasks/Reranking.py

@@ -33,7 +33,10 @@ class T2Reranking(AbsTaskReranking):
         archivePrefix={arXiv},
         primaryClass={cs.IR}
         }""", # noqa
-        descriptive_stats={
+        descriptive_stats={
+            'n_samples': None,
+            'avg_character_length': None
+        },
     )
 
 
@@ -68,7 +71,10 @@ class MMarcoReranking(AbsTaskReranking):
         archivePrefix={arXiv},
         primaryClass={cs.CL}
         }""", # noqa
-        descriptive_stats={
+        descriptive_stats={
+            'n_samples': None,
+            'avg_character_length': None
+        },
     )
 
 
@@ -105,8 +111,12 @@ class CMedQAv1(AbsTaskReranking):
         publisher={Multidisciplinary Digital Publishing Institute}
         }""",
         descriptive_stats={
-            'n_samples': {
-
+            'n_samples': {
+                'test': 2000
+            },
+            'avg_character_length': {
+                'test': 165
+            },
         },
     )
 
@@ -146,5 +156,8 @@ keywords={Biomedical imaging;Data mining;Semantics;Medical services;Feature extr
         doi={10.1109/ACCESS.2018.2883637},
         ISSN={2169-3536},
         month={},}""", # noqa
-        descriptive_stats={
+        descriptive_stats={
+            'n_samples': None,
+            'avg_character_length': None
+        },
     )