evalscope 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff shows the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of evalscope might be problematic.
Files changed (97)
  1. evalscope/api/benchmark/__init__.py +1 -1
  2. evalscope/api/benchmark/adapters/__init__.py +2 -0
  3. evalscope/api/benchmark/adapters/default_data_adapter.py +1 -0
  4. evalscope/api/benchmark/adapters/image_edit_adapter.py +82 -0
  5. evalscope/api/benchmark/adapters/text2image_adapter.py +7 -6
  6. evalscope/api/benchmark/adapters/vision_language_adapter.py +6 -0
  7. evalscope/api/benchmark/benchmark.py +35 -0
  8. evalscope/api/benchmark/meta.py +6 -0
  9. evalscope/api/dataset/dataset.py +6 -6
  10. evalscope/api/dataset/loader.py +2 -1
  11. evalscope/api/evaluator/cache.py +24 -1
  12. evalscope/api/evaluator/state.py +12 -1
  13. evalscope/api/messages/__init__.py +1 -0
  14. evalscope/api/messages/chat_message.py +47 -2
  15. evalscope/api/metric/scorer.py +15 -7
  16. evalscope/api/mixin/__init__.py +0 -1
  17. evalscope/api/model/generate_config.py +1 -3
  18. evalscope/api/model/model.py +4 -1
  19. evalscope/app/app.py +3 -0
  20. evalscope/app/ui/single_model.py +3 -3
  21. evalscope/app/utils/data_utils.py +7 -7
  22. evalscope/app/utils/env_utils.py +12 -0
  23. evalscope/app/utils/text_utils.py +14 -12
  24. evalscope/arguments.py +2 -4
  25. evalscope/backend/opencompass/backend_manager.py +0 -2
  26. evalscope/backend/rag_eval/utils/embedding.py +9 -1
  27. evalscope/benchmarks/bfcl/bfcl_adapter.py +2 -6
  28. evalscope/benchmarks/bfcl/generation.py +2 -2
  29. evalscope/benchmarks/ceval/ceval_adapter.py +1 -2
  30. evalscope/benchmarks/data_collection/data_collection_adapter.py +23 -19
  31. evalscope/benchmarks/frames/frames_adapter.py +2 -1
  32. evalscope/benchmarks/general_arena/general_arena_adapter.py +5 -1
  33. evalscope/benchmarks/ifeval/instructions_util.py +2 -3
  34. evalscope/benchmarks/image_edit/gedit/gedit_adapter.py +138 -0
  35. evalscope/benchmarks/image_edit/gedit/utils.py +372 -0
  36. evalscope/benchmarks/image_edit/gedit/vie_prompts.py +406 -0
  37. evalscope/benchmarks/math_vista/math_vista_adapter.py +129 -0
  38. evalscope/benchmarks/mmmu/__init__.py +0 -0
  39. evalscope/benchmarks/mmmu/mmmu_adapter.py +159 -0
  40. evalscope/benchmarks/mmmu_pro/__init__.py +0 -0
  41. evalscope/benchmarks/mmmu_pro/mmmu_pro_adapter.py +129 -0
  42. evalscope/benchmarks/needle_haystack/needle_haystack_adapter.py +5 -1
  43. evalscope/benchmarks/tau_bench/generation.py +1 -1
  44. evalscope/benchmarks/tau_bench/tau_bench_adapter.py +15 -19
  45. evalscope/benchmarks/text2image/__init__.py +0 -0
  46. evalscope/benchmarks/{aigc/t2i → text2image}/evalmuse_adapter.py +3 -1
  47. evalscope/benchmarks/{aigc/t2i → text2image}/genai_bench_adapter.py +2 -2
  48. evalscope/benchmarks/{aigc/t2i → text2image}/general_t2i_adapter.py +1 -1
  49. evalscope/benchmarks/{aigc/t2i → text2image}/hpdv2_adapter.py +7 -2
  50. evalscope/benchmarks/{aigc/t2i → text2image}/tifa_adapter.py +1 -0
  51. evalscope/benchmarks/truthful_qa/truthful_qa_adapter.py +1 -2
  52. evalscope/cli/start_app.py +7 -1
  53. evalscope/cli/start_perf.py +7 -1
  54. evalscope/config.py +72 -13
  55. evalscope/constants.py +8 -0
  56. evalscope/evaluator/evaluator.py +6 -4
  57. evalscope/metrics/llm_judge.py +19 -7
  58. evalscope/models/image_edit_model.py +125 -0
  59. evalscope/models/model_apis.py +20 -0
  60. evalscope/models/openai_compatible.py +3 -0
  61. evalscope/models/text2image_model.py +2 -2
  62. evalscope/models/utils/openai.py +7 -4
  63. evalscope/perf/benchmark.py +2 -0
  64. evalscope/perf/utils/benchmark_util.py +8 -5
  65. evalscope/perf/utils/local_server.py +3 -0
  66. evalscope/report/__init__.py +0 -1
  67. evalscope/report/generator.py +8 -87
  68. evalscope/run.py +9 -5
  69. evalscope/third_party/toolbench_static/llm/swift_infer.py +0 -4
  70. evalscope/utils/chat_service.py +1 -1
  71. evalscope/utils/import_utils.py +23 -1
  72. evalscope/utils/io_utils.py +42 -1
  73. evalscope/utils/model_utils.py +4 -3
  74. evalscope/utils/multi_choices.py +23 -6
  75. evalscope/version.py +2 -2
  76. {evalscope-1.0.0.dist-info → evalscope-1.0.1.dist-info}/METADATA +12 -15
  77. {evalscope-1.0.0.dist-info → evalscope-1.0.1.dist-info}/RECORD +94 -80
  78. tests/benchmark/test_eval.py +30 -31
  79. tests/benchmark/test_image_edit.py +65 -0
  80. tests/benchmark/test_vlm.py +80 -0
  81. tests/cli/test_all.py +83 -43
  82. tests/cli/test_collection.py +8 -5
  83. tests/cli/test_reasoning.py +81 -0
  84. tests/common.py +73 -0
  85. tests/perf/test_perf.py +4 -2
  86. tests/rag/test_clip_benchmark.py +0 -3
  87. evalscope/api/mixin/dataset_mixin.py +0 -105
  88. evalscope/benchmarks/aigc/i2i/general_i2i_adapter.py +0 -44
  89. tests/aigc/__init__.py +0 -1
  90. /evalscope/benchmarks/{aigc → image_edit}/__init__.py +0 -0
  91. /evalscope/benchmarks/{aigc/i2i → image_edit/gedit}/__init__.py +0 -0
  92. /evalscope/benchmarks/{aigc/t2i → math_vista}/__init__.py +0 -0
  93. {evalscope-1.0.0.dist-info → evalscope-1.0.1.dist-info}/LICENSE +0 -0
  94. {evalscope-1.0.0.dist-info → evalscope-1.0.1.dist-info}/WHEEL +0 -0
  95. {evalscope-1.0.0.dist-info → evalscope-1.0.1.dist-info}/entry_points.txt +0 -0
  96. {evalscope-1.0.0.dist-info → evalscope-1.0.1.dist-info}/top_level.txt +0 -0
  97. /tests/{aigc → benchmark}/test_t2i.py +0 -0
@@ -47,7 +47,6 @@ class OpenCompassBackendManager(BackendManager):
         datasets: list, the datasets.
         models: list, the models.
         work_dir (Optional): str, the working directory. Default to None, which means the current directory.
-        dry_run (Optional): bool, the dry-run flag. Default to False.
         debug (Optional): bool, the debug flag. Default to False.
         reuse (Optional): str, reuse previous outputs & results. Default to None.
         generation_kwargs (Optional): dict, the generation config. Default to {}.
@@ -140,7 +139,6 @@ class OpenCompassBackendManager(BackendManager):
             cmd_str = f'python -m run_oc ' \
                       f'--models {" ".join(self.args.models)} ' \
                       f'--datasets {" ".join(self.args.datasets)} ' \
-                      f'{self.get_restore_arg("dry-run", self.args.dry_run)} ' \
                       f'{self.get_arg_with_default("work-dir", self.args.work_dir)}'

         elif cmd_mode == CmdMode.SCRIPT:
@@ -164,6 +164,13 @@ class CrossEncoderModel(BaseModel):
             max_length=self.max_seq_length,
             automodel_args=self.model_kwargs,
         )
+        self.tokenizer = self.model.tokenizer
+        # set pad token
+        if self.tokenizer.pad_token is None:
+            self.tokenizer.pad_token = self.tokenizer.eos_token
+        if ('pad_token_id' not in self.model.config) or (self.model.config.pad_token_id is None):
+            self.model.config.update({'pad_token_id': self.tokenizer.eos_token_id})
+
         self.supported_encode_params = get_supported_params(self.model.predict)

     def predict(self, sentences: List[List[str]], **kwargs) -> Tensor:
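The pad-token fallback added to CrossEncoderModel follows a common Hugging Face pattern: reuse the EOS token whenever no pad token is defined, so batched padding does not fail. A minimal standalone sketch of the same pattern (the gpt2 checkpoint is an arbitrary example, not something evalscope uses):

from transformers import AutoConfig, AutoTokenizer

# Any causal LM shipped without a pad token behaves the same way; gpt2 is just an example.
tokenizer = AutoTokenizer.from_pretrained('gpt2')
config = AutoConfig.from_pretrained('gpt2')

# Fall back to the EOS token when no pad token is defined.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
if getattr(config, 'pad_token_id', None) is None:
    config.pad_token_id = tokenizer.eos_token_id

print(tokenizer.pad_token, config.pad_token_id)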
@@ -189,6 +196,7 @@ class APIEmbeddingModel(BaseModel):
         self.openai_api_base = kwargs.get('api_base')
         self.openai_api_key = kwargs.get('api_key')
         self.dimensions = kwargs.get('dimensions')
+        self.check_embedding_ctx_length = kwargs.get('check_embedding_ctx_length', False)
         self.framework = ['API']

         self.model = OpenAIEmbeddings(
@@ -196,7 +204,7 @@ class APIEmbeddingModel(BaseModel):
             openai_api_base=self.openai_api_base,
             openai_api_key=self.openai_api_key,
             dimensions=self.dimensions,
-            check_embedding_ctx_length=False
+            check_embedding_ctx_length=self.check_embedding_ctx_length,
         )

         super().__init__(model_name_or_path=self.model_name, **kwargs)
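check_embedding_ctx_length is a LangChain OpenAIEmbeddings option: when enabled it tokenizes inputs with tiktoken and splits anything over the model's context length, which tends to break non-OpenAI, OpenAI-compatible endpoints, presumably why it was hardcoded to False before. A minimal sketch of constructing the client directly, assuming the langchain_openai package; the endpoint, key, and model name are placeholders:

from langchain_openai import OpenAIEmbeddings

emb = OpenAIEmbeddings(
    model='text-embedding-v3',                   # placeholder model name
    openai_api_base='http://localhost:8000/v1',  # placeholder OpenAI-compatible endpoint
    openai_api_key='EMPTY',                      # placeholder key
    check_embedding_ctx_length=False,            # skip the tiktoken-based length check
)
vector = emb.embed_query('hello world')
print(len(vector))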
@@ -1,4 +1,3 @@
-import importlib
 import json
 import re
 import traceback
@@ -12,6 +11,7 @@ from evalscope.api.metric import Score
 from evalscope.api.model import Model, ModelOutput
 from evalscope.api.registry import register_benchmark
 from evalscope.constants import Tags
+from evalscope.utils.import_utils import check_import
 from evalscope.utils.logger import get_logger

 logger = get_logger()
@@ -67,11 +67,7 @@ class BFCLAdapter(DefaultDataAdapter):
     def __init__(self, **kwargs):
         super().__init__(**kwargs)

-        spec = importlib.util.find_spec('bfcl_eval')
-        if spec is None:
-            raise ImportError(
-                '`bfcl_eval` not found, please install it with `pip install bfcl-eval==2025.6.16` before evaluating.'
-            )
+        check_import('bfcl_eval', package='bfcl-eval==2025.6.16', raise_error=True)

         self.category_map = SUBJECT_MAPPING
         self.reformat_subset = True
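The replacement helper lives in evalscope/utils/import_utils.py, whose body is not part of this diff; judging from the call site, it wraps the importlib.util.find_spec check that was previously inlined. A plausible minimal sketch of such a guard, with internals assumed rather than taken from the package:

import importlib.util


def check_import(module: str, package: str, raise_error: bool = False) -> bool:
    """Return True if `module` is importable; otherwise raise or warn with an install hint."""
    if importlib.util.find_spec(module) is not None:
        return True
    message = f'`{module}` not found, please install it with `pip install {package}` before evaluating.'
    if raise_error:
        raise ImportError(message)
    print(message)  # the real helper presumably logs via evalscope's logger instead
    return False


check_import('json', package='json')  # returns True: stdlib module is always importable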
@@ -78,7 +78,7 @@ def generate_turn(model: Model, row: dict[str, Any]):
         if isinstance(message, str):
             result = message
         else:
-            result = message.content
+            result = message.text

         logger.debug(f'Turn:{turn_idx} Step:{n_steps} Result: {result}')
         current_responses.append(result)
@@ -186,7 +186,7 @@ def generate_turn_with_tools(model: Model, row: dict[str, Any]):
             logger.error(f'Error converting tool calls to function call strings: {e}')
             tool_call_strs = None
         else:
-            model_responses = [message.content]
+            model_responses = [message.text]
             tool_call_strs = None

         current_responses.extend(model_responses)
@@ -1,10 +1,9 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.

-from functools import partial
 from typing import Any, Dict

 from evalscope.api.benchmark import BenchmarkMeta, MultiChoiceAdapter
-from evalscope.api.dataset import Dataset, RemoteDataLoader, Sample
+from evalscope.api.dataset import Sample
 from evalscope.api.registry import register_benchmark
 from evalscope.constants import Tags
 from evalscope.utils.logger import get_logger
@@ -6,9 +6,7 @@ from typing import Any, Dict, List
 from evalscope.api.benchmark import BenchmarkMeta, DataAdapter, DefaultDataAdapter
 from evalscope.api.dataset import DatasetDict, LocalDataLoader, Sample
 from evalscope.api.evaluator import TaskState
-from evalscope.api.metric import Score
 from evalscope.api.metric.scorer import AggScore, SampleScore
-from evalscope.api.model.model import Model
 from evalscope.api.registry import get_benchmark, register_benchmark
 from evalscope.config import TaskConfig
 from evalscope.constants import DataCollection, Tags
@@ -23,7 +21,11 @@ logger = get_logger()
 BenchmarkMeta(
     name=DataCollection.NAME,
     dataset_id='',  # dataset_id need to be set
-    description='Data collection',
+    description='Custom Data collection, mixing multiple evaluation datasets for '
+    'a unified evaluation, aiming to use less data to achieve a more comprehensive '
+    'assessment of the model\'s capabilities. '
+    '[Usage Reference](https://evalscope.readthedocs.io/zh-cn/latest/advanced_guides/collection/index.html)',
+    tags=[Tags.CUSTOM],
     metric_list=['acc'],
     eval_split='test',
     prompt_template='',
@@ -55,9 +57,10 @@ class DataCollectionAdapter(DefaultDataAdapter):
             data_id_or_path=dataset_path,
             split=self.eval_split,
             sample_fields=self.record_to_sample,
-            subset=self.default_subset,
+            subset='test',  # NOTE: using hardcoded test subset
             limit=self.limit,
-            repeats=self.repeats
+            repeats=self.repeats,
+            shuffle=self.shuffle,
         ).load()

         test_dataset = DatasetDict({self.default_subset: dataset})
@@ -95,7 +98,6 @@ class DataCollectionAdapter(DefaultDataAdapter):

         # load dataset args
         dataset_args = copy.deepcopy(self._task_config.dataset_args)
-        common_args = dataset_args.get(DataCollection.NAME, {})

         # Iterate through each sample in the dataset
         dataset = self.test_dataset[self.default_subset]
@@ -108,7 +110,6 @@ class DataCollectionAdapter(DefaultDataAdapter):

             # update dataset args
             cur_dataset_args = dataset_args.get(dataset_name, {})
-            cur_dataset_args.update(common_args)

             # Initialize dataset adapter
             if dataset_name not in self.dataset_adapters:
@@ -141,19 +142,22 @@ class DataCollectionAdapter(DefaultDataAdapter):
         data = []
         for sample_score in sample_scores:
             collection_info = sample_score.sample_metadata[DataCollection.INFO]
-            for metric_name, value in sample_score.score.value.items():
-                data.append(
-                    dict(
-                        task_type=collection_info['task_type'],
-                        categories=tuple(collection_info['categories']),
-                        dataset_name=collection_info['dataset_name'],
-                        subset_name=collection_info['subset_name'],
-                        tags=collection_info['tags'],
-                        sample_id=sample_score.sample_id,
-                        metric=metric_name,
-                        score=value
-                    )
+            main_score = sample_score.score.main_value
+            main_metric = sample_score.score.main_score_name
+
+            # use main score
+            data.append(
+                dict(
+                    task_type=collection_info['task_type'],
+                    categories=tuple(collection_info['categories']),
+                    dataset_name=collection_info['dataset_name'],
+                    subset_name=collection_info['subset_name'],
+                    tags=collection_info['tags'],
+                    sample_id=sample_score.sample_id,
+                    metric=main_metric,
+                    score=main_score
                 )
+            )

         df = pd.DataFrame(data)

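With each sample now contributing exactly one row keyed by its main metric, a per-dataset average can be computed with a plain pandas groupby. The rows below are dummy values shaped like the dicts built above, purely for illustration; this is not the adapter's actual aggregation code:

import pandas as pd

data = [
    dict(dataset_name='gsm8k', subset_name='main', metric='acc', sample_id=1, score=1.0),
    dict(dataset_name='gsm8k', subset_name='main', metric='acc', sample_id=2, score=0.0),
    dict(dataset_name='ceval', subset_name='law', metric='acc', sample_id=3, score=1.0),
]
df = pd.DataFrame(data)

# One average per dataset/subset/metric, e.g. gsm8k / main / acc -> 0.5
print(df.groupby(['dataset_name', 'subset_name', 'metric'])['score'].mean())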
@@ -61,7 +61,8 @@ class FramesAdapter(DefaultDataAdapter):
             sample_fields=self.record_to_sample,
             subset='test',
             limit=self.limit,
-            repeats=self.repeats
+            repeats=self.repeats,
+            shuffle=self.shuffle,
         ).load()

         test_dataset = DatasetDict({'test': dataset})
@@ -75,7 +75,11 @@ class GeneralArenaAdapter(DefaultDataAdapter):
         dataset_dict = {}
         for subset_name, samples in datasets.items():
             dataset = DictDataLoader(
-                dict_list=samples, limit=self.limit, repeats=self.repeats, sample_fields=self.record_to_sample
+                dict_list=samples,
+                limit=self.limit,
+                shuffle=self.shuffle,
+                repeats=self.repeats,
+                sample_fields=self.record_to_sample
             ).load()
             dataset_dict[subset_name] = dataset

@@ -14,7 +14,6 @@
 """Utility library of instructions."""

 import functools
-import immutabledict
 import nltk
 import os
 import random
@@ -1551,7 +1550,7 @@ WORD_LIST = [
 ]  # pylint: disable=line-too-long

 # ISO 639-1 codes to language names.
-LANGUAGE_CODES = immutabledict.immutabledict({
+LANGUAGE_CODES = {
     'en': 'English',
     'es': 'Spanish',
     'pt': 'Portuguese',
@@ -1582,7 +1581,7 @@ LANGUAGE_CODES = immutabledict.immutabledict({
     'pa': 'Punjabi',
     'ml': 'Malayalam',
     'fi': 'Finnish',
-})
+}

 _ALPHABETS = '([A-Za-z])'
 _PREFIXES = '(Mr|St|Mrs|Ms|Dr)[.]'
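Dropping the immutabledict dependency leaves LANGUAGE_CODES as an ordinary mutable dict. If read-only semantics were still wanted, the standard library's types.MappingProxyType provides them without an extra package; this is only an alternative sketch, not what the release does:

from types import MappingProxyType

LANGUAGE_CODES = MappingProxyType({
    'en': 'English',
    'es': 'Spanish',
    'pt': 'Portuguese',
})

print(LANGUAGE_CODES['en'])
# LANGUAGE_CODES['fr'] = 'French'  # would raise TypeError: the proxy is read-only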
@@ -0,0 +1,138 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import copy
+import os
+from typing import Any, Dict, List
+
+from evalscope.api.benchmark import BenchmarkMeta, ImageEditAdapter
+from evalscope.api.dataset import Sample
+from evalscope.api.evaluator.state import TaskState
+from evalscope.api.messages import ChatMessage, ChatMessageUser, Content, ContentImage, ContentText
+from evalscope.api.metric.scorer import Score
+from evalscope.api.registry import register_benchmark
+from evalscope.constants import FileConstants, Tags
+from evalscope.utils.io_utils import bytes_to_base64
+from evalscope.utils.logger import get_logger
+
+logger = get_logger()
+
+SUBSET_LIST = [
+    'background_change', 'color_alter', 'material_alter', 'motion_change', 'ps_human', 'style_change', 'subject-add',
+    'subject-remove', 'subject-replace', 'text_change', 'tone_transfer'
+]
+
+LANGUAGE_LIST = ['en', 'cn']
+
+
+@register_benchmark(
+    BenchmarkMeta(
+        name='gedit',
+        pretty_name='GEdit-Bench',
+        dataset_id='stepfun-ai/GEdit-Bench',
+        description='GEdit-Bench Image Editing Benchmark, grounded in real-world '
+        'usages is developed to support more authentic and '
+        'comprehensive evaluation of image editing models.',
+        tags=[Tags.IMAGE_EDITING],
+        subset_list=SUBSET_LIST,
+        metric_list=['Semantic Consistency', 'Perceptual Similarity'],
+        few_shot_num=0,
+        train_split=None,
+        eval_split='train',
+        extra_params={'language': f'# language of the instruction, choose from {LANGUAGE_LIST}, default to `en`'}
+    )
+)
+class GEditAdapter(ImageEditAdapter):
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+        self.language = self.extra_params.get('language', 'en')
+        if self.language not in LANGUAGE_LIST:
+            logger.warning(f"Invalid language '{self.language}', fallback to 'en'")
+            self.language = 'en'
+        self.reformat_subset = True
+        self._use_llm_judge = True
+
+        self.load_prompt()
+
+    def load_prompt(self):
+        from . import vie_prompts
+
+        self.context = vie_prompts._context_no_delimit
+        self.SC_prompt = '\n'.join([
+            self.context, vie_prompts._prompts_0shot_two_image_edit_rule, vie_prompts._prompts_0shot_tie_rule_SC
+        ])
+        self.PQ_prompt = '\n'.join([self.context, vie_prompts._prompts_0shot_rule_PQ])
+
+    def record_to_sample(self, record: Dict[str, Any]) -> Sample:
+        record = copy.deepcopy(record)
+
+        # Process instruction and image
+        instruction = record['instruction']
+        image_bytes = record['input_image']['bytes']
+        input_image = bytes_to_base64(image_bytes, format='png', add_header=True)
+        record['input_image'] = input_image
+        record[FileConstants.ID] = record['key']
+        del record['input_image_raw']
+
+        text_content = ContentText(text=instruction)
+        image_content = ContentImage(image=input_image)
+
+        messages: List[ChatMessage] = [
+            ChatMessageUser(content=[text_content, image_content]),
+        ]
+
+        return Sample(input=messages, subset_key=record['task_type'], metadata=record)
+
+    def sample_filter(self, sample: Sample) -> bool:
+        language = sample.metadata.get('instruction_language', 'en')
+        return super().sample_filter(sample) and language == self.language
+
+    def llm_match_score(self, original_prediction, filtered_prediction, reference, task_state: TaskState) -> Score:
+        import math
+
+        from .utils import mllm_output_to_dict
+
+        metadata = task_state.metadata
+        text_prompt = metadata['instruction']
+        input_image = metadata['input_image']  # base64 image
+        edited_image = metadata[FileConstants.IMAGE_PATH]  # local image path
+        _SC_prompt = self.SC_prompt.replace('<instruction>', text_prompt)
+
+        # Initialize the score object with prediction details
+        score = Score(
+            extracted_prediction=edited_image,
+            prediction=edited_image,
+        )
+
+        # Build prompts
+        SC_prompt_final = [
+            ChatMessageUser(
+                content=[
+                    ContentImage(image=input_image),
+                    ContentImage(image=edited_image),
+                    ContentText(text=_SC_prompt)
+                ]
+            )
+        ]
+        PQ_prompt_final = [
+            ChatMessageUser(content=[ContentImage(image=edited_image),
+                                     ContentText(text=self.PQ_prompt)])
+        ]
+
+        guess_if_cannot_parse = True
+        result_SC = self.llm_judge.judge(messages=SC_prompt_final)
+        result_PQ = self.llm_judge.judge(messages=PQ_prompt_final)
+        SC_dict = mllm_output_to_dict(result_SC, give_up_parsing=guess_if_cannot_parse)
+        PQ_dict = mllm_output_to_dict(result_PQ, give_up_parsing=guess_if_cannot_parse)
+
+        SC_score = min(SC_dict['score'])
+        PQ_score = min(PQ_dict['score'])
+        O_score = math.sqrt(SC_score * PQ_score)
+
+        score.value = {'Semantic Consistency': SC_score, 'Perceptual Quality': PQ_score, 'Overall': O_score}
+        score.main_score_name = 'Overall'
+        score.metadata = {
+            'SC_dict': SC_dict,
+            'PQ_dict': PQ_dict,
+        }
+        return score
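The GEdit adapter's Overall value is the geometric mean of the two judge scores, so a low score on either axis pulls the overall down more than an arithmetic mean would. A worked example with illustrative values (the actual range is whatever scale the VIE judge prompts request):

import math

SC_score = 6  # semantic consistency, the minimum over the judge's parsed scores
PQ_score = 8  # perceptual quality
O_score = math.sqrt(SC_score * PQ_score)

print(round(O_score, 2))          # 6.93 (geometric mean)
print((SC_score + PQ_score) / 2)  # 7.0  (arithmetic mean, for comparison)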