evalscope 1.0.2__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (176)
  1. evalscope/api/benchmark/__init__.py +8 -1
  2. evalscope/api/benchmark/adapters/__init__.py +1 -0
  3. evalscope/api/benchmark/adapters/default_data_adapter.py +12 -0
  4. evalscope/api/benchmark/adapters/ner_adapter.py +212 -0
  5. evalscope/api/benchmark/benchmark.py +14 -0
  6. evalscope/api/dataset/dataset.py +21 -0
  7. evalscope/api/dataset/loader.py +6 -2
  8. evalscope/api/mixin/sandbox_mixin.py +32 -54
  9. evalscope/api/model/generate_config.py +6 -0
  10. evalscope/app/ui/multi_model.py +6 -1
  11. evalscope/app/ui/single_model.py +8 -2
  12. evalscope/app/utils/data_utils.py +3 -2
  13. evalscope/app/utils/visualization.py +2 -2
  14. evalscope/benchmarks/aa_lcr/aa_lcr_adapter.py +205 -0
  15. evalscope/benchmarks/ai2d/ai2d_adapter.py +3 -2
  16. evalscope/benchmarks/bfcl/bfcl_adapter.py +11 -46
  17. evalscope/benchmarks/blink/__init__.py +0 -0
  18. evalscope/benchmarks/blink/blink_adapter.py +61 -0
  19. evalscope/benchmarks/chartqa/__init__.py +0 -0
  20. evalscope/benchmarks/chartqa/chartqa_adapter.py +80 -0
  21. evalscope/benchmarks/chartqa/utils.py +38 -0
  22. evalscope/benchmarks/data_collection/data_collection_adapter.py +2 -1
  23. evalscope/benchmarks/docvqa/__init__.py +0 -0
  24. evalscope/benchmarks/docvqa/docvqa_adapter.py +67 -0
  25. evalscope/benchmarks/general_arena/general_arena_adapter.py +1 -1
  26. evalscope/benchmarks/general_arena/utils.py +2 -1
  27. evalscope/benchmarks/general_mcq/general_mcq_adapter.py +1 -1
  28. evalscope/benchmarks/general_qa/general_qa_adapter.py +1 -1
  29. evalscope/benchmarks/gsm8k/gsm8k_adapter.py +23 -4
  30. evalscope/benchmarks/hallusion_bench/__init__.py +0 -0
  31. evalscope/benchmarks/hallusion_bench/hallusion_bench_adapter.py +158 -0
  32. evalscope/benchmarks/hle/hle_adapter.py +3 -2
  33. evalscope/benchmarks/humaneval/humaneval_adapter.py +2 -1
  34. evalscope/benchmarks/infovqa/__init__.py +0 -0
  35. evalscope/benchmarks/infovqa/infovqa_adapter.py +66 -0
  36. evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py +3 -1
  37. evalscope/benchmarks/math_verse/__init__.py +0 -0
  38. evalscope/benchmarks/math_verse/math_verse_adapter.py +100 -0
  39. evalscope/benchmarks/math_vision/__init__.py +0 -0
  40. evalscope/benchmarks/math_vision/math_vision_adapter.py +111 -0
  41. evalscope/benchmarks/math_vista/math_vista_adapter.py +6 -26
  42. evalscope/benchmarks/mm_bench/mm_bench_adapter.py +2 -2
  43. evalscope/benchmarks/mmmu/mmmu_adapter.py +1 -1
  44. evalscope/benchmarks/needle_haystack/needle_haystack_adapter.py +1 -1
  45. evalscope/benchmarks/ner/__init__.py +0 -0
  46. evalscope/benchmarks/ner/broad_twitter_corpus_adapter.py +52 -0
  47. evalscope/benchmarks/ner/conll2003_adapter.py +48 -0
  48. evalscope/benchmarks/ner/copious_adapter.py +85 -0
  49. evalscope/benchmarks/ner/cross_ner_adapter.py +120 -0
  50. evalscope/benchmarks/ner/cross_ner_entities/__init__.py +0 -0
  51. evalscope/benchmarks/ner/cross_ner_entities/ai.py +54 -0
  52. evalscope/benchmarks/ner/cross_ner_entities/literature.py +36 -0
  53. evalscope/benchmarks/ner/cross_ner_entities/music.py +39 -0
  54. evalscope/benchmarks/ner/cross_ner_entities/politics.py +37 -0
  55. evalscope/benchmarks/ner/cross_ner_entities/science.py +58 -0
  56. evalscope/benchmarks/ner/genia_ner_adapter.py +66 -0
  57. evalscope/benchmarks/ner/harvey_ner_adapter.py +58 -0
  58. evalscope/benchmarks/ner/mit_movie_trivia_adapter.py +74 -0
  59. evalscope/benchmarks/ner/mit_restaurant_adapter.py +66 -0
  60. evalscope/benchmarks/ner/ontonotes5_adapter.py +87 -0
  61. evalscope/benchmarks/ner/wnut2017_adapter.py +61 -0
  62. evalscope/benchmarks/ocr_bench/__init__.py +0 -0
  63. evalscope/benchmarks/ocr_bench/ocr_bench_adapter.py +101 -0
  64. evalscope/benchmarks/ocr_bench_v2/IoUscore_metric.py +87 -0
  65. evalscope/benchmarks/ocr_bench_v2/TEDS_metric.py +963 -0
  66. evalscope/benchmarks/ocr_bench_v2/__init__.py +0 -0
  67. evalscope/benchmarks/ocr_bench_v2/ocr_bench_v2_adapter.py +161 -0
  68. evalscope/benchmarks/ocr_bench_v2/page_ocr_metric.py +50 -0
  69. evalscope/benchmarks/ocr_bench_v2/parallel.py +46 -0
  70. evalscope/benchmarks/ocr_bench_v2/spotting_eval/__init__.py +0 -0
  71. evalscope/benchmarks/ocr_bench_v2/spotting_eval/readme.txt +26 -0
  72. evalscope/benchmarks/ocr_bench_v2/spotting_eval/rrc_evaluation_funcs_1_1.py +537 -0
  73. evalscope/benchmarks/ocr_bench_v2/spotting_eval/script.py +481 -0
  74. evalscope/benchmarks/ocr_bench_v2/spotting_metric.py +179 -0
  75. evalscope/benchmarks/ocr_bench_v2/utils.py +433 -0
  76. evalscope/benchmarks/ocr_bench_v2/vqa_metric.py +254 -0
  77. evalscope/benchmarks/omnidoc_bench/__init__.py +0 -0
  78. evalscope/benchmarks/omnidoc_bench/end2end_eval.py +349 -0
  79. evalscope/benchmarks/omnidoc_bench/metrics.py +547 -0
  80. evalscope/benchmarks/omnidoc_bench/omnidoc_bench_adapter.py +135 -0
  81. evalscope/benchmarks/omnidoc_bench/utils.py +1937 -0
  82. evalscope/benchmarks/poly_math/__init__.py +0 -0
  83. evalscope/benchmarks/poly_math/poly_math_adapter.py +127 -0
  84. evalscope/benchmarks/poly_math/utils/instruction.py +105 -0
  85. evalscope/benchmarks/pope/__init__.py +0 -0
  86. evalscope/benchmarks/pope/pope_adapter.py +111 -0
  87. evalscope/benchmarks/seed_bench_2_plus/__init__.py +0 -0
  88. evalscope/benchmarks/seed_bench_2_plus/seed_bench_2_plus_adapter.py +72 -0
  89. evalscope/benchmarks/simple_vqa/__init__.py +0 -0
  90. evalscope/benchmarks/simple_vqa/simple_vqa_adapter.py +169 -0
  91. evalscope/benchmarks/tau_bench/tau_bench_adapter.py +1 -1
  92. evalscope/benchmarks/tool_bench/tool_bench_adapter.py +1 -1
  93. evalscope/benchmarks/visu_logic/__init__.py +0 -0
  94. evalscope/benchmarks/visu_logic/visu_logic_adapter.py +75 -0
  95. evalscope/benchmarks/zerobench/__init__.py +0 -0
  96. evalscope/benchmarks/zerobench/zerobench_adapter.py +64 -0
  97. evalscope/constants.py +4 -0
  98. evalscope/evaluator/evaluator.py +72 -79
  99. evalscope/metrics/math_parser.py +14 -0
  100. evalscope/metrics/metric.py +52 -1
  101. evalscope/metrics/metrics.py +16 -0
  102. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/config.py +0 -0
  103. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/dist_utils.py +0 -0
  104. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/gradcam.py +0 -0
  105. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/logger.py +0 -0
  106. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/optims.py +0 -0
  107. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/registry.py +0 -0
  108. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/utils.py +0 -0
  109. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/__init__.py +0 -0
  110. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa.py +0 -0
  111. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa_eval.py +0 -0
  112. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py +2 -6
  113. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py +2 -6
  114. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py +2 -6
  115. evalscope/models/utils/openai.py +4 -0
  116. evalscope/perf/arguments.py +24 -4
  117. evalscope/perf/benchmark.py +74 -89
  118. evalscope/perf/http_client.py +31 -16
  119. evalscope/perf/main.py +15 -2
  120. evalscope/perf/plugin/api/base.py +9 -7
  121. evalscope/perf/plugin/api/custom_api.py +13 -58
  122. evalscope/perf/plugin/api/default_api.py +179 -79
  123. evalscope/perf/plugin/api/openai_api.py +4 -3
  124. evalscope/perf/plugin/datasets/base.py +21 -0
  125. evalscope/perf/plugin/datasets/custom.py +2 -3
  126. evalscope/perf/plugin/datasets/line_by_line.py +2 -3
  127. evalscope/perf/plugin/datasets/longalpaca.py +2 -3
  128. evalscope/perf/plugin/datasets/openqa.py +2 -4
  129. evalscope/perf/plugin/datasets/random_dataset.py +1 -3
  130. evalscope/perf/utils/benchmark_util.py +36 -22
  131. evalscope/perf/utils/db_util.py +14 -19
  132. evalscope/perf/utils/local_server.py +0 -44
  133. evalscope/perf/utils/log_utils.py +21 -6
  134. evalscope/report/__init__.py +11 -2
  135. evalscope/report/combinator.py +52 -2
  136. evalscope/run.py +4 -0
  137. evalscope/utils/function_utils.py +195 -12
  138. evalscope/utils/io_utils.py +74 -0
  139. evalscope/utils/json_schema.py +8 -6
  140. evalscope/utils/logger.py +49 -17
  141. evalscope/utils/multi_choices.py +16 -1
  142. evalscope/utils/ner.py +377 -0
  143. evalscope/version.py +2 -2
  144. {evalscope-1.0.2.dist-info → evalscope-1.1.1.dist-info}/METADATA +239 -393
  145. {evalscope-1.0.2.dist-info → evalscope-1.1.1.dist-info}/RECORD +140 -98
  146. {evalscope-1.0.2.dist-info → evalscope-1.1.1.dist-info}/WHEEL +1 -1
  147. {evalscope-1.0.2.dist-info → evalscope-1.1.1.dist-info}/top_level.txt +0 -1
  148. tests/__init__.py +0 -1
  149. tests/benchmark/__init__.py +0 -1
  150. tests/benchmark/test_eval.py +0 -429
  151. tests/benchmark/test_image_edit.py +0 -65
  152. tests/benchmark/test_sandbox.py +0 -81
  153. tests/benchmark/test_t2i.py +0 -142
  154. tests/benchmark/test_vlm.py +0 -137
  155. tests/cli/__init__.py +0 -1
  156. tests/cli/test_all.py +0 -269
  157. tests/cli/test_collection.py +0 -99
  158. tests/cli/test_custom.py +0 -268
  159. tests/cli/test_reasoning.py +0 -81
  160. tests/common.py +0 -73
  161. tests/perf/__init__.py +0 -1
  162. tests/perf/test_perf.py +0 -206
  163. tests/rag/test_clip_benchmark.py +0 -87
  164. tests/rag/test_mteb.py +0 -213
  165. tests/rag/test_ragas.py +0 -128
  166. tests/swift/__init__.py +0 -1
  167. tests/swift/test_run_swift_eval.py +0 -146
  168. tests/swift/test_run_swift_vlm_eval.py +0 -128
  169. tests/swift/test_run_swift_vlm_jugde_eval.py +0 -157
  170. tests/test_run_all.py +0 -12
  171. tests/utils.py +0 -13
  172. tests/vlm/__init__.py +0 -1
  173. tests/vlm/test_vlmeval.py +0 -102
  174. {tests/rag → evalscope/benchmarks/aa_lcr}/__init__.py +0 -0
  175. {evalscope-1.0.2.dist-info → evalscope-1.1.1.dist-info}/entry_points.txt +0 -0
  176. {evalscope-1.0.2.dist-info → evalscope-1.1.1.dist-info/licenses}/LICENSE +0 -0
evalscope/benchmarks/omnidoc_bench/end2end_eval.py (new file)
@@ -0,0 +1,349 @@
+ # flake8: noqa
+ import numpy as np
+ import os
+ import sys
+ from collections import defaultdict
+ from typing import Dict, List, Optional
+
+ from evalscope.utils import get_logger
+
+ logger = get_logger()
+
+
+ class End2EndEvaluator:
+
+     def __init__(
+         self,
+         prediction: List,
+         reference: List,
+         metrics: Dict,
+         match_method: str = 'quick_match',
+         filter_types: Optional[Dict] = None
+     ):
+
+         self.match_method = match_method
+         self.references = reference
+         self.predictions = prediction
+         self.default_metrics_dict = metrics
+
+         # Optionally keep only the ground-truth pages whose page attributes match filter_types.
+         filtered_gt_samples = []
+         if filter_types:
+             for gt_sample in self.references:
+                 select_flag = True
+                 for k, v in filter_types.items():
+                     if gt_sample['page_info']['page_attribute'][k] != v:
+                         select_flag = False
+                 if select_flag:
+                     filtered_gt_samples.append(gt_sample)
+         else:
+             filtered_gt_samples = self.references  # list of page annotation dicts
+         self.references = filtered_gt_samples
+
+     def score(self) -> dict:
+         samples = self.get_matched_elements(self.references, self.predictions)
+         metrics = self.process_generated_metric_results(samples)
+         return metrics
+
+     def get_page_elements(self, selected_annos):
+         saved_element_dict = defaultdict(list)
+         related_truncated = []
+         truncated_all = {}
+         for relation in selected_annos['extra']['relation']:  # Handle truncated text issues
+             if relation['relation_type'] == 'truncated':
+                 truncated_all[relation['source_anno_id']] = ''
+                 truncated_all[relation['target_anno_id']] = ''
+                 exist_flag = False
+                 for merge_list in related_truncated:
+                     # Consider cases where three text blocks may need to be merged
+                     if relation['source_anno_id'] in merge_list or relation['target_anno_id'] in merge_list:
+                         merge_list.append(relation['source_anno_id'])
+                         merge_list.append(relation['target_anno_id'])
+                         exist_flag = True
+                 if not exist_flag:
+                     related_truncated.append([relation['source_anno_id'], relation['target_anno_id']])
+
+         for item in selected_annos['layout_dets']:
+             if item['anno_id'] not in truncated_all:
+                 saved_element_dict[item['category_type']].append(item)
+             else:
+                 truncated_all[item['anno_id']] = item
+
+         for merge_list in related_truncated:
+             text_block_list = [truncated_all[key] for key in merge_list]
+             sorted_block = sorted(text_block_list, key=lambda x: x['order'])
+             text = ''
+             for block in sorted_block:
+                 text += block['text']
+             merged_block = {
+                 'category_type': sorted_block[0]['category_type'],  # Directly use information from the first block
+                 'order': sorted_block[0]['order'],
+                 'anno_id': sorted_block[0]['anno_id'],
+                 'text': text,
+                 'merge_list': sorted_block
+             }
+             saved_element_dict[sorted_block[0]['category_type']].append(merged_block)
+
+         return saved_element_dict
+
+     def get_page_elements_list(self, gt_page_elements, category_list):
+         element_list = []
+         for category_type in category_list:
+             if gt_page_elements.get(category_type):
+                 element_list.extend(gt_page_elements[category_type])
+         return element_list
+
+     def get_sorted_text_list(self, selected_annos):
+         # txt_type: text, latex, html
+         text_list = []
+         for item in selected_annos:
+             order = item['order'] if item.get('order') else 0
+             # Pair each item with its reading order so the list can be sorted.
+             text_list.append((order, item))
+         sorted_text_list = sorted(text_list, key=lambda x: x[0])
+         return [item for _, item in sorted_text_list]
+
+     def filtered_out_ignore(self, items, ignore_category_list):
+         filtered_items = []
+         for item in items:
+             if item['gt_category_type'] not in ignore_category_list:
+                 filtered_items.append(item)
+         return filtered_items
+
+     def get_order_paired(self, order_match_s, img_name):
+         matched = [(item['gt_position'], item['pred_position'])
+                    for item in order_match_s
+                    if (item['gt_position'] != [''] and item['pred_position'] != '')]
+         gt_idx_all = [item['gt_position'] for item in order_match_s if item['gt_position'] != ['']]
+         read_order_pred = [i[0] for i in sorted(matched, key=lambda x: x[1])]
+         read_order_gt = sum(gt_idx_all, [])  # Convert to one-dimensional list
+         read_order_gt = [x for x in read_order_gt if x]
+         gt = sorted(read_order_gt)
+         pred = sum(read_order_pred, [])
+         pred = [x for x in pred if x]
+         if len(pred) > 0 or len(gt) > 0:
+             import Levenshtein
+             edit = Levenshtein.distance(gt, pred) / max(len(pred), len(gt))
+             return {'gt': gt, 'pred': pred, 'img_id': img_name, 'edit': edit}
+         else:
+             return {}  # If both GT and pred are empty for the page, return empty
+
+     def formula_format(self, formula_matches, img_name):
+         for i, item in enumerate(formula_matches):
+             item['img_id'] = img_name + '_' + str(i)
+         return formula_matches
+
+     def get_matched_elements(self, references: list, predictions: list) -> dict:
+         from .metrics import recogition_end2end_base_dataset, recogition_end2end_table_dataset
+
+         plain_text_match = []
+         display_formula_match = []
+         html_table_match = []
+         latex_table_match = []
+         order_match = []
+
+         for i, sample in enumerate(references):
+             img_name = os.path.basename(sample['page_info']['image_path'])
+             pred_content = predictions[i]
+             result = self.process_get_matched_elements(sample, pred_content, img_name)
+             [
+                 plain_text_match_clean, formatted_display_formula, latex_table_match_s, html_table_match_s,
+                 order_match_single
+             ] = result
+
+             if order_match_single:
+                 order_match.append(order_match_single)
+             if plain_text_match_clean:
+                 plain_text_match.extend(plain_text_match_clean)
+             if formatted_display_formula:
+                 display_formula_match.extend(formatted_display_formula)
+             if latex_table_match_s:
+                 latex_table_match.extend(latex_table_match_s)
+             if html_table_match_s:
+                 html_table_match.extend(html_table_match_s)
+
+         # Keep tables in whichever markup (LaTeX or HTML) produced more matches.
+         if len(latex_table_match) > len(html_table_match):
+             table_match = latex_table_match
+             table_format = 'latex'
+         else:
+             table_match = html_table_match
+             table_format = 'html'
+
+         matched_samples_all = {
+             'text_block': recogition_end2end_base_dataset(plain_text_match),
+             'display_formula': recogition_end2end_base_dataset(display_formula_match),
+             'table': recogition_end2end_table_dataset(table_match, table_format),
+             'reading_order': recogition_end2end_base_dataset(order_match)
+         }
+
+         return matched_samples_all
+
+     def process_get_matched_elements(self, sample, pred_content, img_name):
+         from func_timeout import FunctionTimedOut, func_timeout
+
+         from .utils import match_gt2pred_no_split, match_gt2pred_quick, match_gt2pred_simple, md_tex_filter
+
+         # Select the GT-to-prediction matching strategy.
+         if self.match_method == 'simple_match':
+             match_gt2pred = match_gt2pred_simple
+         elif self.match_method == 'quick_match':
+             match_gt2pred = match_gt2pred_quick
+         elif self.match_method == 'no_split':
+             match_gt2pred = match_gt2pred_no_split
+         else:
+             match_gt2pred = match_gt2pred_quick
+
+         pred_dataset = md_tex_filter(pred_content)
+         gt_page_elements = self.get_page_elements(sample)
+
+         text_all = self.get_page_elements_list(
+             gt_page_elements, [
+                 'text_block', 'title', 'code_txt', 'code_txt_caption', 'reference', 'equation_caption',
+                 'figure_caption', 'figure_footnote', 'table_caption', 'table_footnote', 'code_algorithm',
+                 'code_algorithm_caption', 'header', 'footer', 'page_footnote', 'page_number'
+             ]
+         )
+
+         display_formula_match_s = []
+         plain_text_match_clean = []
+         latex_table_match_s = []
+         html_table_match_s = []
+         order_match_single = []
+         if text_all:
+             gt_text_list = self.get_sorted_text_list(text_all)
+             try:
+                 plain_text_match_s = func_timeout(
+                     30, match_gt2pred, args=(gt_text_list, pred_dataset['text_all'], 'text', img_name)
+                 )
+             except FunctionTimedOut as e:
+                 # Fall back to the simple matcher instead of failing the whole page.
+                 logger.warning(f'Time out for plain text match of {img_name}, match_gt2pred_simple will be used.')
+                 logger.error(str(e))
+                 plain_text_match_s = match_gt2pred_simple(gt_text_list, pred_dataset['text_all'], 'text', img_name)
+
+             if not plain_text_match_s:
+                 logger.warning(f'No text match of {img_name}. The plain text match will be empty.')
+             else:
+                 plain_text_match_clean = self.filtered_out_ignore(
+                     plain_text_match_s, [
+                         'figure_caption', 'figure_footnote', 'table_caption', 'table_footnote', 'code_algorithm',
+                         'code_algorithm_caption', 'header', 'footer', 'page_footnote', 'page_number', 'equation_caption'
+                     ]
+                 )
+
+         if gt_page_elements.get('equation_isolated'):
+             gt_display_list = self.get_sorted_text_list(gt_page_elements['equation_isolated'])
+             display_formula_match_s = match_gt2pred(
+                 gt_display_list, pred_dataset['equation_isolated'], 'formula', img_name
+             )
+             display_formula_match_s = [x for x in display_formula_match_s if x['gt_idx'] != ['']]
+             if not display_formula_match_s:
+                 logger.warning(f'No display_formula_match of {img_name}. The display_formula_match will be empty.')
+
+         if gt_page_elements.get('table'):
+             gt_table_list = self.get_sorted_text_list(gt_page_elements['table'])
+             if pred_dataset['latex_table']:
+                 latex_table_match_s = match_gt2pred_simple(
+                     gt_table_list, pred_dataset['latex_table'], 'latex_table', img_name
+                 )
+                 latex_table_match_s = [x for x in latex_table_match_s if x['gt_idx'] != ['']]
+             if pred_dataset['html_table']:
+                 html_table_match_s = match_gt2pred_simple(
+                     gt_table_list, pred_dataset['html_table'], 'html_table', img_name
+                 )
+                 html_table_match_s = [x for x in html_table_match_s if x['gt_idx'] != ['']]
+             else:
+                 html_table_match_s = match_gt2pred_simple(gt_table_list, [], 'html_table', img_name)
+                 html_table_match_s = [x for x in html_table_match_s if x['gt_idx'] != ['']]
+
+         order_match_s = plain_text_match_clean
+         if order_match_s:
+             order_match_single = self.get_order_paired(order_match_s, img_name)
+
+         return [
+             plain_text_match_clean, display_formula_match_s, latex_table_match_s, html_table_match_s, order_match_single
+         ]
+
+     def process_generated_metric_results(self, samples, save_name: str = 'end2end_quick_match'):
+         from .metrics import METRIC_REGISTRY, get_full_labels_results, get_page_split, show_result
+
+         result_all = {}
+         page_info = {}
+         metrics_dict = self.default_metrics_dict
+         pages = self.references  # list of ground-truth page samples
+
+         for page in pages:
+             img_path = os.path.basename(page['page_info']['image_path'])
+             page_info[img_path] = page['page_info']['page_attribute']
+
+         for element in metrics_dict.keys():
+             result = {}
+             group_info = metrics_dict[element].get('group', [])
+             cur_samples = samples[element]
+
+             for metric in metrics_dict[element]['metric']:
+                 metric_val = METRIC_REGISTRY.get(metric)
+                 cur_samples, result_s = metric_val(cur_samples).evaluate(group_info, f'{save_name}_{element}')
+                 if result_s:
+                     result.update(result_s)
+
+             if result:
+                 logger.info(f'{element}')
+                 show_result(result)
+
+             group_result = get_full_labels_results(cur_samples)
+             page_result = get_page_split(cur_samples, page_info)
+             result_all[element] = {'all': result, 'group': group_result, 'page': page_result}
+
+         # Flatten the per-category page-level results into a single report dict.
+         save_dict = {}
+         en_overall = []
+         ch_overall = []
+         for category_type, metric in [('text_block', 'Edit_dist'), ('display_formula', 'Edit_dist'),
+                                       ('display_formula', 'CDM'), ('table', 'TEDS'), ('table', 'Edit_dist'),
+                                       ('reading_order', 'Edit_dist')]:
+             key = category_type + '_' + metric
+             page_metrics = result_all.get(category_type, {}).get('page', {})
+             if metric in page_metrics:
+                 save_dict[key + '_EN'] = page_metrics[metric].get('language: english', np.nan)
+                 save_dict[key + '_CH'] = page_metrics[metric].get('language: simplified_chinese', np.nan)
+             else:
+                 save_dict[key + '_EN'] = np.nan
+                 save_dict[key + '_CH'] = np.nan
+
+             # The overall score per language is the mean edit distance across categories.
+             if metric == 'Edit_dist':
+                 en_overall.append(save_dict[key + '_EN'])
+                 ch_overall.append(save_dict[key + '_CH'])
+
+         en_overall_filtered = [x for x in en_overall if not np.isnan(x)]
+         ch_overall_filtered = [x for x in ch_overall if not np.isnan(x)]
+         save_dict['overall_EN'] = sum(en_overall_filtered) / len(en_overall_filtered) if en_overall_filtered else np.nan
+         save_dict['overall_CH'] = sum(ch_overall_filtered) / len(ch_overall_filtered) if ch_overall_filtered else np.nan
+
+         return save_dict
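
A minimal sketch of how the new End2EndEvaluator might be driven. The constructor signature comes from the diff above; the reference schema ('page_info', 'layout_dets', 'extra' with 'relation') and the metric name are inferred from how the class reads its inputs, so the literal values below are placeholder assumptions rather than a documented API.

# Sketch only: data shapes inferred from end2end_eval.py above, not from documented evalscope APIs.
from evalscope.benchmarks.omnidoc_bench.end2end_eval import End2EndEvaluator

# One ground-truth page in the annotation schema the evaluator consumes.
reference = [{
    'page_info': {
        'image_path': 'pages/page_0001.jpg',        # placeholder path; only the basename is used
        'page_attribute': {'language': 'english'},  # drives the per-language page splits
    },
    'layout_dets': [
        {'anno_id': 0, 'category_type': 'text_block', 'order': 1,
         'text': 'OmniDocBench evaluates end-to-end document parsing.'},
    ],
    'extra': {'relation': []},  # no truncated text blocks to merge on this page
}]

# Model output for the same page, as raw markdown/LaTeX text.
prediction = ['OmniDocBench evaluates end-to-end document parsing.']

# Metric names must exist in the package's METRIC_REGISTRY; 'Edit_dist' is assumed here
# because process_generated_metric_results reports it per category.
metrics = {'text_block': {'metric': ['Edit_dist'], 'group': []}}

evaluator = End2EndEvaluator(
    prediction=prediction,
    reference=reference,
    metrics=metrics,
    match_method='quick_match',  # or 'simple_match' / 'no_split'
)
print(evaluator.score())  # keys like 'text_block_Edit_dist_EN', 'overall_EN', ...

In the released package this class is driven by omnidoc_bench_adapter.py (also added in this version), which builds the reference and prediction lists from the OmniDocBench dataset.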