evalscope 0.17.1__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (273)
  1. evalscope/__init__.py +4 -1
  2. evalscope/api/__init__.py +0 -0
  3. evalscope/api/benchmark/__init__.py +3 -0
  4. evalscope/api/benchmark/adapters/__init__.py +3 -0
  5. evalscope/api/benchmark/adapters/default_data_adapter.py +683 -0
  6. evalscope/api/benchmark/adapters/multi_choice_adapter.py +83 -0
  7. evalscope/api/benchmark/adapters/text2image_adapter.py +155 -0
  8. evalscope/api/benchmark/benchmark.py +321 -0
  9. evalscope/api/benchmark/meta.py +115 -0
  10. evalscope/api/dataset/__init__.py +2 -0
  11. evalscope/api/dataset/dataset.py +349 -0
  12. evalscope/api/dataset/loader.py +261 -0
  13. evalscope/api/dataset/utils.py +143 -0
  14. evalscope/api/evaluator/__init__.py +3 -0
  15. evalscope/api/evaluator/cache.py +355 -0
  16. evalscope/api/evaluator/evaluator.py +56 -0
  17. evalscope/api/evaluator/state.py +264 -0
  18. evalscope/api/filter/__init__.py +1 -0
  19. evalscope/api/filter/filter.py +72 -0
  20. evalscope/api/messages/__init__.py +11 -0
  21. evalscope/api/messages/chat_message.py +198 -0
  22. evalscope/api/messages/content.py +102 -0
  23. evalscope/api/messages/utils.py +35 -0
  24. evalscope/api/metric/__init__.py +2 -0
  25. evalscope/api/metric/metric.py +55 -0
  26. evalscope/api/metric/scorer.py +105 -0
  27. evalscope/api/mixin/__init__.py +2 -0
  28. evalscope/api/mixin/dataset_mixin.py +105 -0
  29. evalscope/api/mixin/llm_judge_mixin.py +168 -0
  30. evalscope/api/model/__init__.py +12 -0
  31. evalscope/api/model/generate_config.py +157 -0
  32. evalscope/api/model/model.py +383 -0
  33. evalscope/api/model/model_output.py +285 -0
  34. evalscope/api/registry.py +182 -0
  35. evalscope/api/tool/__init__.py +3 -0
  36. evalscope/api/tool/tool_call.py +101 -0
  37. evalscope/api/tool/tool_info.py +173 -0
  38. evalscope/api/tool/utils.py +64 -0
  39. evalscope/app/ui/app_ui.py +2 -1
  40. evalscope/app/ui/multi_model.py +50 -25
  41. evalscope/app/ui/single_model.py +23 -11
  42. evalscope/app/utils/data_utils.py +42 -26
  43. evalscope/app/utils/text_utils.py +0 -2
  44. evalscope/app/utils/visualization.py +9 -4
  45. evalscope/arguments.py +6 -7
  46. evalscope/backend/opencompass/api_meta_template.py +2 -1
  47. evalscope/backend/opencompass/backend_manager.py +6 -3
  48. evalscope/backend/rag_eval/clip_benchmark/dataset_builder.py +10 -10
  49. evalscope/backend/rag_eval/clip_benchmark/task_template.py +8 -4
  50. evalscope/backend/rag_eval/ragas/task_template.py +2 -1
  51. evalscope/backend/rag_eval/ragas/tasks/build_distribution.py +2 -1
  52. evalscope/backend/rag_eval/ragas/tasks/build_transform.py +7 -4
  53. evalscope/backend/rag_eval/ragas/tasks/testset_generation.py +2 -1
  54. evalscope/backend/rag_eval/ragas/tasks/translate_prompt.py +2 -1
  55. evalscope/backend/rag_eval/utils/embedding.py +2 -1
  56. evalscope/backend/rag_eval/utils/llm.py +13 -12
  57. evalscope/benchmarks/__init__.py +0 -2
  58. evalscope/benchmarks/aigc/i2i/__init__.py +0 -0
  59. evalscope/benchmarks/aigc/i2i/general_i2i_adapter.py +44 -0
  60. evalscope/benchmarks/aigc/t2i/evalmuse_adapter.py +53 -55
  61. evalscope/benchmarks/aigc/t2i/genai_bench_adapter.py +41 -46
  62. evalscope/benchmarks/aigc/t2i/general_t2i_adapter.py +29 -45
  63. evalscope/benchmarks/aigc/t2i/hpdv2_adapter.py +34 -44
  64. evalscope/benchmarks/aigc/t2i/tifa_adapter.py +16 -27
  65. evalscope/benchmarks/aime/aime24_adapter.py +38 -40
  66. evalscope/benchmarks/aime/aime25_adapter.py +34 -40
  67. evalscope/benchmarks/alpaca_eval/alpaca_eval_adapter.py +86 -60
  68. evalscope/benchmarks/arc/arc_adapter.py +34 -147
  69. evalscope/benchmarks/arena_hard/arena_hard_adapter.py +96 -70
  70. evalscope/benchmarks/arena_hard/utils.py +37 -1
  71. evalscope/benchmarks/bbh/bbh_adapter.py +72 -144
  72. evalscope/benchmarks/bfcl/bfcl_adapter.py +181 -160
  73. evalscope/benchmarks/bfcl/generation.py +222 -0
  74. evalscope/benchmarks/ceval/ceval_adapter.py +94 -162
  75. evalscope/benchmarks/chinese_simple_qa/csimple_qa_adapter.py +85 -82
  76. evalscope/benchmarks/cmmlu/cmmlu_adapter.py +34 -125
  77. evalscope/benchmarks/competition_math/competition_math_adapter.py +56 -108
  78. evalscope/benchmarks/data_collection/data_collection_adapter.py +183 -45
  79. evalscope/benchmarks/docmath/docmath_adapter.py +109 -51
  80. evalscope/benchmarks/docmath/utils.py +4 -5
  81. evalscope/benchmarks/drop/drop_adapter.py +88 -40
  82. evalscope/benchmarks/frames/frames_adapter.py +135 -52
  83. evalscope/benchmarks/general_arena/general_arena_adapter.py +136 -98
  84. evalscope/benchmarks/general_arena/utils.py +23 -27
  85. evalscope/benchmarks/general_mcq/general_mcq_adapter.py +40 -101
  86. evalscope/benchmarks/general_qa/general_qa_adapter.py +73 -134
  87. evalscope/benchmarks/gpqa/gpqa_adapter.py +61 -100
  88. evalscope/benchmarks/gpqa/{chain_of_thought.txt → prompt.py} +12 -5
  89. evalscope/benchmarks/gsm8k/gsm8k_adapter.py +62 -142
  90. evalscope/benchmarks/hellaswag/hellaswag_adapter.py +35 -124
  91. evalscope/benchmarks/hle/hle_adapter.py +127 -93
  92. evalscope/benchmarks/humaneval/humaneval_adapter.py +86 -55
  93. evalscope/benchmarks/ifeval/ifeval_adapter.py +69 -40
  94. evalscope/benchmarks/ifeval/instructions.py +109 -64
  95. evalscope/benchmarks/ifeval/instructions_registry.py +1 -1
  96. evalscope/benchmarks/ifeval/utils.py +6 -7
  97. evalscope/benchmarks/iquiz/iquiz_adapter.py +30 -65
  98. evalscope/benchmarks/live_code_bench/evaluate_utils.py +2 -2
  99. evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py +121 -71
  100. evalscope/benchmarks/live_code_bench/load_utils.py +13 -21
  101. evalscope/benchmarks/live_code_bench/testing_util.py +6 -2
  102. evalscope/benchmarks/maritime_bench/maritime_bench_adapter.py +49 -75
  103. evalscope/benchmarks/math_500/math_500_adapter.py +41 -48
  104. evalscope/benchmarks/mmlu/mmlu_adapter.py +32 -205
  105. evalscope/benchmarks/mmlu_pro/mmlu_pro_adapter.py +80 -99
  106. evalscope/benchmarks/mmlu_redux/mmlu_redux_adapter.py +64 -110
  107. evalscope/benchmarks/musr/musr_adapter.py +33 -64
  108. evalscope/benchmarks/needle_haystack/needle_haystack_adapter.py +192 -152
  109. evalscope/benchmarks/process_bench/process_bench_adapter.py +144 -76
  110. evalscope/benchmarks/race/race_adapter.py +33 -119
  111. evalscope/benchmarks/simple_qa/simple_qa_adapter.py +72 -70
  112. evalscope/benchmarks/super_gpqa/{five_shot_prompt.txt → prompt.py} +14 -16
  113. evalscope/benchmarks/super_gpqa/super_gpqa_adapter.py +73 -117
  114. evalscope/benchmarks/super_gpqa/utils.py +2 -1
  115. evalscope/benchmarks/tau_bench/generation.py +147 -0
  116. evalscope/benchmarks/tau_bench/tau_bench_adapter.py +112 -54
  117. evalscope/benchmarks/tool_bench/tool_bench_adapter.py +91 -70
  118. evalscope/benchmarks/trivia_qa/trivia_qa_adapter.py +56 -124
  119. evalscope/benchmarks/truthful_qa/truthful_qa_adapter.py +70 -265
  120. evalscope/benchmarks/winogrande/winogrande_adapter.py +28 -54
  121. evalscope/cli/cli.py +2 -0
  122. evalscope/cli/start_server.py +6 -3
  123. evalscope/collections/__init__.py +2 -10
  124. evalscope/collections/sampler.py +10 -10
  125. evalscope/collections/schema.py +13 -11
  126. evalscope/config.py +95 -54
  127. evalscope/constants.py +29 -61
  128. evalscope/evaluator/__init__.py +1 -1
  129. evalscope/evaluator/evaluator.py +277 -423
  130. evalscope/filters/__init__.py +2 -0
  131. evalscope/filters/extraction.py +126 -0
  132. evalscope/filters/selection.py +57 -0
  133. evalscope/metrics/__init__.py +13 -13
  134. evalscope/metrics/llm_judge.py +32 -30
  135. evalscope/metrics/math_parser.py +27 -22
  136. evalscope/metrics/metric.py +307 -0
  137. evalscope/metrics/metrics.py +22 -18
  138. evalscope/metrics/t2v_metrics/__init__.py +0 -52
  139. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/clip_model.py +4 -2
  140. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/cross_modeling.py +9 -13
  141. evalscope/metrics/t2v_metrics/models/clipscore_models/clip_model.py +2 -1
  142. evalscope/metrics/t2v_metrics/models/clipscore_models/hpsv2_model.py +3 -2
  143. evalscope/metrics/t2v_metrics/models/clipscore_models/mps_model.py +2 -1
  144. evalscope/metrics/t2v_metrics/models/clipscore_models/pickscore_model.py +2 -2
  145. evalscope/metrics/t2v_metrics/models/itmscore_models/blip2_itm_model.py +2 -1
  146. evalscope/metrics/t2v_metrics/models/itmscore_models/fga_blip2_model.py +4 -2
  147. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/ImageReward.py +10 -5
  148. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/blip_pretrain.py +4 -2
  149. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward_model.py +2 -1
  150. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/language_model/clip_t5.py +15 -9
  151. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/clip_encoder.py +4 -2
  152. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5_model.py +15 -10
  153. evalscope/metrics/t2v_metrics/models/vqascore_models/gpt4v_model.py +9 -6
  154. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/config.py +2 -2
  155. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/gradcam.py +4 -2
  156. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/logger.py +4 -2
  157. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/optims.py +3 -9
  158. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/registry.py +16 -10
  159. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa.py +3 -2
  160. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa_eval.py +4 -2
  161. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/__init__.py +8 -4
  162. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py +47 -25
  163. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_qformer.py +12 -7
  164. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5.py +23 -17
  165. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5_instruct.py +33 -23
  166. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/fga_blip2.py +2 -1
  167. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_llama.py +46 -30
  168. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_t5.py +69 -37
  169. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/__init__.py +7 -5
  170. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip.py +6 -4
  171. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_image_text_matching.py +7 -5
  172. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_nlvr.py +3 -2
  173. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_outputs.py +5 -2
  174. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_vqa.py +17 -13
  175. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py +35 -19
  176. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/clip_vit.py +14 -12
  177. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/eva_vit.py +63 -52
  178. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py +63 -38
  179. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/vit.py +6 -3
  180. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/__init__.py +6 -2
  181. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/randaugment.py +3 -2
  182. evalscope/metrics/t2v_metrics/models/vqascore_models/mm_utils.py +15 -13
  183. evalscope/metrics/t2v_metrics/models/vqascore_models/vqa_model.py +3 -2
  184. evalscope/models/__init__.py +6 -29
  185. evalscope/models/mockllm.py +65 -0
  186. evalscope/models/model_apis.py +47 -0
  187. evalscope/models/modelscope.py +455 -0
  188. evalscope/models/openai_compatible.py +123 -0
  189. evalscope/models/text2image_model.py +124 -0
  190. evalscope/models/utils/openai.py +698 -0
  191. evalscope/perf/benchmark.py +2 -1
  192. evalscope/perf/http_client.py +4 -2
  193. evalscope/perf/plugin/api/custom_api.py +5 -4
  194. evalscope/perf/plugin/api/openai_api.py +11 -9
  195. evalscope/perf/plugin/datasets/custom.py +2 -1
  196. evalscope/perf/plugin/datasets/flickr8k.py +1 -1
  197. evalscope/perf/plugin/datasets/kontext_bench.py +1 -1
  198. evalscope/perf/plugin/datasets/line_by_line.py +2 -1
  199. evalscope/perf/plugin/datasets/longalpaca.py +2 -1
  200. evalscope/perf/plugin/datasets/openqa.py +4 -2
  201. evalscope/perf/utils/benchmark_util.py +7 -5
  202. evalscope/perf/utils/db_util.py +9 -6
  203. evalscope/perf/utils/local_server.py +8 -3
  204. evalscope/perf/utils/rich_display.py +16 -10
  205. evalscope/report/__init__.py +2 -2
  206. evalscope/report/combinator.py +18 -12
  207. evalscope/report/generator.py +101 -6
  208. evalscope/report/{utils.py → report.py} +8 -6
  209. evalscope/run.py +26 -44
  210. evalscope/summarizer.py +1 -1
  211. evalscope/utils/__init__.py +21 -2
  212. evalscope/utils/chat_service.py +2 -1
  213. evalscope/utils/deprecation_utils.py +12 -1
  214. evalscope/utils/function_utils.py +29 -0
  215. evalscope/utils/io_utils.py +100 -5
  216. evalscope/utils/json_schema.py +208 -0
  217. evalscope/utils/logger.py +51 -12
  218. evalscope/utils/model_utils.py +10 -7
  219. evalscope/utils/multi_choices.py +271 -0
  220. evalscope/utils/url_utils.py +65 -0
  221. evalscope/version.py +2 -2
  222. {evalscope-0.17.1.dist-info → evalscope-1.0.0.dist-info}/METADATA +98 -49
  223. {evalscope-0.17.1.dist-info → evalscope-1.0.0.dist-info}/RECORD +234 -216
  224. tests/aigc/test_t2i.py +22 -4
  225. tests/benchmark/__init__.py +1 -0
  226. tests/benchmark/test_eval.py +386 -0
  227. tests/cli/test_all.py +3 -5
  228. tests/cli/test_collection.py +13 -4
  229. tests/cli/test_custom.py +22 -15
  230. tests/rag/test_clip_benchmark.py +1 -0
  231. evalscope/benchmarks/aigc/t2i/base.py +0 -56
  232. evalscope/benchmarks/arc/ai2_arc.py +0 -151
  233. evalscope/benchmarks/benchmark.py +0 -81
  234. evalscope/benchmarks/ceval/ceval_exam.py +0 -146
  235. evalscope/benchmarks/cmmlu/cmmlu.py +0 -161
  236. evalscope/benchmarks/cmmlu/samples.jsonl +0 -5
  237. evalscope/benchmarks/competition_math/competition_math.py +0 -79
  238. evalscope/benchmarks/data_adapter.py +0 -528
  239. evalscope/benchmarks/filters.py +0 -59
  240. evalscope/benchmarks/gsm8k/gsm8k.py +0 -121
  241. evalscope/benchmarks/hellaswag/hellaswag.py +0 -112
  242. evalscope/benchmarks/humaneval/humaneval.py +0 -79
  243. evalscope/benchmarks/mmlu/mmlu.py +0 -160
  244. evalscope/benchmarks/mmlu/samples.jsonl +0 -5
  245. evalscope/benchmarks/process_bench/critique_template.txt +0 -13
  246. evalscope/benchmarks/race/race.py +0 -104
  247. evalscope/benchmarks/race/samples.jsonl +0 -5
  248. evalscope/benchmarks/super_gpqa/zero_shot_prompt.txt +0 -4
  249. evalscope/benchmarks/trivia_qa/trivia_qa.py +0 -89
  250. evalscope/benchmarks/truthful_qa/truthful_qa.py +0 -163
  251. evalscope/benchmarks/utils.py +0 -60
  252. evalscope/collections/evaluator.py +0 -375
  253. evalscope/metrics/completion_parsers.py +0 -227
  254. evalscope/metrics/named_metrics.py +0 -55
  255. evalscope/models/adapters/__init__.py +0 -14
  256. evalscope/models/adapters/base_adapter.py +0 -84
  257. evalscope/models/adapters/bfcl_adapter.py +0 -246
  258. evalscope/models/adapters/chat_adapter.py +0 -207
  259. evalscope/models/adapters/choice_adapter.py +0 -222
  260. evalscope/models/adapters/custom_adapter.py +0 -71
  261. evalscope/models/adapters/server_adapter.py +0 -236
  262. evalscope/models/adapters/t2i_adapter.py +0 -79
  263. evalscope/models/adapters/tau_bench_adapter.py +0 -189
  264. evalscope/models/custom/__init__.py +0 -4
  265. evalscope/models/custom/custom_model.py +0 -50
  266. evalscope/models/custom/dummy_model.py +0 -99
  267. evalscope/models/local_model.py +0 -128
  268. evalscope/models/register.py +0 -41
  269. tests/cli/test_run.py +0 -489
  270. {evalscope-0.17.1.dist-info → evalscope-1.0.0.dist-info}/LICENSE +0 -0
  271. {evalscope-0.17.1.dist-info → evalscope-1.0.0.dist-info}/WHEEL +0 -0
  272. {evalscope-0.17.1.dist-info → evalscope-1.0.0.dist-info}/entry_points.txt +0 -0
  273. {evalscope-0.17.1.dist-info → evalscope-1.0.0.dist-info}/top_level.txt +0 -0
evalscope/run.py CHANGED
@@ -13,9 +13,6 @@ from evalscope.utils.io_utils import OutputsStructure
 from evalscope.utils.logger import configure_logging, get_logger
 from evalscope.utils.model_utils import seed_everything
 
-if TYPE_CHECKING:
-    from evalscope.models import LocalModel
-
 logger = get_logger()
 
 
@@ -109,27 +106,42 @@ def get_backend_manager_class(eval_backend: EvalBackend):
         raise NotImplementedError(f'Not implemented for evaluation backend {eval_backend}')
 
 
-def evaluate_model(task_cfg: TaskConfig, outputs: OutputsStructure) -> dict:
+def evaluate_model(task_config: TaskConfig, outputs: OutputsStructure) -> dict:
     """Evaluate the model based on the provided task configuration."""
-    from evalscope.models import get_local_model
+    from evalscope.api.evaluator import Evaluator
+    from evalscope.api.model import get_model_with_task_config
+    from evalscope.api.registry import get_benchmark
+    from evalscope.evaluator import DefaultEvaluator
     from evalscope.report import gen_table
 
     # Initialize evaluator
     eval_results = {}
-    base_model = get_local_model(task_cfg)
-    evaluators = []
-    for dataset_name in task_cfg.datasets:
-        evaluator = create_evaluator(task_cfg, dataset_name, outputs, base_model)
+    # Initialize model
+    model = get_model_with_task_config(task_config=task_config)
+    # Initialize evaluators for each dataset
+    evaluators: List[Evaluator] = []
+    for dataset_name in task_config.datasets:
+        # Create evaluator for each dataset
+        benchmark = get_benchmark(dataset_name, task_config)
+        evaluator = DefaultEvaluator(
+            task_config=task_config,
+            model=model,
+            benchmark=benchmark,
+            outputs=outputs,
+        )
         evaluators.append(evaluator)
 
+        # Update task_config.dataset_args with benchmark metadata
+        task_config.dataset_args[dataset_name] = benchmark.to_dict()
+
     # dump task_cfg to outputs.configs_dir after creating evaluators
-    task_cfg.dump_yaml(outputs.configs_dir)
-    logger.info(task_cfg)
+    task_config.dump_yaml(outputs.configs_dir)
+    logger.info(task_config)
 
     # Run evaluation for each evaluator
     for evaluator in evaluators:
         res_dict = evaluator.eval()
-        eval_results[evaluator.dataset_name] = res_dict
+        eval_results[evaluator.benchmark.name] = res_dict
 
     # Make overall report
     try:
@@ -139,11 +151,11 @@ def evaluate_model(task_cfg: TaskConfig, outputs: OutputsStructure) -> dict:
         logger.error('Failed to generate report table.')
 
     # Clean up
-    if base_model is not None:
+    if model is not None:
         import gc
         import torch
 
-        del base_model
+        del model
         del evaluators
         torch.cuda.empty_cache()
         gc.collect()
@@ -151,36 +163,6 @@ def evaluate_model(task_cfg: TaskConfig, outputs: OutputsStructure) -> dict:
     return eval_results
 
 
-def create_evaluator(task_cfg: TaskConfig, dataset_name: str, outputs: OutputsStructure, base_model: 'LocalModel'):
-    """Create an evaluator object for the specified dataset."""
-    from evalscope.benchmarks import Benchmark, BenchmarkMeta
-    from evalscope.evaluator import Evaluator
-    from evalscope.models import initialize_model_adapter
-
-    benchmark: BenchmarkMeta = Benchmark.get(dataset_name)
-
-    if dataset_name == DataCollection.NAME:
-        # EvaluatorCollection is a collection of evaluators
-        from evalscope.collections import EvaluatorCollection
-        data_adapter = benchmark.get_data_adapter(config=task_cfg.dataset_args.get(dataset_name, {}))
-        return EvaluatorCollection(task_cfg, data_adapter, outputs, base_model)
-
-    # Initialize data adapter first to update config
-    data_adapter = benchmark.get_data_adapter(config=task_cfg.dataset_args.get(dataset_name, {}))
-    # Initialize model adapter
-    model_adapter = initialize_model_adapter(task_cfg, data_adapter, base_model)
-
-    # update task_cfg.dataset_args
-    task_cfg.dataset_args[dataset_name] = benchmark.to_string_dict()
-
-    return Evaluator(
-        data_adapter=data_adapter,
-        model_adapter=model_adapter,
-        outputs=outputs,
-        task_cfg=task_cfg,
-    )
-
-
 def main():
     from evalscope.arguments import parse_args
     args = parse_args()
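In short, 1.0.0 drops the create_evaluator/initialize_model_adapter pipeline in favour of the new API layer: one model from get_model_with_task_config, one benchmark per dataset via get_benchmark, and a DefaultEvaluator tying them together, with results keyed by benchmark.name. A condensed sketch of that flow, using only the calls visible in this hunk (the wrapper function name is illustrative):

from evalscope.api.model import get_model_with_task_config
from evalscope.api.registry import get_benchmark
from evalscope.evaluator import DefaultEvaluator


def run_datasets(task_config, outputs):
    # Shared model, one evaluator per dataset, mirroring evaluate_model above.
    model = get_model_with_task_config(task_config=task_config)
    results = {}
    for dataset_name in task_config.datasets:
        benchmark = get_benchmark(dataset_name, task_config)
        evaluator = DefaultEvaluator(task_config=task_config, model=model, benchmark=benchmark, outputs=outputs)
        results[benchmark.name] = evaluator.eval()  # keyed by benchmark.name, not dataset_name
    return results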
evalscope/summarizer.py CHANGED
@@ -80,7 +80,7 @@ class Summarizer:
 
                 summary_file_path = summary_files[0]
                 # Example: [{'dataset': 'gsm8k', 'version': '1d7fe4', 'metric': 'accuracy', 'mode': 'gen', 'qwen-7b-chat': '53.98'} # noqa: E501
-                summary_res: List[dict] = csv_to_list(file_path=summary_file_path)
+                summary_res: List[dict] = csv_to_list(summary_file_path)
                 final_res_list.extend(summary_res)
             elif eval_backend == EvalBackend.VLM_EVAL_KIT:
                 eval_config = Summarizer.parse_eval_config(candidate_task)
evalscope/utils/__init__.py CHANGED
@@ -7,9 +7,22 @@ from .import_utils import _LazyModule
 if TYPE_CHECKING:
     from .argument_utils import BaseArgument, get_supported_params, parse_int_or_float
     from .deprecation_utils import deprecated
+    from .function_utils import run_once, thread_safe
     from .import_utils import get_module_path, is_module_installed
-    from .io_utils import (OutputsStructure, csv_to_jsonl, csv_to_list, dict_to_yaml, gen_hash, get_latest_folder_path,
-                           get_valid_list, json_to_dict, jsonl_to_csv, jsonl_to_list, yaml_to_dict)
+    from .io_utils import (
+        OutputsStructure,
+        csv_to_jsonl,
+        csv_to_list,
+        dict_to_yaml,
+        gen_hash,
+        get_latest_folder_path,
+        get_valid_list,
+        json_to_dict,
+        jsonl_to_csv,
+        jsonl_to_list,
+        safe_filename,
+        yaml_to_dict,
+    )
     from .logger import configure_logging, get_logger
     from .model_utils import EvalBackend, dict_torch_dtype_to_str, fix_do_sample_warning, get_device, seed_everything
 
@@ -31,6 +44,10 @@ else:
            'is_module_installed',
            'get_module_path',
        ],
+        'function_utils': [
+            'thread_safe',
+            'run_once',
+        ],
        'io_utils': [
            'OutputsStructure',
            'csv_to_list',
@@ -44,6 +61,8 @@ else:
            'jsonl_to_list',
            'gen_hash',
            'get_valid_list',
+            'safe_filename',
+            'thread_safe',
        ],
        'deprecation_utils': [
            'deprecated',
evalscope/utils/chat_service.py CHANGED
@@ -204,7 +204,8 @@ class ChatService:
 
     def _prepare_chat_inputs(self, request: ChatCompletionRequest):
         formatted_prompt = self.tokenizer.apply_chat_template(
-            request.messages, tokenize=False, add_generation_prompt=True)
+            request.messages, tokenize=False, add_generation_prompt=True
+        )
         inputs = self.tokenizer(formatted_prompt, return_tensors='pt', padding=False).to(self.device)
         prompt_tokens = len(inputs['input_ids'][0])
         return formatted_prompt, inputs, prompt_tokens
evalscope/utils/deprecation_utils.py CHANGED
@@ -1,5 +1,6 @@
 import functools
 import inspect
+import os
 from typing import Callable, Optional
 
 from .logger import get_logger
@@ -22,7 +23,7 @@ def deprecated(since: str, remove_in: Optional[str] = None, alternative: Optiona
         @functools.wraps(func)
         def wrapper(*args, **kwargs):
             # Get the file name where the function is defined
-            file_name = inspect.getfile(func)
+            file_name = os.path.basename(inspect.getfile(func))
 
             # Construct the warning message
             warning_parts = [
@@ -40,3 +41,13 @@ def deprecated(since: str, remove_in: Optional[str] = None, alternative: Optiona
         return wrapper
 
     return decorator
+
+
+def deprecated_warning(logger, message: str):
+    """
+    Log a deprecation warning.
+
+    :param logger: Logger instance to log the warning
+    :param message: Warning message to log
+    """
+    logger.warning(f'Deprecated: {message}')
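A minimal usage sketch of the deprecated decorator modified above (the decorated function is hypothetical; only the decorator signature comes from this file):

from evalscope.utils.deprecation_utils import deprecated


@deprecated(since='1.0.0', remove_in='1.2.0', alternative='new_helper')
def old_helper():
    # Calling this logs a warning that now cites only the defining file's basename
    # (per the os.path.basename change above), then runs as usual.
    return 42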
evalscope/utils/function_utils.py ADDED
@@ -0,0 +1,29 @@
+import threading
+from functools import wraps
+
+
+def run_once(func):
+    """Decorator to ensure a function is only run once."""
+    has_run = False
+    result = None
+
+    def wrapper(*args, **kwargs):
+        nonlocal has_run, result
+        if not has_run:
+            result = func(*args, **kwargs)
+            has_run = True
+        return result
+
+    return wrapper
+
+
+def thread_safe(func):
+    """Thread-safe decorator for functions that need to be executed in a thread-safe manner."""
+    lock = threading.RLock()
+
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        with lock:
+            return func(*args, **kwargs)
+
+    return wrapper
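For reference, a small usage sketch of the two new decorators; the decorated functions below are illustrative only (run_once and thread_safe are also re-exported from evalscope.utils per the __init__.py hunk above):

from evalscope.utils import run_once, thread_safe

counter = 0


@run_once
def load_config():
    # Body executes on the first call only; later calls return the cached result.
    return {'seed': 42}


@thread_safe
def bump():
    # Guarded by a re-entrant lock, so concurrent callers serialize here.
    global counter
    counter += 1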
evalscope/utils/io_utils.py CHANGED
@@ -5,6 +5,8 @@ import json
 import jsonlines as jsonl
 import os
 import re
+import string
+import unicodedata
 import yaml
 from io import BytesIO
 from PIL import Image
@@ -33,7 +35,7 @@ class OutputsStructure:
            'configs_dir': None
        }
 
-    def _get_dir(self, attr_name, dir_name):
+    def _get_dir(self, attr_name, dir_name) -> str:
         if self._dirs[attr_name] is None:
             dir_path = os.path.join(self.outputs_dir, dir_name)
             if self.is_make:
@@ -72,10 +74,20 @@ def jsonl_to_list(jsonl_file):
     Returns:
         list: list of lines. Each line is a dict.
     """
-    res_list = []
-    with jsonl.open(jsonl_file, mode='r') as reader:
-        for line in reader.iter(type=dict, allow_none=True, skip_invalid=False):
-            res_list.append(line)
+    try:
+        res_list = []
+        with jsonl.open(jsonl_file, mode='r') as reader:
+            for line in reader.iter(type=dict, allow_none=True, skip_invalid=False):
+                res_list.append(line)
+    except Exception:
+        # Fallback to reading line by line
+        res_list = []
+        with open(jsonl_file, 'r', encoding='utf-8') as f:
+            for line in f:
+                if line.strip():  # Skip empty lines
+                    res_list.append(json.loads(line.strip()))
+    if not res_list:
+        logger.warning(f'No data found in {jsonl_file}.')
     return res_list
 
 
@@ -272,7 +284,90 @@ def get_valid_list(input_list, candidate_list):
 
 
 def PIL_to_base64(image: Image.Image, format: str = 'JPEG') -> str:
+    """
+    Convert a PIL Image to a base64 encoded string.
+
+    Args:
+        image (Image.Image): The PIL Image to convert.
+        format (str): The format to save the image in. Default is 'JPEG'.
+    Returns:
+        str: Base64 encoded string of the image.
+    """
     buffered = BytesIO()
     image.save(buffered, format=format)
     img_str = base64.b64encode(buffered.getvalue()).decode('utf-8')
     return img_str
+
+
+def safe_filename(s: str, max_length: int = 255) -> str:
+    """
+    Convert a string into a safe filename by removing or replacing unsafe characters.
+
+    Args:
+        s (str): The input string to convert
+        max_length (int): Maximum length of the resulting filename (default 255)
+
+    Returns:
+        str: A safe filename string
+
+    Examples:
+        >>> safe_filename("Hello/World?.txt")
+        'Hello_World.txt'
+    """
+    # normalize unicode characters
+    s = unicodedata.normalize('NFKD', s)
+    s = s.encode('ASCII', 'ignore').decode('ASCII')
+
+    # remove or replace unsafe characters
+    # Keep only alphanumeric characters, dots, dashes, and underscores
+    safe_chars = string.ascii_letters + string.digits + '.-_'
+    s = ''.join(c if c in safe_chars else '_' for c in s)
+
+    # remove consecutive underscores
+    s = re.sub(r'_+', '_', s)
+
+    # remove leading/trailing periods and underscores
+    s = s.strip('._')
+
+    # handle empty string case
+    if not s:
+        s = 'untitled'
+
+    # handle starting with a period (hidden files)
+    if s.startswith('.'):
+        s = '_' + s
+
+    # enforce length limit
+    if len(s) > max_length:
+        # If we need to truncate, preserve the file extension if present
+        name, ext = os.path.splitext(s)
+        ext_len = len(ext)
+        if ext_len > 0:
+            max_name_length = max_length - ext_len
+            s = name[:max_name_length] + ext
+        else:
+            s = s[:max_length]
+
+    return s
+
+
+def convert_numpy_types(obj):
+    """Recursively convert numpy types to native Python types for JSON serialization."""
+    import numpy as np
+
+    if isinstance(obj, np.bool_):
+        return bool(obj)
+    elif isinstance(obj, np.integer):
+        return int(obj)
+    elif isinstance(obj, np.floating):
+        return float(obj)
+    elif isinstance(obj, np.ndarray):
+        return obj.tolist()
+    elif isinstance(obj, dict):
+        return {key: convert_numpy_types(value) for key, value in obj.items()}
+    elif isinstance(obj, list):
+        return [convert_numpy_types(item) for item in obj]
+    elif isinstance(obj, tuple):
+        return tuple(convert_numpy_types(item) for item in obj)
+    else:
+        return obj
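A brief illustration of the two new helpers added above (the inputs are made up; the expected results follow directly from the code shown):

import numpy as np

from evalscope.utils.io_utils import convert_numpy_types, safe_filename

safe_filename('outputs/Qwen2 7B report.json')
# -> 'outputs_Qwen2_7B_report.json'
convert_numpy_types({'acc': np.float64(0.875), 'ids': np.array([1, 2])})
# -> {'acc': 0.875, 'ids': [1, 2]}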
evalscope/utils/json_schema.py ADDED
@@ -0,0 +1,208 @@
+import types
+import typing
+from copy import deepcopy
+from dataclasses import is_dataclass
+from datetime import date, datetime, time
+from enum import EnumMeta
+from pydantic import BaseModel, Field
+from typing import (
+    Any,
+    Dict,
+    List,
+    Literal,
+    Optional,
+    Set,
+    Tuple,
+    Type,
+    Union,
+    cast,
+    get_args,
+    get_origin,
+    get_type_hints,
+    is_typeddict,
+)
+
+JSONType = Literal['string', 'integer', 'number', 'boolean', 'array', 'object', 'null']
+"""Valid types within JSON schema."""
+
+
+class JSONSchema(BaseModel):
+    """JSON Schema for type."""
+
+    type: Optional[JSONType] = Field(default=None)
+    """JSON type of tool parameter."""
+
+    format: Optional[str] = Field(default=None)
+    """Format of the parameter (e.g. date-time)."""
+
+    description: Optional[str] = Field(default=None)
+    """Parameter description."""
+
+    default: Any = Field(default=None)
+    """Default value for parameter."""
+
+    enum: Optional[List[Any]] = Field(default=None)
+    """Valid values for enum parameters."""
+
+    items: Optional['JSONSchema'] = Field(default=None)
+    """Valid type for array parameters."""
+
+    properties: Optional[Dict[str, 'JSONSchema']] = Field(default=None)
+    """Valid fields for object parametrs."""
+
+    additionalProperties: Optional[Union['JSONSchema', bool]] = Field(default=None)
+    """Are additional properties allowed?"""
+
+    anyOf: Optional[List['JSONSchema']] = Field(default=None)
+    """Valid types for union parameters."""
+
+    required: Optional[List[str]] = Field(default=None)
+    """Required fields for object parameters."""
+
+
+def json_schema(t: Type[Any]) -> JSONSchema:
+    """Provide a JSON Schema for the specified type.
+
+    Schemas can be automatically inferred for a wide variety of
+    Python class types including Pydantic BaseModel, dataclasses,
+    and typed dicts.
+
+    Args:
+        t: Python type
+
+    Returns:
+        JSON Schema for type.
+    """
+    origin = get_origin(t)
+    args = get_args(t)
+
+    if origin is None:
+        if t is int:
+            return JSONSchema(type='integer')
+        elif t is float:
+            return JSONSchema(type='number')
+        elif t is str:
+            return JSONSchema(type='string')
+        elif t is bool:
+            return JSONSchema(type='boolean')
+        elif t is datetime:
+            return JSONSchema(type='string', format='date-time')
+        elif t is date:
+            return JSONSchema(type='string', format='date')
+        elif t is time:
+            return JSONSchema(type='string', format='time')
+        elif t is list or t is set:
+            return JSONSchema(type='array', items=JSONSchema())
+        elif t is dict:
+            return JSONSchema(type='object', additionalProperties=JSONSchema())
+        elif (is_dataclass(t) or is_typeddict(t) or (isinstance(t, type) and issubclass(t, BaseModel))):
+            return cls_json_schema(t)
+        elif isinstance(t, EnumMeta):
+            return JSONSchema(enum=[item.value for item in t])
+        elif t is type(None):
+            return JSONSchema(type='null')
+        else:
+            return JSONSchema()
+    elif (origin is list or origin is List or origin is tuple or origin is Tuple or origin is set or origin is Set):
+        return JSONSchema(type='array', items=json_schema(args[0]) if args else JSONSchema())
+    elif origin is dict or origin is Dict:
+        return JSONSchema(
+            type='object',
+            additionalProperties=json_schema(args[1]) if len(args) > 1 else JSONSchema(),
+        )
+    elif origin is Union or origin is types.UnionType:
+        return JSONSchema(anyOf=[json_schema(arg) for arg in args])
+    elif origin is Optional:
+        return JSONSchema(anyOf=[json_schema(arg) for arg in args] + [JSONSchema(type='null')])
+    elif origin is typing.Literal:
+        return JSONSchema(enum=list(args))
+
+    return JSONSchema()  # Default case if we can't determine the type
+
+
+def cls_json_schema(cls: Type[Any]) -> JSONSchema:
+    properties: Dict[str, JSONSchema] = {}
+    required: List[str] = []
+
+    if is_dataclass(cls):
+        fields = cls.__dataclass_fields__  # type: ignore
+        for name, field in fields.items():
+            properties[name] = json_schema(field.type)  # type: ignore
+            if field.default == field.default_factory:
+                required.append(name)
+    elif isinstance(cls, type) and issubclass(cls, BaseModel):
+        schema = cls.model_json_schema()
+        schema = resolve_schema_references(schema)
+        for name, prop in schema.get('properties', {}).items():
+            properties[name] = JSONSchema(**prop)
+        required = schema.get('required', [])
+    elif is_typeddict(cls):
+        annotations = get_type_hints(cls)
+        for name, type_hint in annotations.items():
+            properties[name] = json_schema(type_hint)
+            if name in cls.__required_keys__:
+                required.append(name)
+
+    return JSONSchema(
+        type='object',
+        properties=properties,
+        required=required if required else None,
+        additionalProperties=False,
+    )
+
+
+def python_type_to_json_type(python_type: Optional[str]) -> JSONType:
+    if python_type == 'str':
+        return 'string'
+    elif python_type == 'int':
+        return 'integer'
+    elif python_type == 'float':
+        return 'number'
+    elif python_type == 'bool':
+        return 'boolean'
+    elif python_type == 'list':
+        return 'array'
+    elif python_type == 'dict':
+        return 'object'
+    elif python_type == 'None':
+        return 'null'
+    elif python_type is None:
+        # treat 'unknown' as string as anything can be converted to string
+        return 'string'
+    else:
+        raise ValueError(f'Unsupported type: {python_type} for Python to JSON conversion.')
+
+
+def resolve_schema_references(schema: Dict[str, Any]) -> Dict[str, Any]:
+    """Resolves all $ref references in a JSON schema by inlining the definitions."""
+    schema = deepcopy(schema)
+    definitions = schema.pop('$defs', {})
+
+    def _resolve_refs(obj: Any) -> Any:
+        if isinstance(obj, dict):
+            if '$ref' in obj and obj['$ref'].startswith('#/$defs/'):
+                ref_key = obj['$ref'].split('/')[-1]
+                if ref_key in definitions:
+                    # Replace with a deep copy of the definition
+                    resolved = deepcopy(definitions[ref_key])
+                    # Process any nested references in the definition
+                    resolved = _resolve_refs(resolved)
+
+                    # Merge in the current object fields, which should take priority
+                    # This means that if you have e.g.
+                    # {"$ref": "#/$defs/SubType", "description": "subtype of type SubType"},
+                    # and SubType resolves to
+                    # {"description": "The SubType Class", "parameters": {"param1": {"type": "string"}}},
+                    # the final result will be:
+                    # {"description": "subtype of type SubType", "parameters": {"param1": {"type": "string"}}}
+                    return resolved | {k: o for k, o in obj.items() if k != '$ref'}
+
+            # Process all entries in the dictionary
+            return {k: _resolve_refs(v) for k, v in obj.items()}
+        elif isinstance(obj, list):
+            return [_resolve_refs(item) for item in obj]
+        else:
+            return obj
+
+    return cast(Dict[str, Any], _resolve_refs(schema))
+    return cast(Dict[str, Any], _resolve_refs(schema))
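To show what the new module produces, a small sketch applying json_schema to a dataclass (the class is hypothetical; the behaviour follows from the code above): fields without defaults land in required, and additionalProperties is set to False.

from dataclasses import dataclass

from evalscope.utils.json_schema import json_schema


@dataclass
class SearchArgs:
    query: str
    top_k: int = 5


schema = json_schema(SearchArgs)
# schema.type == 'object'
# schema.properties maps 'query' to a string schema and 'top_k' to an integer schema
# schema.required == ['query']  (top_k has a default, so it is optional)
print(schema.model_dump(exclude_none=True))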
evalscope/utils/logger.py CHANGED
@@ -1,18 +1,27 @@
+import colorlog
 import importlib.util as iutil
 import logging
 import os
-from typing import Optional
+from logging import Logger
+from typing import List, Optional
 
 init_loggers = {}
+# Define log formats
+data_format = '%Y-%m-%d %H:%M:%S'
+# For console output
+color_detailed_format = '%(asctime)s - %(name)s - %(filename)s - %(funcName)s - %(lineno)d - %(log_color)s%(levelname)s%(reset)s: %(message)s'  # noqa:E501
+color_simple_format = '%(asctime)s - %(name)s - %(log_color)s%(levelname)s%(reset)s: %(message)s'
+color_detailed_formatter = colorlog.ColoredFormatter(color_detailed_format, datefmt=data_format)
+color_simple_formatter = colorlog.ColoredFormatter(color_simple_format, datefmt=data_format)
+# For file output
+detailed_format = '%(asctime)s - %(name)s - %(filename)s - %(funcName)s - %(lineno)d - %(levelname)s: %(message)s'  # noqa:E501
+simple_format = '%(asctime)s - %(name)s - %(levelname)s: %(message)s'
+plain_detailed_formatter = logging.Formatter(detailed_format, datefmt=data_format)
+plain_simple_formatter = logging.Formatter(simple_format, datefmt=data_format)
 
-detailed_format = '%(asctime)s - %(name)s - %(filename)s - %(funcName)s - %(lineno)d - %(levelname)s - %(message)s'
-simple_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
-
-detailed_formatter = logging.Formatter(detailed_format)
-simple_formatter = logging.Formatter(simple_format)
 DEFAULT_LEVEL = logging.DEBUG if os.getenv('EVALSCOPE_LOG_LEVEL', 'INFO') == 'DEBUG' else logging.INFO
 
-logging.basicConfig(format=simple_format, level=DEFAULT_LEVEL, force=True)
+logging.basicConfig(format=simple_format, level=logging.INFO, force=True)
 
 # set logging level
 logging.getLogger('datasets').setLevel(logging.WARNING)
@@ -20,7 +29,13 @@ logging.getLogger('httpx').setLevel(logging.WARNING)
 logging.getLogger('modelscope').setLevel(logging.ERROR)
 
 
-def get_logger(log_file: Optional[str] = None, log_level: int = DEFAULT_LEVEL, file_mode: str = 'w', force=False):
+def get_logger(
+    log_file: Optional[str] = None,
+    name: Optional[str] = None,
+    log_level: int = DEFAULT_LEVEL,
+    file_mode: str = 'w',
+    force=False
+):
     """Get logging logger
 
     Args:
@@ -31,7 +46,10 @@ def get_logger(log_file: Optional[str] = None, log_level: int = DEFAULT_LEVEL, f
             specified (if filemode is unspecified, it defaults to 'w').
     """
 
-    logger_name = __name__.split('.')[0]
+    if name:
+        logger_name = f"evalscope.{name.split('.')[-1]}"
+    else:
+        logger_name = 'evalscope'
     logger = logging.getLogger(logger_name)
     logger.propagate = False
 
@@ -40,7 +58,15 @@ def get_logger(log_file: Optional[str] = None, log_level: int = DEFAULT_LEVEL, f
     logger.setLevel(log_level)
     for handler in logger.handlers:
         handler.setLevel(log_level)
-        handler.setFormatter(detailed_formatter if log_level == logging.DEBUG else simple_formatter)
+        # Use the appropriate formatter for each handler type
+        if isinstance(handler, logging.FileHandler):
+            handler.setFormatter(
+                plain_detailed_formatter if log_level == logging.DEBUG else plain_simple_formatter
+            )
+        else:
+            handler.setFormatter(
+                color_detailed_formatter if log_level == logging.DEBUG else color_simple_formatter
+            )
     add_file_handler_if_needed(logger, log_file, file_mode, log_level)
     return logger
 
@@ -66,7 +92,11 @@ def get_logger(log_file: Optional[str] = None, log_level: int = DEFAULT_LEVEL, f
         handlers.append(file_handler)
 
     for handler in handlers:
-        handler.setFormatter(detailed_formatter if log_level == logging.DEBUG else simple_formatter)
+        # Use the appropriate formatter for each handler type
+        if isinstance(handler, logging.FileHandler):
+            handler.setFormatter(plain_detailed_formatter if log_level == logging.DEBUG else plain_simple_formatter)
+        else:
+            handler.setFormatter(color_detailed_formatter if log_level == logging.DEBUG else color_simple_formatter)
         handler.setLevel(log_level)
         logger.addHandler(handler)
 
@@ -102,6 +132,15 @@ def add_file_handler_if_needed(logger, log_file, file_mode, log_level):
 
     if is_worker0 and log_file is not None:
         file_handler = logging.FileHandler(log_file, file_mode)
-        file_handler.setFormatter(detailed_formatter if log_level == logging.DEBUG else simple_formatter)
+        file_handler.setFormatter(plain_detailed_formatter if log_level == logging.DEBUG else plain_simple_formatter)
         file_handler.setLevel(log_level)
         logger.addHandler(file_handler)
+
+
+def warn_once(logger: Logger, message: str) -> None:
+    if message not in _warned:
+        logger.warning(message)
+        _warned.append(message)
+
+
+_warned: List[str] = []
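Finally, a small sketch of calling the reworked get_logger (the log file path is illustrative): console handlers receive the colorlog formatters, file handlers fall back to the plain ones, and the name argument is mapped onto an 'evalscope.<suffix>' logger.

from evalscope.utils.logger import get_logger

logger = get_logger(name=__name__, log_file='outputs/eval.log', force=True)
logger.info('colored output on the console, plain text in outputs/eval.log')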