evalscope 1.0.0__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- evalscope/api/benchmark/__init__.py +9 -1
- evalscope/api/benchmark/adapters/__init__.py +4 -0
- evalscope/api/benchmark/adapters/agent_adapter.py +8 -0
- evalscope/api/benchmark/adapters/default_data_adapter.py +75 -4
- evalscope/api/benchmark/adapters/image_edit_adapter.py +82 -0
- evalscope/api/benchmark/adapters/multi_choice_adapter.py +5 -2
- evalscope/api/benchmark/adapters/ner_adapter.py +212 -0
- evalscope/api/benchmark/adapters/text2image_adapter.py +12 -10
- evalscope/api/benchmark/adapters/vision_language_adapter.py +8 -0
- evalscope/api/benchmark/benchmark.py +85 -2
- evalscope/api/benchmark/meta.py +10 -1
- evalscope/api/dataset/dataset.py +27 -6
- evalscope/api/dataset/loader.py +8 -3
- evalscope/api/evaluator/cache.py +31 -4
- evalscope/api/evaluator/evaluator.py +5 -0
- evalscope/api/evaluator/state.py +17 -1
- evalscope/api/messages/__init__.py +1 -0
- evalscope/api/messages/chat_message.py +52 -2
- evalscope/api/metric/__init__.py +1 -1
- evalscope/api/metric/metric.py +6 -1
- evalscope/api/metric/scorer.py +15 -7
- evalscope/api/mixin/__init__.py +1 -1
- evalscope/api/mixin/llm_judge_mixin.py +2 -0
- evalscope/api/mixin/sandbox_mixin.py +182 -0
- evalscope/api/model/generate_config.py +10 -6
- evalscope/api/model/model.py +5 -2
- evalscope/api/tool/tool_info.py +1 -1
- evalscope/app/app.py +3 -0
- evalscope/app/ui/multi_model.py +6 -1
- evalscope/app/ui/single_model.py +11 -5
- evalscope/app/utils/data_utils.py +8 -7
- evalscope/app/utils/env_utils.py +12 -0
- evalscope/app/utils/text_utils.py +14 -12
- evalscope/app/utils/visualization.py +2 -2
- evalscope/arguments.py +8 -4
- evalscope/backend/opencompass/backend_manager.py +0 -2
- evalscope/backend/rag_eval/utils/embedding.py +9 -1
- evalscope/benchmarks/aa_lcr/aa_lcr_adapter.py +205 -0
- evalscope/benchmarks/ai2d/ai2d_adapter.py +54 -0
- evalscope/benchmarks/aime/aime24_adapter.py +5 -0
- evalscope/benchmarks/aime/aime25_adapter.py +136 -1
- evalscope/benchmarks/aime/grader.py +307 -0
- evalscope/benchmarks/aime/math_normalize.py +189 -0
- evalscope/benchmarks/amc/amc_adapter.py +51 -0
- evalscope/benchmarks/arena_hard/arena_hard_adapter.py +1 -0
- evalscope/benchmarks/bbh/bbh_adapter.py +43 -17
- evalscope/benchmarks/bfcl/{bfcl_adapter.py → v3/bfcl_v3_adapter.py} +131 -19
- evalscope/benchmarks/bfcl/{generation.py → v3/generation.py} +9 -9
- evalscope/benchmarks/bfcl/v3/utils.py +23 -0
- evalscope/benchmarks/bfcl/v4/__init__.py +0 -0
- evalscope/benchmarks/bfcl/v4/bfcl_v4_adapter.py +229 -0
- evalscope/benchmarks/bfcl/v4/utils.py +410 -0
- evalscope/benchmarks/biomix_qa/__init__.py +0 -0
- evalscope/benchmarks/biomix_qa/biomix_qa_adapter.py +36 -0
- evalscope/benchmarks/blink/__init__.py +0 -0
- evalscope/benchmarks/blink/blink_adapter.py +61 -0
- evalscope/benchmarks/ceval/ceval_adapter.py +1 -2
- evalscope/benchmarks/chartqa/__init__.py +0 -0
- evalscope/benchmarks/chartqa/chartqa_adapter.py +80 -0
- evalscope/benchmarks/chartqa/utils.py +38 -0
- evalscope/benchmarks/coin_flip/__init__.py +0 -0
- evalscope/benchmarks/coin_flip/coin_flip_adapter.py +128 -0
- evalscope/benchmarks/commonsense_qa/__init__.py +0 -0
- evalscope/benchmarks/commonsense_qa/commonsense_qa_adapter.py +32 -0
- evalscope/benchmarks/competition_math/competition_math_adapter.py +5 -0
- evalscope/benchmarks/data_collection/data_collection_adapter.py +24 -19
- evalscope/benchmarks/docvqa/__init__.py +0 -0
- evalscope/benchmarks/docvqa/docvqa_adapter.py +67 -0
- evalscope/benchmarks/drivelology/__init__.py +0 -0
- evalscope/benchmarks/drivelology/drivelology_binary_adapter.py +170 -0
- evalscope/benchmarks/drivelology/drivelology_multilabel_adapter.py +254 -0
- evalscope/benchmarks/drivelology/drivelology_selection_adapter.py +49 -0
- evalscope/benchmarks/drivelology/drivelology_writing_adapter.py +218 -0
- evalscope/benchmarks/drop/drop_adapter.py +15 -44
- evalscope/benchmarks/drop/utils.py +97 -0
- evalscope/benchmarks/frames/frames_adapter.py +2 -1
- evalscope/benchmarks/general_arena/general_arena_adapter.py +7 -2
- evalscope/benchmarks/general_arena/utils.py +2 -1
- evalscope/benchmarks/general_mcq/general_mcq_adapter.py +1 -1
- evalscope/benchmarks/general_qa/general_qa_adapter.py +1 -1
- evalscope/benchmarks/gsm8k/gsm8k_adapter.py +25 -9
- evalscope/benchmarks/hallusion_bench/__init__.py +0 -0
- evalscope/benchmarks/hallusion_bench/hallusion_bench_adapter.py +159 -0
- evalscope/benchmarks/halu_eval/__init__.py +0 -0
- evalscope/benchmarks/halu_eval/halu_eval_adapter.py +128 -0
- evalscope/benchmarks/halu_eval/halu_eval_instructions.py +84 -0
- evalscope/benchmarks/healthbench/__init__.py +0 -0
- evalscope/benchmarks/healthbench/healthbench_adapter.py +282 -0
- evalscope/benchmarks/healthbench/utils.py +102 -0
- evalscope/benchmarks/hle/hle_adapter.py +3 -2
- evalscope/benchmarks/humaneval/humaneval_adapter.py +24 -52
- evalscope/benchmarks/humaneval/utils.py +235 -0
- evalscope/benchmarks/ifeval/instructions_util.py +2 -3
- evalscope/benchmarks/image_edit/__init__.py +0 -0
- evalscope/benchmarks/image_edit/gedit/__init__.py +0 -0
- evalscope/benchmarks/image_edit/gedit/gedit_adapter.py +138 -0
- evalscope/benchmarks/image_edit/gedit/utils.py +372 -0
- evalscope/benchmarks/image_edit/gedit/vie_prompts.py +406 -0
- evalscope/benchmarks/infovqa/__init__.py +0 -0
- evalscope/benchmarks/infovqa/infovqa_adapter.py +66 -0
- evalscope/benchmarks/live_code_bench/evaluate_utils.py +13 -6
- evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py +66 -54
- evalscope/benchmarks/live_code_bench/sandbox_evaluate_utils.py +220 -0
- evalscope/benchmarks/logi_qa/__int__.py +0 -0
- evalscope/benchmarks/logi_qa/logi_qa_adapter.py +41 -0
- evalscope/benchmarks/math_500/math_500_adapter.py +5 -1
- evalscope/benchmarks/math_qa/__init__.py +0 -0
- evalscope/benchmarks/math_qa/math_qa_adapter.py +35 -0
- evalscope/benchmarks/math_verse/__init__.py +0 -0
- evalscope/benchmarks/math_verse/math_verse_adapter.py +105 -0
- evalscope/benchmarks/math_vision/__init__.py +0 -0
- evalscope/benchmarks/math_vision/math_vision_adapter.py +116 -0
- evalscope/benchmarks/math_vista/__init__.py +0 -0
- evalscope/benchmarks/math_vista/math_vista_adapter.py +114 -0
- evalscope/benchmarks/med_mcqa/__init__.py +0 -0
- evalscope/benchmarks/med_mcqa/med_mcqa_adapter.py +32 -0
- evalscope/benchmarks/minerva_math/__init__.py +0 -0
- evalscope/benchmarks/minerva_math/minerva_math_adapter.py +53 -0
- evalscope/benchmarks/mm_bench/__init__.py +0 -0
- evalscope/benchmarks/mm_bench/mm_bench_adapter.py +99 -0
- evalscope/benchmarks/mm_star/__init__.py +0 -0
- evalscope/benchmarks/mm_star/mm_star_adapter.py +73 -0
- evalscope/benchmarks/mmlu_pro/mmlu_pro_adapter.py +1 -1
- evalscope/benchmarks/mmmu/__init__.py +0 -0
- evalscope/benchmarks/mmmu/mmmu_adapter.py +159 -0
- evalscope/benchmarks/mmmu_pro/__init__.py +0 -0
- evalscope/benchmarks/mmmu_pro/mmmu_pro_adapter.py +124 -0
- evalscope/benchmarks/mri_mcqa/__init__.py +0 -0
- evalscope/benchmarks/mri_mcqa/mri_mcqa_adapter.py +34 -0
- evalscope/benchmarks/multi_if/__init__.py +0 -0
- evalscope/benchmarks/multi_if/ifeval.py +3354 -0
- evalscope/benchmarks/multi_if/metrics.py +120 -0
- evalscope/benchmarks/multi_if/multi_if_adapter.py +161 -0
- evalscope/benchmarks/music_trivia/__init__.py +0 -0
- evalscope/benchmarks/music_trivia/music_trivia_adapter.py +36 -0
- evalscope/benchmarks/needle_haystack/needle_haystack_adapter.py +7 -6
- evalscope/benchmarks/ner/__init__.py +0 -0
- evalscope/benchmarks/ner/broad_twitter_corpus_adapter.py +52 -0
- evalscope/benchmarks/ner/conll2003_adapter.py +48 -0
- evalscope/benchmarks/ner/copious_adapter.py +85 -0
- evalscope/benchmarks/ner/cross_ner_adapter.py +120 -0
- evalscope/benchmarks/ner/cross_ner_entities/__init__.py +0 -0
- evalscope/benchmarks/ner/cross_ner_entities/ai.py +54 -0
- evalscope/benchmarks/ner/cross_ner_entities/literature.py +36 -0
- evalscope/benchmarks/ner/cross_ner_entities/music.py +39 -0
- evalscope/benchmarks/ner/cross_ner_entities/politics.py +37 -0
- evalscope/benchmarks/ner/cross_ner_entities/science.py +58 -0
- evalscope/benchmarks/ner/genia_ner_adapter.py +66 -0
- evalscope/benchmarks/ner/harvey_ner_adapter.py +58 -0
- evalscope/benchmarks/ner/mit_movie_trivia_adapter.py +74 -0
- evalscope/benchmarks/ner/mit_restaurant_adapter.py +66 -0
- evalscope/benchmarks/ner/ontonotes5_adapter.py +87 -0
- evalscope/benchmarks/ner/wnut2017_adapter.py +61 -0
- evalscope/benchmarks/ocr_bench/__init__.py +0 -0
- evalscope/benchmarks/ocr_bench/ocr_bench/__init__.py +0 -0
- evalscope/benchmarks/ocr_bench/ocr_bench/ocr_bench_adapter.py +101 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/IoUscore_metric.py +87 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/TEDS_metric.py +963 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/__init__.py +0 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/ocr_bench_v2_adapter.py +161 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/page_ocr_metric.py +50 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/parallel.py +46 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_eval/__init__.py +0 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_eval/readme.txt +26 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_eval/rrc_evaluation_funcs_1_1.py +537 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_eval/script.py +481 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_metric.py +179 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/utils.py +433 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/vqa_metric.py +254 -0
- evalscope/benchmarks/olympiad_bench/__init__.py +0 -0
- evalscope/benchmarks/olympiad_bench/olympiad_bench_adapter.py +163 -0
- evalscope/benchmarks/olympiad_bench/utils.py +565 -0
- evalscope/benchmarks/omni_bench/__init__.py +0 -0
- evalscope/benchmarks/omni_bench/omni_bench_adapter.py +86 -0
- evalscope/benchmarks/omnidoc_bench/__init__.py +0 -0
- evalscope/benchmarks/omnidoc_bench/end2end_eval.py +349 -0
- evalscope/benchmarks/omnidoc_bench/metrics.py +547 -0
- evalscope/benchmarks/omnidoc_bench/omnidoc_bench_adapter.py +135 -0
- evalscope/benchmarks/omnidoc_bench/utils.py +1937 -0
- evalscope/benchmarks/piqa/__init__.py +0 -0
- evalscope/benchmarks/piqa/piqa_adapter.py +32 -0
- evalscope/benchmarks/poly_math/__init__.py +0 -0
- evalscope/benchmarks/poly_math/poly_math_adapter.py +132 -0
- evalscope/benchmarks/poly_math/utils/instruction.py +105 -0
- evalscope/benchmarks/pope/__init__.py +0 -0
- evalscope/benchmarks/pope/pope_adapter.py +112 -0
- evalscope/benchmarks/process_bench/process_bench_adapter.py +1 -0
- evalscope/benchmarks/pumed_qa/__init__.py +0 -0
- evalscope/benchmarks/pumed_qa/pubmed_qa_adapter.py +175 -0
- evalscope/benchmarks/qasc/__init__.py +0 -0
- evalscope/benchmarks/qasc/qasc_adapter.py +35 -0
- evalscope/benchmarks/real_world_qa/__init__.py +0 -0
- evalscope/benchmarks/real_world_qa/real_world_qa_adapter.py +64 -0
- evalscope/benchmarks/sciq/__init__.py +0 -0
- evalscope/benchmarks/sciq/sciq_adapter.py +36 -0
- evalscope/benchmarks/seed_bench_2_plus/__init__.py +0 -0
- evalscope/benchmarks/seed_bench_2_plus/seed_bench_2_plus_adapter.py +72 -0
- evalscope/benchmarks/simple_qa/simple_qa_adapter.py +1 -1
- evalscope/benchmarks/simple_vqa/__init__.py +0 -0
- evalscope/benchmarks/simple_vqa/simple_vqa_adapter.py +169 -0
- evalscope/benchmarks/siqa/__init__.py +0 -0
- evalscope/benchmarks/siqa/siqa_adapter.py +39 -0
- evalscope/benchmarks/tau_bench/tau2_bench/__init__.py +0 -0
- evalscope/benchmarks/tau_bench/tau2_bench/generation.py +158 -0
- evalscope/benchmarks/tau_bench/tau2_bench/tau2_bench_adapter.py +146 -0
- evalscope/benchmarks/tau_bench/tau_bench/__init__.py +0 -0
- evalscope/benchmarks/tau_bench/{generation.py → tau_bench/generation.py} +1 -1
- evalscope/benchmarks/tau_bench/{tau_bench_adapter.py → tau_bench/tau_bench_adapter.py} +29 -29
- evalscope/benchmarks/text2image/__init__.py +0 -0
- evalscope/benchmarks/{aigc/t2i → text2image}/evalmuse_adapter.py +3 -1
- evalscope/benchmarks/{aigc/t2i → text2image}/genai_bench_adapter.py +2 -2
- evalscope/benchmarks/{aigc/t2i → text2image}/general_t2i_adapter.py +1 -1
- evalscope/benchmarks/{aigc/t2i → text2image}/hpdv2_adapter.py +7 -2
- evalscope/benchmarks/{aigc/t2i → text2image}/tifa_adapter.py +1 -0
- evalscope/benchmarks/tool_bench/tool_bench_adapter.py +3 -3
- evalscope/benchmarks/truthful_qa/truthful_qa_adapter.py +1 -2
- evalscope/benchmarks/visu_logic/__init__.py +0 -0
- evalscope/benchmarks/visu_logic/visu_logic_adapter.py +75 -0
- evalscope/benchmarks/wmt/__init__.py +0 -0
- evalscope/benchmarks/wmt/wmt24_adapter.py +294 -0
- evalscope/benchmarks/zerobench/__init__.py +0 -0
- evalscope/benchmarks/zerobench/zerobench_adapter.py +64 -0
- evalscope/cli/start_app.py +7 -1
- evalscope/cli/start_perf.py +7 -1
- evalscope/config.py +103 -18
- evalscope/constants.py +18 -0
- evalscope/evaluator/evaluator.py +138 -82
- evalscope/metrics/bert_score/__init__.py +0 -0
- evalscope/metrics/bert_score/scorer.py +338 -0
- evalscope/metrics/bert_score/utils.py +697 -0
- evalscope/metrics/llm_judge.py +19 -7
- evalscope/metrics/math_parser.py +14 -0
- evalscope/metrics/metric.py +317 -13
- evalscope/metrics/metrics.py +37 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/config.py +0 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/dist_utils.py +0 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/gradcam.py +0 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/logger.py +0 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/optims.py +0 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/registry.py +0 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/utils.py +0 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/__init__.py +0 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa.py +0 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa_eval.py +0 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py +2 -6
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py +2 -6
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py +2 -6
- evalscope/models/image_edit_model.py +125 -0
- evalscope/models/model_apis.py +22 -0
- evalscope/models/openai_compatible.py +21 -0
- evalscope/models/text2image_model.py +2 -2
- evalscope/models/utils/openai.py +16 -6
- evalscope/perf/arguments.py +26 -4
- evalscope/perf/benchmark.py +76 -89
- evalscope/perf/http_client.py +31 -16
- evalscope/perf/main.py +15 -2
- evalscope/perf/plugin/api/base.py +9 -7
- evalscope/perf/plugin/api/custom_api.py +13 -58
- evalscope/perf/plugin/api/default_api.py +188 -79
- evalscope/perf/plugin/api/openai_api.py +85 -20
- evalscope/perf/plugin/datasets/base.py +21 -0
- evalscope/perf/plugin/datasets/custom.py +2 -3
- evalscope/perf/plugin/datasets/flickr8k.py +2 -2
- evalscope/perf/plugin/datasets/kontext_bench.py +2 -2
- evalscope/perf/plugin/datasets/line_by_line.py +2 -3
- evalscope/perf/plugin/datasets/longalpaca.py +2 -3
- evalscope/perf/plugin/datasets/openqa.py +2 -4
- evalscope/perf/plugin/datasets/random_dataset.py +1 -3
- evalscope/perf/plugin/datasets/random_vl_dataset.py +2 -2
- evalscope/perf/utils/benchmark_util.py +43 -27
- evalscope/perf/utils/db_util.py +14 -19
- evalscope/perf/utils/local_server.py +3 -44
- evalscope/perf/utils/log_utils.py +21 -6
- evalscope/report/__init__.py +13 -3
- evalscope/report/combinator.py +91 -20
- evalscope/report/generator.py +8 -87
- evalscope/report/report.py +8 -4
- evalscope/run.py +13 -5
- evalscope/third_party/toolbench_static/llm/swift_infer.py +0 -4
- evalscope/utils/argument_utils.py +1 -1
- evalscope/utils/chat_service.py +1 -1
- evalscope/utils/function_utils.py +249 -12
- evalscope/utils/import_utils.py +73 -1
- evalscope/utils/io_utils.py +132 -7
- evalscope/utils/json_schema.py +25 -2
- evalscope/utils/logger.py +69 -18
- evalscope/utils/model_utils.py +4 -3
- evalscope/utils/multi_choices.py +39 -7
- evalscope/utils/ner.py +377 -0
- evalscope/version.py +2 -2
- {evalscope-1.0.0.dist-info → evalscope-1.2.0.dist-info}/METADATA +252 -408
- {evalscope-1.0.0.dist-info → evalscope-1.2.0.dist-info}/RECORD +290 -154
- {evalscope-1.0.0.dist-info → evalscope-1.2.0.dist-info}/WHEEL +1 -1
- {evalscope-1.0.0.dist-info → evalscope-1.2.0.dist-info}/top_level.txt +0 -1
- evalscope/api/mixin/dataset_mixin.py +0 -105
- evalscope/benchmarks/aigc/i2i/general_i2i_adapter.py +0 -44
- tests/__init__.py +0 -1
- tests/aigc/__init__.py +0 -1
- tests/aigc/test_t2i.py +0 -142
- tests/benchmark/__init__.py +0 -1
- tests/benchmark/test_eval.py +0 -386
- tests/cli/__init__.py +0 -1
- tests/cli/test_all.py +0 -229
- tests/cli/test_collection.py +0 -96
- tests/cli/test_custom.py +0 -268
- tests/perf/__init__.py +0 -1
- tests/perf/test_perf.py +0 -176
- tests/rag/test_clip_benchmark.py +0 -90
- tests/rag/test_mteb.py +0 -213
- tests/rag/test_ragas.py +0 -128
- tests/swift/__init__.py +0 -1
- tests/swift/test_run_swift_eval.py +0 -146
- tests/swift/test_run_swift_vlm_eval.py +0 -128
- tests/swift/test_run_swift_vlm_jugde_eval.py +0 -157
- tests/test_run_all.py +0 -12
- tests/utils.py +0 -13
- tests/vlm/__init__.py +0 -1
- tests/vlm/test_vlmeval.py +0 -102
- /evalscope/benchmarks/{aigc → aa_lcr}/__init__.py +0 -0
- /evalscope/benchmarks/{aigc/i2i → ai2d}/__init__.py +0 -0
- /evalscope/benchmarks/{aigc/t2i → amc}/__init__.py +0 -0
- {tests/rag → evalscope/benchmarks/bfcl/v3}/__init__.py +0 -0
- {evalscope-1.0.0.dist-info → evalscope-1.2.0.dist-info}/entry_points.txt +0 -0
- {evalscope-1.0.0.dist-info → evalscope-1.2.0.dist-info/licenses}/LICENSE +0 -0
evalscope/utils/function_utils.py
CHANGED

@@ -1,29 +1,266 @@
+import asyncio
 import threading
+import time
+from concurrent.futures import ThreadPoolExecutor, wait
+from contextlib import contextmanager
 from functools import wraps
+from tqdm import tqdm
+from typing import Any, Awaitable, Callable, List, Optional, Sequence, TypeVar, Union
 
+from evalscope.utils.logger import get_logger
 
-def run_once(func):
-    """Decorator to ensure a function is only run once."""
-    has_run = False
-    result = None
+logger = get_logger()
 
+T = TypeVar('T')
+R = TypeVar('R')
+
+# Global lock to safely create per-instance locks in decorators
+_THREAD_SAFE_GLOBAL_LOCK = threading.RLock()
+
+
+def thread_safe(func: Callable[..., T]) -> Callable[..., T]:
+    """Thread-safe decorator.
+    - If decorating a bound method, uses a per-instance, per-method lock.
+    - If decorating a function, uses a function-scoped lock.
+    """
+    func_lock = threading.RLock()
+    lock_attr_name = f'__lock_{func.__name__}'
+
+    @wraps(func)
     def wrapper(*args, **kwargs):
-        nonlocal has_run, result
-        if not has_run:
-            result = func(*args, **kwargs)
-            has_run = True
-        return result
+        # Prefer per-instance lock if the first arg looks like 'self'
+        if args and hasattr(args[0], '__dict__'):
+            self_obj = args[0]
+            lock = getattr(self_obj, lock_attr_name, None)
+            if lock is None:
+                with _THREAD_SAFE_GLOBAL_LOCK:
+                    lock = getattr(self_obj, lock_attr_name, None)
+                    if lock is None:
+                        lock = threading.RLock()
+                        setattr(self_obj, lock_attr_name, lock)
+        else:
+            lock = func_lock
+
+        with lock:
+            return func(*args, **kwargs)
 
     return wrapper
 
 
-def thread_safe(func):
-    """Thread-safe decorator."""
+def run_once(func: Callable[..., T]) -> Callable[..., T]:
+    """Decorator to ensure a function is executed at most once across threads."""
     lock = threading.RLock()
+    has_run: bool = False
+    result: Optional[T] = None
 
     @wraps(func)
     def wrapper(*args, **kwargs):
+        nonlocal has_run, result
+        if has_run:
+            return result
+        # Double-checked locking to avoid redundant locking on hot path
         with lock:
-            return func(*args, **kwargs)
+            if not has_run:
+                result = func(*args, **kwargs)
+                has_run = True
+            return result
 
     return wrapper
+
+
+def retry_func(retries=3, sleep_interval=0):
+    """A decorator that retries a function call up to `retries` times if an exception occurs."""
+
+    def decorator(func):
+
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            last_exception = None
+            for attempt in range(retries):
+                try:
+                    return func(*args, **kwargs)
+                except Exception as e:
+                    last_exception = e
+                    if sleep_interval > 0:
+                        time.sleep(sleep_interval)
+            raise last_exception
+
+        return wrapper
+
+    return decorator
+
+
+@contextmanager
+def retry_context(retries=3, sleep_interval=0):
+    """A context manager that retries the code block up to `retries` times if an exception occurs."""
+    last_exception = None
+    for attempt in range(retries):
+        try:
+            yield
+            return  # If no exception, exit successfully
+        except Exception as e:
+            last_exception = e
+            if sleep_interval > 0:
+                time.sleep(sleep_interval)
+            if attempt == retries - 1:  # Last attempt
+                break
+    raise last_exception
+
+
+class AsyncioLoopRunner:
+    """Singleton background asyncio loop runner for sync→async bridging."""
+    _instance: Optional['AsyncioLoopRunner'] = None
+    _inst_lock = threading.Lock()
+
+    def __init__(self) -> None:
+        self._loop: Optional[asyncio.AbstractEventLoop] = None
+        self._thread: Optional[threading.Thread] = None
+        self._start_loop()
+
+    def _start_loop(self) -> None:
+        loop = asyncio.new_event_loop()
+        self._loop = loop
+
+        def run_loop() -> None:
+            asyncio.set_event_loop(loop)
+            loop.run_forever()
+
+        self._thread = threading.Thread(target=run_loop, daemon=True, name='AsyncioLoopRunner')
+        self._thread.start()
+
+    @classmethod
+    def instance(cls) -> 'AsyncioLoopRunner':
+        if cls._instance is not None:
+            return cls._instance
+        with cls._inst_lock:
+            if cls._instance is None:
+                cls._instance = AsyncioLoopRunner()
+        return cls._instance
+
+    @classmethod
+    def run(cls, coro: Awaitable[T], timeout: Optional[float] = None) -> T:
+        """Submit a coroutine to the background loop and wait for result."""
+        inst = cls.instance()
+        fut = asyncio.run_coroutine_threadsafe(coro, inst._loop)
+        return fut.result(timeout=timeout)
+
+    @property
+    def loop(self) -> Optional[asyncio.AbstractEventLoop]:
+        """Access the underlying event loop (read-only use)."""
+        return self._loop
+
+    def stop(self, join_timeout: float = 5.0) -> None:
+        """Optional shutdown of the background loop (generally not needed)."""
+        if not self._loop:
+            return
+        self._loop.call_soon_threadsafe(self._loop.stop)
+        if self._thread:
+            self._thread.join(timeout=join_timeout)
+
+
+def run_in_threads_with_progress(
+    items: Sequence[T],
+    worker: Callable[[T], R],
+    *,
+    desc: str,
+    max_workers: int,
+    heartbeat_sec: int,
+    on_result: Optional[Callable[[T, R], None]] = None,
+    on_error: Optional[Callable[[T, Exception], None]] = None,
+    filter_none_results: bool = False,
+) -> List[R]:
+    """
+    Execute a collection of tasks concurrently with a ThreadPoolExecutor while
+    displaying a tqdm progress bar and emitting periodic heartbeat logs.
+
+    Key behaviors:
+    - Concurrency: Uses up to `min(len(items), max_workers)` threads.
+    - Progress: A tqdm bar advances when each task finishes (success or failure).
+    - Heartbeat: If no tasks finish within `heartbeat_sec`, a status line is logged.
+    - Ordering: Results are appended in completion order (not the original order).
+    - Error handling:
+      * If `on_error` is provided, it is called for each failed item; execution continues
+        unless `on_error` itself raises.
+      * If `on_error` is None, the first exception is raised immediately and stops processing.
+    - Callbacks:
+      * `on_result(item, result)` is called after a successful result is obtained.
+      * Both callbacks run in the main thread (not worker threads).
+
+    Args:
+        items: A sequence of items (inputs) to process. Converted to a list internally.
+        worker: A callable executed in threads to process a single item and return a result.
+        desc: A short text shown as the tqdm progress bar description.
+        max_workers: Upper bound on the number of concurrent threads.
+        heartbeat_sec: Interval (in seconds) to wait before emitting a heartbeat log if
+            no tasks complete in that window.
+        on_result: Optional callback invoked as on_result(item, result) after success.
+        on_error: Optional callback invoked as on_error(item, exception) on failure. If omitted,
+            the exception is propagated and the function terminates early.
+
+    Returns:
+        A list of results collected as tasks complete (completion order).
+        If some tasks fail and `on_error` is provided (and does not re-raise), those failures
+        are skipped and not included in the returned results.
+
+    Raises:
+        Exception: Propagates the first task exception if `on_error` is not provided, or if
+            `on_error` re-raises.
+
+    Notes:
+    - The function is blocking until all tasks complete or an exception is propagated.
+    - Use `on_error` to implement "best-effort" processing where failures are logged
+      and the rest continue.
+    """
+    # Defensive copy to avoid consuming a generator multiple times and to compute pool size.
+    pending_items: List[T] = list(items)
+    if not pending_items:
+        return []
+
+    # Include indices to ensure results are returned in input order
+    indexed_items = list(enumerate(items))
+    results: List[Optional[R]] = [None] * len(items)  # Preallocate results list
+
+    # Bound the pool by actual workload size for efficiency.
+    with ThreadPoolExecutor(max_workers=min(len(indexed_items), max_workers)) as executor:
+        # Submit all tasks up-front and map futures back to their originating item.
+        future_to_index = {executor.submit(worker, item): index for index, item in indexed_items}
+
+        # Progress bar reflects total number of submitted tasks; updated per finished future.
+        with tqdm(total=len(indexed_items), desc=desc, mininterval=1, dynamic_ncols=True) as pbar:
+            # Track unfinished futures and poll with a timeout to enable heartbeat logs.
+            pending = set(future_to_index.keys())
+            while pending:
+                # Wait with timeout to detect stalls and emit heartbeats proactively.
+                done, not_done = wait(pending, timeout=heartbeat_sec)
+                if not done:
+                    # Heartbeat when nothing has completed within the window.
+                    logger.info(f'{desc} still processing... pending={len(not_done)}')
+                    continue
+
+                # Consume completed futures.
+                for future in done:
+                    index = future_to_index[future]
+                    try:
+                        res = future.result()
+                        results[index] = res  # Store result at the correct index
+                        # Invoke success callback in caller thread (not in worker).
+                        if on_result is not None:
+                            on_result(items[index], res)
+                    except Exception as exc:
+                        # Delegate failure handling to on_error if provided; otherwise bubble up.
+                        if on_error is not None:
+                            on_error(items[index], exc)
+                        else:
+                            raise
+                    finally:
+                        # Always advance progress for completed futures (success or failure).
+                        pbar.update(1)
+
+                # Continue polling remaining futures.
+                pending = not_done
+
+    # Return results, which are now guaranteed to be in input order
+    if filter_none_results:
+        # Filter out None results if on_error was used and some tasks failed
+        results = [res for res in results if res is not None]
+    return results
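For orientation, a minimal usage sketch of the new helpers based only on the signatures shown above; the `double` coroutine, the worker, and all argument values are illustrative, not taken from the package:

from evalscope.utils.function_utils import AsyncioLoopRunner, retry_func, run_in_threads_with_progress


@retry_func(retries=3, sleep_interval=1)
def flaky_step() -> None:
    ...  # re-invoked up to 3 times, 1s apart; the last exception is re-raised


async def double(x: int) -> int:  # illustrative coroutine
    return x * 2


def worker(item: int) -> int:
    # Bridge a synchronous worker thread into the shared background event loop.
    return AsyncioLoopRunner.run(double(item), timeout=30)


results = run_in_threads_with_progress(
    list(range(8)),
    worker,
    desc='demo',
    max_workers=4,
    heartbeat_sec=60,
    on_error=lambda item, exc: print(f'item {item} failed: {exc}'),
    filter_none_results=True,  # drop slots left as None by failed items
)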
evalscope/utils/import_utils.py
CHANGED
@@ -5,13 +5,85 @@ import importlib
 import os
 from itertools import chain
 from types import ModuleType
-from typing import Any
+from typing import Any, Optional, Union
 
+from evalscope.constants import IS_BUILD_DOC
 from .logger import get_logger
 
 logger = get_logger()  # pylint: disable=invalid-name
 
 
+def check_import(
+    module_name: Union[str, list[str]],
+    package: Optional[Union[str, list[str]]] = None,
+    raise_warning: bool = True,
+    raise_error: bool = False,
+    feature_name: Optional[str] = 'this feature',
+) -> bool:
+    """Check if a module or list of modules can be imported.
+
+    Args:
+        module_name (Union[str, list[str]]): The name(s) of the module(s) to check.
+        package (Union[str, list[str]], optional): The package(s) to install if the module(s) are not found.
+            Defaults to None.
+        raise_error (bool, optional): Whether to raise an error if any module is not found. Defaults to False.
+        raise_warning (bool, optional): Whether to log a warning if any module is not found. Defaults to True.
+        feature_name (str, optional): The feature name that requires the module(s). Used in the warning/error message.
+            Defaults to 'this feature'.
+
+    Returns:
+        bool: True if all modules can be imported, False otherwise.
+    """
+    # Convert single strings to lists for uniform processing
+    if isinstance(module_name, str):
+        module_names = [module_name]
+    else:
+        module_names = module_name
+
+    if package is None:
+        packages = [None] * len(module_names)
+    elif isinstance(package, str):
+        packages = [package] * len(module_names)
+    else:
+        packages = package
+    # Ensure packages list has same length as module_names
+    if len(packages) < len(module_names):
+        packages.extend([None] * (len(module_names) - len(packages)))
+
+    missing_modules = []
+    missing_packages = []
+
+    for i, mod_name in enumerate(module_names):
+        try:
+            importlib.import_module(mod_name)
+        except ImportError:
+            missing_modules.append(mod_name)
+            if i < len(packages) and packages[i]:
+                missing_packages.append(packages[i])
+
+    if missing_modules:
+        if len(missing_modules) == 1:
+            error_msg = f'`{missing_modules[0]}` not found.'
+        else:
+            error_msg = f'The following modules are not found: {", ".join(f"`{mod}`" for mod in missing_modules)}.'
+
+        if missing_packages:
+            if len(missing_packages) == 1:
+                error_msg += f' Please run `pip install {missing_packages[0]}` to use {feature_name}.'
+            else:
+                unique_packages = list(dict.fromkeys(missing_packages))  # Remove duplicates while preserving order
+                error_msg += f' Please run `pip install {" ".join(unique_packages)}` to use {feature_name}.'
+
+        if raise_warning:
+            logger.warning(error_msg)
+
+        if not IS_BUILD_DOC and raise_error:
+            raise ImportError(error_msg)
+        return False
+
+    return True
+
+
 class _LazyModule(ModuleType):
     """
     Module class that surfaces all objects but only performs associated imports when the objects are requested.
evalscope/utils/io_utils.py
CHANGED
@@ -1,6 +1,7 @@
 import base64
 import csv
 import hashlib
+import io
 import json
 import jsonlines as jsonl
 import os
@@ -8,8 +9,10 @@ import re
 import string
 import unicodedata
 import yaml
+from datetime import datetime
 from io import BytesIO
 from PIL import Image
+from typing import Tuple
 
 from evalscope.constants import DumpMode
 from evalscope.utils.logger import get_logger
@@ -122,6 +125,9 @@ def dump_jsonl_data(data_list, jsonl_file, dump_mode=DumpMode.OVERWRITE):
     if not isinstance(data_list, list):
         data_list = [data_list]
 
+    # Convert non-serializable types to serializable ones
+    data_list = convert_normal_types(data_list)
+
     if dump_mode == DumpMode.OVERWRITE:
         dump_mode = 'w'
     elif dump_mode == DumpMode.APPEND:
@@ -168,6 +174,24 @@ def csv_to_list(csv_file) -> list:
     return res_list
 
 
+def tsv_to_list(tsv_file) -> list:
+    """
+    Read tsv file to list.
+
+    Args:
+        tsv_file: tsv file path.
+
+    Returns:
+        list: list of lines. Each line is a dict.
+    """
+    res_list = []
+    with open(tsv_file, 'r', encoding='utf-8') as f:
+        reader = csv.DictReader(f, delimiter='\t')
+        for row in reader:
+            res_list.append(row)
+    return res_list
+
+
 def csv_to_jsonl(csv_file, jsonl_file):
     """
     Convert csv file to jsonl file.
@@ -283,22 +307,64 @@ def get_valid_list(input_list, candidate_list):
         [i for i in input_list if i not in candidate_list]
 
 
-def PIL_to_base64(image: Image.Image, format: str = 'JPEG') -> str:
+def PIL_to_base64(image: Image.Image, format: str = 'JPEG', add_header: bool = False) -> str:
     """
     Convert a PIL Image to a base64 encoded string.
 
     Args:
         image (Image.Image): The PIL Image to convert.
         format (str): The format to save the image in. Default is 'JPEG'.
+        add_header (bool): Whether to add the base64 header. Default is False.
+
     Returns:
         str: Base64 encoded string of the image.
     """
     buffered = BytesIO()
     image.save(buffered, format=format)
     img_str = base64.b64encode(buffered.getvalue()).decode('utf-8')
+    if add_header:
+        img_str = f'data:image/{format.lower()};base64,{img_str}'
     return img_str
 
 
+def bytes_to_base64(bytes_data: bytes, *, format: str = 'png', add_header: bool = False, content_type='image') -> str:
+    """Convert bytes to a base64 encoded string.
+
+    Args:
+        bytes_data (bytes): The bytes to convert.
+        format (str): The format of the image. Default is 'png'.
+        add_header (bool): Whether to add the base64 header. Default is False.
+        content_type (str): The type of the data, 'image' or 'audio'. Default is 'image'.
+
+    Returns:
+        str: Base64 encoded string of the bytes.
+    """
+    base64_str = base64.b64encode(bytes_data).decode('utf-8')
+    if add_header:
+        base64_str = f'data:{content_type}/{format};base64,{base64_str}'
+    return base64_str
+
+
+def base64_to_PIL(base64_str):
+    """Convert a base64 encoded string to a PIL Image.
+
+    Args:
+        base64_str (str): The base64 encoded string.
+
+    Returns:
+        Image.Image: The decoded PIL Image.
+    """
+    # remove header
+    if ',' in base64_str:
+        base64_str = base64_str.split(',', 1)[1]
+
+    # decode
+    img_data = base64.b64decode(base64_str)
+    img_file = io.BytesIO(img_data)
+    img = Image.open(img_file)
+    return img
+
+
 def safe_filename(s: str, max_length: int = 255) -> str:
     """
     Convert a string into a safe filename by removing or replacing unsafe characters.
@@ -351,11 +417,13 @@ def safe_filename(s: str, max_length: int = 255) -> str:
     return s
 
 
-def convert_numpy_types(obj):
-    """Recursively convert numpy types to native Python types for JSON serialization."""
+def convert_normal_types(obj):
+    """Recursively convert numpy types and datetime objects to native Python types for JSON serialization."""
    import numpy as np
 
-    if isinstance(obj, np.bool_):
+    if isinstance(obj, datetime):
+        return obj.isoformat()
+    elif isinstance(obj, np.bool_):
         return bool(obj)
     elif isinstance(obj, np.integer):
         return int(obj)
@@ -364,10 +432,67 @@ def convert_numpy_types(obj):
     elif isinstance(obj, np.ndarray):
         return obj.tolist()
     elif isinstance(obj, dict):
-        return {key: convert_numpy_types(value) for key, value in obj.items()}
+        return {key: convert_normal_types(value) for key, value in obj.items()}
     elif isinstance(obj, list):
-        return [convert_numpy_types(item) for item in obj]
+        return [convert_normal_types(item) for item in obj]
     elif isinstance(obj, tuple):
-        return tuple(convert_numpy_types(item) for item in obj)
+        return tuple(convert_normal_types(item) for item in obj)
+    elif isinstance(obj, os.PathLike):
+        return str(obj)
     else:
         return obj
+
+
+def compress_image_to_limit(image_bytes: bytes, max_bytes: int = 10_000_000) -> Tuple[bytes, str]:
+    """
+    Ensure image bytes are under max_bytes by re-encoding to JPEG with quality reduction
+    and optional downscaling. Returns (processed_bytes, format_str).
+    If the original bytes are already below the limit, returns them as PNG.
+    """
+    if len(image_bytes) <= max_bytes:
+        return image_bytes, 'png'
+
+    try:
+        img = Image.open(BytesIO(image_bytes))
+    except Exception as exc:
+        logger.warning(f'Failed to open image bytes with PIL, sending original image; may exceed API limit: {exc}')
+        return image_bytes, 'png'
+
+    # Convert to RGB for JPEG if needed
+    if img.mode not in ('RGB', 'L'):
+        img = img.convert('RGB')
+
+    def encode_jpeg(source: Image.Image, quality: int) -> bytes:
+        buf = BytesIO()
+        source.save(buf, format='JPEG', quality=quality, optimize=True, progressive=True)
+        return buf.getvalue()
+
+    # Start with moderate quality and reduce
+    quality: int = 85
+    out: bytes = encode_jpeg(img, quality)
+    quality_floor: int = 40
+
+    while len(out) > max_bytes and quality > quality_floor:
+        quality -= 10
+        out = encode_jpeg(img, quality)
+
+    # If still too large, progressively downscale
+    min_side_floor: int = 256
+    scale: float = 0.9
+    while len(out) > max_bytes and min(img.size) > min_side_floor:
+        new_w = max(min_side_floor, int(img.width * scale))
+        new_h = max(min_side_floor, int(img.height * scale))
+        if (new_w, new_h) == img.size:
+            break
+        img = img.resize((new_w, new_h), Image.LANCZOS)
+        out = encode_jpeg(img, quality)
+
+    if len(out) > max_bytes:
+        logger.warning(f'Image remains above limit after compression: size={len(out)} bytes (limit={max_bytes}).')
+    else:
+        logger.info(
+            f'Compressed image from {len(image_bytes)} to {len(out)} bytes; '
+            f'quality={quality}, size={img.width}x{img.height}.'
+        )
+
+    return out, 'jpeg'
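A round-trip sketch of the new image helpers, using only the signatures shown above; the generated image and the input file name are illustrative:

from PIL import Image

from evalscope.utils.io_utils import PIL_to_base64, base64_to_PIL, bytes_to_base64, compress_image_to_limit

img = Image.new('RGB', (64, 64), 'red')  # illustrative image

# With add_header=True the result is a data URI; base64_to_PIL strips the
# header again before decoding, so the pair round-trips.
uri = PIL_to_base64(img, format='PNG', add_header=True)  # 'data:image/png;base64,...'
restored = base64_to_PIL(uri)

# content_type switches the data-URI prefix, e.g. for audio payloads.
wav_uri = bytes_to_base64(b'\x00\x01', format='wav', add_header=True, content_type='audio')

# Keep request payloads under a 10 MB limit; oversized images are re-encoded
# as JPEG with decreasing quality, then downscaled if still too large.
with open('photo.png', 'rb') as f:  # illustrative input file
    data, fmt = compress_image_to_limit(f.read())  # fmt is 'png' or 'jpeg'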
evalscope/utils/json_schema.py
CHANGED
@@ -4,7 +4,7 @@ from copy import deepcopy
 from dataclasses import is_dataclass
 from datetime import date, datetime, time
 from enum import EnumMeta
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, field_validator, model_validator
 from typing import (
     Any,
     Dict,
@@ -59,6 +59,28 @@ class JSONSchema(BaseModel):
     required: Optional[List[str]] = Field(default=None)
     """Required fields for object parameters."""
 
+    @model_validator(mode='before')
+    def convert_type_before_validation(cls, values):
+        values = deepcopy(values)
+
+        def recursive_convert_type(obj):
+            if isinstance(obj, dict):
+                # Convert 'type' field if it's a string
+                if 'type' in obj and isinstance(obj['type'], str):
+                    try:
+                        obj['type'] = python_type_to_json_type(obj['type'])
+                    except ValueError:
+                        # If conversion fails, leave it as is
+                        pass
+                # Recursively process nested structures
+                for k, v in obj.items():
+                    obj[k] = recursive_convert_type(v)
+            elif isinstance(obj, list):
+                return [recursive_convert_type(item) for item in obj]
+            return obj
+
+        return recursive_convert_type(values)
+
 
 def json_schema(t: Type[Any]) -> JSONSchema:
     """Provide a JSON Schema for the specified type.
@@ -152,6 +174,8 @@ def cls_json_schema(cls: Type[Any]) -> JSONSchema:
 
 
 def python_type_to_json_type(python_type: Optional[str]) -> JSONType:
+    if python_type is not None and python_type in get_args(JSONType):
+        return python_type
     if python_type == 'str':
         return 'string'
     elif python_type == 'int':
@@ -205,4 +229,3 @@ def resolve_schema_references(schema: Dict[str, Any]) -> Dict[str, Any]:
         return obj
 
     return cast(Dict[str, Any], _resolve_refs(schema))
-    return cast(Dict[str, Any], _resolve_refs(schema))
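A sketch of what the new passthrough and 'before' validator accept; note that a `type` field on `JSONSchema` is assumed from context, since only `required` appears in the hunk above:

from evalscope.utils.json_schema import JSONSchema, python_type_to_json_type

# Values that are already JSON Schema types now pass through unchanged...
assert python_type_to_json_type('string') == 'string'
# ...while Python type names are still mapped as before.
assert python_type_to_json_type('str') == 'string'

# The model validator applies the same conversion recursively before pydantic
# validation, so schemas written with Python type names ('str', 'int', ...)
# validate cleanly. (Assumes JSONSchema declares a 'type' field.)
schema = JSONSchema.model_validate({'type': 'str', 'required': None})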
|