evalscope 0.10.0__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evalscope/__init__.py +4 -1
- evalscope/api/benchmark/__init__.py +11 -0
- evalscope/api/benchmark/adapters/__init__.py +7 -0
- evalscope/api/benchmark/adapters/agent_adapter.py +8 -0
- evalscope/api/benchmark/adapters/default_data_adapter.py +754 -0
- evalscope/api/benchmark/adapters/image_edit_adapter.py +82 -0
- evalscope/api/benchmark/adapters/multi_choice_adapter.py +86 -0
- evalscope/api/benchmark/adapters/ner_adapter.py +212 -0
- evalscope/api/benchmark/adapters/text2image_adapter.py +157 -0
- evalscope/api/benchmark/adapters/vision_language_adapter.py +8 -0
- evalscope/api/benchmark/benchmark.py +404 -0
- evalscope/api/benchmark/meta.py +124 -0
- evalscope/api/dataset/__init__.py +2 -0
- evalscope/api/dataset/dataset.py +370 -0
- evalscope/api/dataset/loader.py +266 -0
- evalscope/api/dataset/utils.py +143 -0
- evalscope/api/evaluator/__init__.py +3 -0
- evalscope/api/evaluator/cache.py +382 -0
- evalscope/api/evaluator/evaluator.py +61 -0
- evalscope/api/evaluator/state.py +280 -0
- evalscope/api/filter/__init__.py +1 -0
- evalscope/api/filter/filter.py +72 -0
- evalscope/api/messages/__init__.py +12 -0
- evalscope/api/messages/chat_message.py +248 -0
- evalscope/api/messages/content.py +102 -0
- evalscope/api/messages/utils.py +35 -0
- evalscope/api/metric/__init__.py +2 -0
- evalscope/api/metric/metric.py +60 -0
- evalscope/api/metric/scorer.py +113 -0
- evalscope/api/mixin/__init__.py +2 -0
- evalscope/api/mixin/llm_judge_mixin.py +170 -0
- evalscope/api/mixin/sandbox_mixin.py +182 -0
- evalscope/api/model/__init__.py +12 -0
- evalscope/api/model/generate_config.py +161 -0
- evalscope/api/model/model.py +386 -0
- evalscope/api/model/model_output.py +285 -0
- evalscope/api/registry.py +182 -0
- evalscope/api/tool/__init__.py +3 -0
- evalscope/api/tool/tool_call.py +101 -0
- evalscope/api/tool/tool_info.py +173 -0
- evalscope/api/tool/utils.py +64 -0
- evalscope/app/__init__.py +28 -0
- evalscope/app/app.py +38 -0
- evalscope/app/arguments.py +11 -0
- evalscope/app/constants.py +22 -0
- evalscope/app/ui/__init__.py +20 -0
- evalscope/app/ui/app_ui.py +53 -0
- evalscope/app/ui/multi_model.py +353 -0
- evalscope/app/ui/sidebar.py +42 -0
- evalscope/app/ui/single_model.py +220 -0
- evalscope/app/ui/visualization.py +36 -0
- evalscope/app/utils/data_utils.py +195 -0
- evalscope/app/utils/env_utils.py +12 -0
- evalscope/app/utils/localization.py +221 -0
- evalscope/app/utils/text_utils.py +119 -0
- evalscope/app/utils/visualization.py +96 -0
- evalscope/arguments.py +32 -9
- evalscope/backend/opencompass/api_meta_template.py +2 -1
- evalscope/backend/opencompass/backend_manager.py +10 -7
- evalscope/backend/rag_eval/__init__.py +1 -1
- evalscope/backend/rag_eval/backend_manager.py +23 -6
- evalscope/backend/rag_eval/clip_benchmark/dataset_builder.py +33 -21
- evalscope/backend/rag_eval/clip_benchmark/task_template.py +8 -4
- evalscope/backend/rag_eval/cmteb/arguments.py +14 -1
- evalscope/backend/rag_eval/cmteb/task_template.py +19 -3
- evalscope/backend/rag_eval/cmteb/tasks/CustomTask.py +1 -1
- evalscope/backend/rag_eval/ragas/arguments.py +0 -1
- evalscope/backend/rag_eval/ragas/task_template.py +2 -1
- evalscope/backend/rag_eval/ragas/tasks/build_distribution.py +2 -1
- evalscope/backend/rag_eval/ragas/tasks/build_transform.py +7 -4
- evalscope/backend/rag_eval/ragas/tasks/testset_generation.py +9 -3
- evalscope/backend/rag_eval/ragas/tasks/translate_prompt.py +2 -6
- evalscope/backend/rag_eval/utils/embedding.py +125 -32
- evalscope/backend/rag_eval/utils/llm.py +16 -16
- evalscope/backend/vlm_eval_kit/backend_manager.py +8 -3
- evalscope/benchmarks/__init__.py +17 -5
- evalscope/benchmarks/aa_lcr/__init__.py +0 -0
- evalscope/benchmarks/aa_lcr/aa_lcr_adapter.py +205 -0
- evalscope/benchmarks/ai2d/__init__.py +0 -0
- evalscope/benchmarks/ai2d/ai2d_adapter.py +54 -0
- evalscope/benchmarks/aime/__init__.py +0 -0
- evalscope/benchmarks/aime/aime24_adapter.py +55 -0
- evalscope/benchmarks/aime/aime25_adapter.py +181 -0
- evalscope/benchmarks/aime/grader.py +307 -0
- evalscope/{metrics/math_accuracy.py → benchmarks/aime/math_normalize.py} +61 -72
- evalscope/benchmarks/alpaca_eval/__init__.py +0 -0
- evalscope/benchmarks/alpaca_eval/alpaca_eval_adapter.py +133 -0
- evalscope/benchmarks/amc/__init__.py +0 -0
- evalscope/benchmarks/amc/amc_adapter.py +51 -0
- evalscope/benchmarks/arc/arc_adapter.py +34 -149
- evalscope/benchmarks/arena_hard/__init__.py +0 -0
- evalscope/benchmarks/arena_hard/arena_hard_adapter.py +149 -0
- evalscope/benchmarks/arena_hard/utils.py +186 -0
- evalscope/benchmarks/bbh/bbh_adapter.py +117 -157
- evalscope/benchmarks/bfcl/__init__.py +0 -0
- evalscope/benchmarks/bfcl/v3/__init__.py +0 -0
- evalscope/benchmarks/bfcl/v3/bfcl_v3_adapter.py +370 -0
- evalscope/benchmarks/bfcl/v3/generation.py +222 -0
- evalscope/benchmarks/bfcl/v3/utils.py +23 -0
- evalscope/benchmarks/bfcl/v4/__init__.py +0 -0
- evalscope/benchmarks/bfcl/v4/bfcl_v4_adapter.py +229 -0
- evalscope/benchmarks/bfcl/v4/utils.py +410 -0
- evalscope/benchmarks/biomix_qa/__init__.py +0 -0
- evalscope/benchmarks/biomix_qa/biomix_qa_adapter.py +36 -0
- evalscope/benchmarks/blink/__init__.py +0 -0
- evalscope/benchmarks/blink/blink_adapter.py +61 -0
- evalscope/benchmarks/ceval/ceval_adapter.py +93 -174
- evalscope/benchmarks/chartqa/__init__.py +0 -0
- evalscope/benchmarks/chartqa/chartqa_adapter.py +80 -0
- evalscope/benchmarks/chartqa/utils.py +38 -0
- evalscope/benchmarks/chinese_simple_qa/__init__.py +0 -0
- evalscope/benchmarks/chinese_simple_qa/csimple_qa_adapter.py +170 -0
- evalscope/benchmarks/cmmlu/cmmlu_adapter.py +34 -140
- evalscope/benchmarks/coin_flip/__init__.py +0 -0
- evalscope/benchmarks/coin_flip/coin_flip_adapter.py +128 -0
- evalscope/benchmarks/commonsense_qa/__init__.py +0 -0
- evalscope/benchmarks/commonsense_qa/commonsense_qa_adapter.py +32 -0
- evalscope/benchmarks/competition_math/competition_math_adapter.py +64 -112
- evalscope/benchmarks/data_collection/__init__.py +0 -0
- evalscope/benchmarks/data_collection/data_collection_adapter.py +215 -0
- evalscope/benchmarks/docmath/__init__.py +0 -0
- evalscope/benchmarks/docmath/docmath_adapter.py +143 -0
- evalscope/benchmarks/docmath/utils.py +219 -0
- evalscope/benchmarks/docvqa/__init__.py +0 -0
- evalscope/benchmarks/docvqa/docvqa_adapter.py +67 -0
- evalscope/benchmarks/drivelology/__init__.py +0 -0
- evalscope/benchmarks/drivelology/drivelology_binary_adapter.py +170 -0
- evalscope/benchmarks/drivelology/drivelology_multilabel_adapter.py +254 -0
- evalscope/benchmarks/drivelology/drivelology_selection_adapter.py +49 -0
- evalscope/benchmarks/drivelology/drivelology_writing_adapter.py +218 -0
- evalscope/benchmarks/drop/__init__.py +0 -0
- evalscope/benchmarks/drop/drop_adapter.py +155 -0
- evalscope/benchmarks/drop/utils.py +156 -0
- evalscope/benchmarks/frames/__init__.py +0 -0
- evalscope/benchmarks/frames/frames_adapter.py +175 -0
- evalscope/benchmarks/frames/utils.py +37 -0
- evalscope/benchmarks/general_arena/__init__.py +0 -0
- evalscope/benchmarks/general_arena/general_arena_adapter.py +454 -0
- evalscope/benchmarks/general_arena/utils.py +223 -0
- evalscope/benchmarks/general_mcq/__init__.py +0 -0
- evalscope/benchmarks/general_mcq/general_mcq_adapter.py +58 -0
- evalscope/benchmarks/general_qa/general_qa_adapter.py +75 -107
- evalscope/benchmarks/gpqa/__init__.py +0 -0
- evalscope/benchmarks/gpqa/gpqa_adapter.py +90 -0
- evalscope/benchmarks/gpqa/prompt.py +88 -0
- evalscope/benchmarks/gsm8k/gsm8k_adapter.py +77 -144
- evalscope/benchmarks/hallusion_bench/__init__.py +0 -0
- evalscope/benchmarks/hallusion_bench/hallusion_bench_adapter.py +159 -0
- evalscope/benchmarks/halu_eval/__init__.py +0 -0
- evalscope/benchmarks/halu_eval/halu_eval_adapter.py +128 -0
- evalscope/benchmarks/halu_eval/halu_eval_instructions.py +84 -0
- evalscope/benchmarks/healthbench/__init__.py +0 -0
- evalscope/benchmarks/healthbench/healthbench_adapter.py +282 -0
- evalscope/benchmarks/healthbench/utils.py +102 -0
- evalscope/benchmarks/hellaswag/hellaswag_adapter.py +36 -134
- evalscope/benchmarks/hle/__init__.py +0 -0
- evalscope/benchmarks/hle/hle_adapter.py +153 -0
- evalscope/benchmarks/humaneval/humaneval_adapter.py +80 -88
- evalscope/benchmarks/humaneval/utils.py +235 -0
- evalscope/benchmarks/ifeval/ifeval_adapter.py +71 -45
- evalscope/benchmarks/ifeval/instructions.py +112 -68
- evalscope/benchmarks/ifeval/instructions_registry.py +1 -1
- evalscope/benchmarks/ifeval/instructions_util.py +2 -3
- evalscope/benchmarks/ifeval/utils.py +6 -7
- evalscope/benchmarks/image_edit/__init__.py +0 -0
- evalscope/benchmarks/image_edit/gedit/__init__.py +0 -0
- evalscope/benchmarks/image_edit/gedit/gedit_adapter.py +138 -0
- evalscope/benchmarks/image_edit/gedit/utils.py +372 -0
- evalscope/benchmarks/image_edit/gedit/vie_prompts.py +406 -0
- evalscope/benchmarks/infovqa/__init__.py +0 -0
- evalscope/benchmarks/infovqa/infovqa_adapter.py +66 -0
- evalscope/benchmarks/iquiz/iquiz_adapter.py +30 -58
- evalscope/benchmarks/live_code_bench/__init__.py +0 -0
- evalscope/benchmarks/live_code_bench/evaluate_utils.py +195 -0
- evalscope/benchmarks/live_code_bench/extract_utils.py +70 -0
- evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py +150 -0
- evalscope/benchmarks/live_code_bench/load_utils.py +63 -0
- evalscope/benchmarks/live_code_bench/pass_k_utils.py +56 -0
- evalscope/benchmarks/live_code_bench/prompts.py +207 -0
- evalscope/benchmarks/live_code_bench/sandbox_evaluate_utils.py +220 -0
- evalscope/benchmarks/live_code_bench/testing_util.py +544 -0
- evalscope/benchmarks/logi_qa/__int__.py +0 -0
- evalscope/benchmarks/logi_qa/logi_qa_adapter.py +41 -0
- evalscope/benchmarks/maritime_bench/__init__.py +0 -0
- evalscope/benchmarks/maritime_bench/maritime_bench_adapter.py +56 -0
- evalscope/benchmarks/math_500/__init__.py +0 -0
- evalscope/benchmarks/math_500/math_500_adapter.py +55 -0
- evalscope/benchmarks/math_qa/__init__.py +0 -0
- evalscope/benchmarks/math_qa/math_qa_adapter.py +35 -0
- evalscope/benchmarks/math_verse/__init__.py +0 -0
- evalscope/benchmarks/math_verse/math_verse_adapter.py +105 -0
- evalscope/benchmarks/math_vision/__init__.py +0 -0
- evalscope/benchmarks/math_vision/math_vision_adapter.py +116 -0
- evalscope/benchmarks/math_vista/__init__.py +0 -0
- evalscope/benchmarks/math_vista/math_vista_adapter.py +114 -0
- evalscope/benchmarks/med_mcqa/__init__.py +0 -0
- evalscope/benchmarks/med_mcqa/med_mcqa_adapter.py +32 -0
- evalscope/benchmarks/minerva_math/__init__.py +0 -0
- evalscope/benchmarks/minerva_math/minerva_math_adapter.py +53 -0
- evalscope/benchmarks/mm_bench/__init__.py +0 -0
- evalscope/benchmarks/mm_bench/mm_bench_adapter.py +99 -0
- evalscope/benchmarks/mm_star/__init__.py +0 -0
- evalscope/benchmarks/mm_star/mm_star_adapter.py +73 -0
- evalscope/benchmarks/mmlu/mmlu_adapter.py +32 -210
- evalscope/benchmarks/mmlu_pro/mmlu_pro_adapter.py +87 -103
- evalscope/benchmarks/mmlu_redux/__init__.py +0 -0
- evalscope/benchmarks/mmlu_redux/mmlu_redux_adapter.py +139 -0
- evalscope/benchmarks/mmmu/__init__.py +0 -0
- evalscope/benchmarks/mmmu/mmmu_adapter.py +159 -0
- evalscope/benchmarks/mmmu_pro/__init__.py +0 -0
- evalscope/benchmarks/mmmu_pro/mmmu_pro_adapter.py +124 -0
- evalscope/benchmarks/mri_mcqa/__init__.py +0 -0
- evalscope/benchmarks/mri_mcqa/mri_mcqa_adapter.py +34 -0
- evalscope/benchmarks/multi_if/__init__.py +0 -0
- evalscope/benchmarks/multi_if/ifeval.py +3354 -0
- evalscope/benchmarks/multi_if/metrics.py +120 -0
- evalscope/benchmarks/multi_if/multi_if_adapter.py +161 -0
- evalscope/benchmarks/music_trivia/__init__.py +0 -0
- evalscope/benchmarks/music_trivia/music_trivia_adapter.py +36 -0
- evalscope/benchmarks/musr/__init__.py +0 -0
- evalscope/benchmarks/musr/musr_adapter.py +43 -0
- evalscope/benchmarks/needle_haystack/__init__.py +0 -0
- evalscope/benchmarks/needle_haystack/needle_haystack_adapter.py +389 -0
- evalscope/benchmarks/needle_haystack/utils.py +79 -0
- evalscope/benchmarks/ner/__init__.py +0 -0
- evalscope/benchmarks/ner/broad_twitter_corpus_adapter.py +52 -0
- evalscope/benchmarks/ner/conll2003_adapter.py +48 -0
- evalscope/benchmarks/ner/copious_adapter.py +85 -0
- evalscope/benchmarks/ner/cross_ner_adapter.py +120 -0
- evalscope/benchmarks/ner/cross_ner_entities/__init__.py +0 -0
- evalscope/benchmarks/ner/cross_ner_entities/ai.py +54 -0
- evalscope/benchmarks/ner/cross_ner_entities/literature.py +36 -0
- evalscope/benchmarks/ner/cross_ner_entities/music.py +39 -0
- evalscope/benchmarks/ner/cross_ner_entities/politics.py +37 -0
- evalscope/benchmarks/ner/cross_ner_entities/science.py +58 -0
- evalscope/benchmarks/ner/genia_ner_adapter.py +66 -0
- evalscope/benchmarks/ner/harvey_ner_adapter.py +58 -0
- evalscope/benchmarks/ner/mit_movie_trivia_adapter.py +74 -0
- evalscope/benchmarks/ner/mit_restaurant_adapter.py +66 -0
- evalscope/benchmarks/ner/ontonotes5_adapter.py +87 -0
- evalscope/benchmarks/ner/wnut2017_adapter.py +61 -0
- evalscope/benchmarks/ocr_bench/__init__.py +0 -0
- evalscope/benchmarks/ocr_bench/ocr_bench/__init__.py +0 -0
- evalscope/benchmarks/ocr_bench/ocr_bench/ocr_bench_adapter.py +101 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/IoUscore_metric.py +87 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/TEDS_metric.py +963 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/__init__.py +0 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/ocr_bench_v2_adapter.py +161 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/page_ocr_metric.py +50 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/parallel.py +46 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_eval/__init__.py +0 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_eval/readme.txt +26 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_eval/rrc_evaluation_funcs_1_1.py +537 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_eval/script.py +481 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_metric.py +179 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/utils.py +433 -0
- evalscope/benchmarks/ocr_bench/ocr_bench_v2/vqa_metric.py +254 -0
- evalscope/benchmarks/olympiad_bench/__init__.py +0 -0
- evalscope/benchmarks/olympiad_bench/olympiad_bench_adapter.py +163 -0
- evalscope/benchmarks/olympiad_bench/utils.py +565 -0
- evalscope/benchmarks/omni_bench/__init__.py +0 -0
- evalscope/benchmarks/omni_bench/omni_bench_adapter.py +86 -0
- evalscope/benchmarks/omnidoc_bench/__init__.py +0 -0
- evalscope/benchmarks/omnidoc_bench/end2end_eval.py +349 -0
- evalscope/benchmarks/omnidoc_bench/metrics.py +547 -0
- evalscope/benchmarks/omnidoc_bench/omnidoc_bench_adapter.py +135 -0
- evalscope/benchmarks/omnidoc_bench/utils.py +1937 -0
- evalscope/benchmarks/piqa/__init__.py +0 -0
- evalscope/benchmarks/piqa/piqa_adapter.py +32 -0
- evalscope/benchmarks/poly_math/__init__.py +0 -0
- evalscope/benchmarks/poly_math/poly_math_adapter.py +132 -0
- evalscope/benchmarks/poly_math/utils/instruction.py +105 -0
- evalscope/benchmarks/pope/__init__.py +0 -0
- evalscope/benchmarks/pope/pope_adapter.py +112 -0
- evalscope/benchmarks/process_bench/__init__.py +0 -0
- evalscope/benchmarks/process_bench/process_bench_adapter.py +171 -0
- evalscope/benchmarks/pumed_qa/__init__.py +0 -0
- evalscope/benchmarks/pumed_qa/pubmed_qa_adapter.py +175 -0
- evalscope/benchmarks/qasc/__init__.py +0 -0
- evalscope/benchmarks/qasc/qasc_adapter.py +35 -0
- evalscope/benchmarks/race/race_adapter.py +33 -120
- evalscope/benchmarks/real_world_qa/__init__.py +0 -0
- evalscope/benchmarks/real_world_qa/real_world_qa_adapter.py +64 -0
- evalscope/benchmarks/sciq/__init__.py +0 -0
- evalscope/benchmarks/sciq/sciq_adapter.py +36 -0
- evalscope/benchmarks/seed_bench_2_plus/__init__.py +0 -0
- evalscope/benchmarks/seed_bench_2_plus/seed_bench_2_plus_adapter.py +72 -0
- evalscope/benchmarks/simple_qa/__init__.py +0 -0
- evalscope/benchmarks/simple_qa/simple_qa_adapter.py +169 -0
- evalscope/benchmarks/simple_vqa/__init__.py +0 -0
- evalscope/benchmarks/simple_vqa/simple_vqa_adapter.py +169 -0
- evalscope/benchmarks/siqa/__init__.py +0 -0
- evalscope/benchmarks/siqa/siqa_adapter.py +39 -0
- evalscope/benchmarks/super_gpqa/__init__.py +0 -0
- evalscope/benchmarks/super_gpqa/prompt.py +88 -0
- evalscope/benchmarks/super_gpqa/super_gpqa_adapter.py +165 -0
- evalscope/benchmarks/super_gpqa/utils.py +86 -0
- evalscope/benchmarks/tau_bench/__init__.py +0 -0
- evalscope/benchmarks/tau_bench/tau2_bench/__init__.py +0 -0
- evalscope/benchmarks/tau_bench/tau2_bench/generation.py +158 -0
- evalscope/benchmarks/tau_bench/tau2_bench/tau2_bench_adapter.py +146 -0
- evalscope/benchmarks/tau_bench/tau_bench/__init__.py +0 -0
- evalscope/benchmarks/tau_bench/tau_bench/generation.py +147 -0
- evalscope/benchmarks/tau_bench/tau_bench/tau_bench_adapter.py +168 -0
- evalscope/benchmarks/text2image/__init__.py +0 -0
- evalscope/benchmarks/text2image/evalmuse_adapter.py +78 -0
- evalscope/benchmarks/text2image/genai_bench_adapter.py +53 -0
- evalscope/benchmarks/text2image/general_t2i_adapter.py +42 -0
- evalscope/benchmarks/text2image/hpdv2_adapter.py +52 -0
- evalscope/benchmarks/text2image/tifa_adapter.py +27 -0
- evalscope/benchmarks/tool_bench/__init__.py +0 -0
- evalscope/benchmarks/tool_bench/tool_bench_adapter.py +102 -0
- evalscope/benchmarks/tool_bench/utils.py +203 -0
- evalscope/benchmarks/trivia_qa/trivia_qa_adapter.py +56 -118
- evalscope/benchmarks/truthful_qa/truthful_qa_adapter.py +70 -270
- evalscope/benchmarks/visu_logic/__init__.py +0 -0
- evalscope/benchmarks/visu_logic/visu_logic_adapter.py +75 -0
- evalscope/benchmarks/winogrande/__init__.py +0 -0
- evalscope/benchmarks/winogrande/winogrande_adapter.py +34 -0
- evalscope/benchmarks/wmt/__init__.py +0 -0
- evalscope/benchmarks/wmt/wmt24_adapter.py +294 -0
- evalscope/benchmarks/zerobench/__init__.py +0 -0
- evalscope/benchmarks/zerobench/zerobench_adapter.py +64 -0
- evalscope/cli/cli.py +2 -0
- evalscope/cli/start_app.py +12 -2
- evalscope/cli/start_eval.py +4 -3
- evalscope/cli/start_perf.py +10 -2
- evalscope/cli/start_server.py +6 -3
- evalscope/collections/__init__.py +27 -3
- evalscope/collections/sampler.py +12 -11
- evalscope/collections/schema.py +13 -12
- evalscope/config.py +218 -147
- evalscope/constants.py +78 -82
- evalscope/evaluator/__init__.py +1 -1
- evalscope/evaluator/evaluator.py +334 -318
- evalscope/filters/__init__.py +2 -0
- evalscope/filters/extraction.py +126 -0
- evalscope/filters/selection.py +57 -0
- evalscope/metrics/__init__.py +59 -3
- evalscope/metrics/bert_score/__init__.py +0 -0
- evalscope/metrics/bert_score/scorer.py +338 -0
- evalscope/metrics/bert_score/utils.py +697 -0
- evalscope/metrics/bundled_rouge_score/rouge_scorer.py +20 -15
- evalscope/metrics/llm_judge.py +211 -0
- evalscope/metrics/math_parser.py +545 -0
- evalscope/metrics/metric.py +611 -0
- evalscope/metrics/metrics.py +112 -23
- evalscope/metrics/rouge_metric.py +11 -13
- evalscope/metrics/t2v_metrics/__init__.py +0 -0
- evalscope/metrics/t2v_metrics/clipscore.py +14 -0
- evalscope/metrics/t2v_metrics/constants.py +12 -0
- evalscope/metrics/t2v_metrics/itmscore.py +14 -0
- evalscope/metrics/t2v_metrics/models/__init__.py +0 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/__init__.py +30 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/__init__.py +0 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/base_model.py +6 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/clip_model.py +134 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/cross_modeling.py +282 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/clip_model.py +115 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/hpsv2_model.py +87 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/mps_model.py +86 -0
- evalscope/metrics/t2v_metrics/models/clipscore_models/pickscore_model.py +62 -0
- evalscope/metrics/t2v_metrics/models/itmscore_models/__init__.py +26 -0
- evalscope/metrics/t2v_metrics/models/itmscore_models/blip2_itm_model.py +85 -0
- evalscope/metrics/t2v_metrics/models/itmscore_models/fga_blip2_model.py +99 -0
- evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/ImageReward.py +176 -0
- evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/__init__.py +0 -0
- evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/blip_pretrain.py +82 -0
- evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward_model.py +74 -0
- evalscope/metrics/t2v_metrics/models/model.py +45 -0
- evalscope/metrics/t2v_metrics/models/utils.py +25 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/__init__.py +22 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/__init__.py +0 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/__init__.py +1 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/language_model/clip_t5.py +306 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/builder.py +12 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/clip_encoder.py +84 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_projector/builder.py +50 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5_model.py +223 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/gpt4v_model.py +153 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/__init__.py +26 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/config.py +465 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/dist_utils.py +141 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/gradcam.py +24 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/logger.py +190 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/optims.py +100 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/registry.py +313 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/utils.py +416 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/__init__.py +8 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa.py +192 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa_eval.py +320 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/default.yaml +10 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_flant5xl.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt2.7b.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt6.7b.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_coco.yaml +36 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xl.yaml +43 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xxl.yaml +43 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna13b.yaml +43 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna7b.yaml +43 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain.yaml +36 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_iter_80k_total_100k_no_prefix.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_iter_80k_total_100k_prefix.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_vitL.yaml +43 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xxl.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_opt2.7b.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_opt6.7b.yaml +42 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_vitL.yaml +37 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna13b.yaml +43 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna7b.yaml +43 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config.json +21 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config_albef.json +22 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_large_config.json +21 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/__init__.py +212 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/base_model.py +231 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py +1111 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/__init__.py +0 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2.py +211 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_image_text_matching.py +109 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_qformer.py +457 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5.py +370 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5_instruct.py +765 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/fga_blip2.py +274 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_llama.py +896 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_t5.py +1876 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/__init__.py +83 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip.py +58 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_caption.py +212 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_classification.py +164 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_feature_extractor.py +202 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_image_text_matching.py +187 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_nlvr.py +179 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_outputs.py +115 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_pretrain.py +371 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_vqa.py +348 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py +870 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/clip_vit.py +273 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/eva_vit.py +514 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py +1291 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/vit.py +476 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/__init__.py +35 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/base_processor.py +27 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/blip_processors.py +233 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/randaugment.py +393 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/mm_utils.py +129 -0
- evalscope/metrics/t2v_metrics/models/vqascore_models/vqa_model.py +18 -0
- evalscope/metrics/t2v_metrics/score.py +78 -0
- evalscope/metrics/t2v_metrics/vqascore.py +14 -0
- evalscope/models/__init__.py +23 -13
- evalscope/models/image_edit_model.py +125 -0
- evalscope/models/mockllm.py +65 -0
- evalscope/models/model_apis.py +69 -0
- evalscope/models/modelscope.py +455 -0
- evalscope/models/openai_compatible.py +144 -0
- evalscope/models/text2image_model.py +124 -0
- evalscope/models/utils/openai.py +708 -0
- evalscope/perf/__init__.py +0 -1
- evalscope/perf/arguments.py +103 -69
- evalscope/perf/benchmark.py +114 -163
- evalscope/perf/http_client.py +59 -89
- evalscope/perf/main.py +91 -18
- evalscope/perf/plugin/__init__.py +3 -2
- evalscope/perf/plugin/api/__init__.py +4 -3
- evalscope/perf/plugin/api/base.py +27 -7
- evalscope/perf/plugin/api/custom_api.py +170 -57
- evalscope/perf/plugin/api/dashscope_api.py +4 -10
- evalscope/perf/plugin/api/default_api.py +214 -0
- evalscope/perf/plugin/api/openai_api.py +120 -41
- evalscope/perf/plugin/datasets/__init__.py +10 -6
- evalscope/perf/plugin/datasets/base.py +43 -1
- evalscope/perf/plugin/datasets/custom.py +22 -3
- evalscope/perf/plugin/datasets/flickr8k.py +5 -27
- evalscope/perf/plugin/datasets/kontext_bench.py +28 -0
- evalscope/perf/plugin/datasets/line_by_line.py +7 -3
- evalscope/perf/plugin/datasets/longalpaca.py +7 -3
- evalscope/perf/plugin/datasets/openqa.py +13 -14
- evalscope/perf/plugin/datasets/random_dataset.py +67 -0
- evalscope/perf/plugin/datasets/random_vl_dataset.py +80 -0
- evalscope/perf/plugin/datasets/speed_benchmark.py +11 -0
- evalscope/perf/plugin/registry.py +36 -16
- evalscope/perf/utils/analysis_result.py +24 -23
- evalscope/perf/utils/benchmark_util.py +95 -55
- evalscope/perf/utils/db_util.py +115 -78
- evalscope/perf/utils/local_server.py +12 -47
- evalscope/perf/utils/log_utils.py +63 -0
- evalscope/perf/utils/rich_display.py +192 -0
- evalscope/report/__init__.py +46 -3
- evalscope/report/combinator.py +143 -32
- evalscope/report/generator.py +74 -34
- evalscope/report/report.py +238 -0
- evalscope/run.py +71 -46
- evalscope/summarizer.py +5 -5
- evalscope/third_party/longbench_write/infer.py +1 -1
- evalscope/third_party/thinkbench/__init__.py +3 -0
- evalscope/third_party/thinkbench/eval.py +441 -0
- evalscope/third_party/thinkbench/infer.py +130 -0
- evalscope/third_party/thinkbench/resources/critique_template.txt +17 -0
- evalscope/third_party/thinkbench/resources/reformat_template.txt +31 -0
- evalscope/third_party/thinkbench/tools/__init__.py +0 -0
- evalscope/third_party/thinkbench/tools/llm.py +48 -0
- evalscope/third_party/thinkbench/tools/utils.py +13 -0
- evalscope/third_party/toolbench_static/llm/swift_infer.py +46 -20
- evalscope/third_party/toolbench_static/toolbench_static.py +2 -1
- evalscope/utils/__init__.py +82 -2
- evalscope/utils/argument_utils.py +64 -0
- evalscope/utils/chat_service.py +8 -6
- evalscope/utils/deprecation_utils.py +53 -0
- evalscope/utils/function_utils.py +266 -0
- evalscope/utils/import_utils.py +154 -0
- evalscope/utils/io_utils.py +336 -8
- evalscope/utils/json_schema.py +231 -0
- evalscope/utils/logger.py +121 -31
- evalscope/utils/model_utils.py +57 -1
- evalscope/utils/multi_choices.py +303 -0
- evalscope/utils/ner.py +377 -0
- evalscope/utils/url_utils.py +65 -0
- evalscope/version.py +2 -2
- evalscope-1.2.0.dist-info/METADATA +553 -0
- evalscope-1.2.0.dist-info/RECORD +628 -0
- {evalscope-0.10.0.dist-info → evalscope-1.2.0.dist-info}/WHEEL +1 -1
- {evalscope-0.10.0.dist-info → evalscope-1.2.0.dist-info}/top_level.txt +0 -1
- evalscope/backend/vlm_eval_kit/custom_dataset.py +0 -46
- evalscope/benchmarks/arc/ai2_arc.py +0 -151
- evalscope/benchmarks/benchmark.py +0 -76
- evalscope/benchmarks/ceval/ceval_exam.py +0 -146
- evalscope/benchmarks/ceval/samples.jsonl +0 -1
- evalscope/benchmarks/cmmlu/cmmlu.py +0 -161
- evalscope/benchmarks/cmmlu/samples.jsonl +0 -5
- evalscope/benchmarks/competition_math/competition_math.py +0 -79
- evalscope/benchmarks/data_adapter.py +0 -291
- evalscope/benchmarks/gsm8k/gsm8k.py +0 -121
- evalscope/benchmarks/hellaswag/hellaswag.py +0 -112
- evalscope/benchmarks/humaneval/humaneval.py +0 -79
- evalscope/benchmarks/mmlu/mmlu.py +0 -160
- evalscope/benchmarks/mmlu/samples.jsonl +0 -5
- evalscope/benchmarks/race/race.py +0 -104
- evalscope/benchmarks/race/samples.jsonl +0 -5
- evalscope/benchmarks/trivia_qa/trivia_qa.py +0 -89
- evalscope/benchmarks/truthful_qa/truthful_qa.py +0 -163
- evalscope/collections/evaluator.py +0 -198
- evalscope/evaluator/rating_eval.py +0 -157
- evalscope/evaluator/reviewer/__init__.py +0 -1
- evalscope/evaluator/reviewer/auto_reviewer.py +0 -391
- evalscope/metrics/code_metric.py +0 -98
- evalscope/metrics/named_metrics.py +0 -17
- evalscope/metrics/resources/gpt2-zhcn3-v4.bpe +0 -58485
- evalscope/metrics/resources/gpt2-zhcn3-v4.json +0 -1
- evalscope/models/base_adapter.py +0 -52
- evalscope/models/chat_adapter.py +0 -138
- evalscope/models/choice_adapter.py +0 -211
- evalscope/models/custom/__init__.py +0 -3
- evalscope/models/custom/custom_model.py +0 -53
- evalscope/models/custom/dummy_model.py +0 -63
- evalscope/models/custom_adapter.py +0 -67
- evalscope/models/local_model.py +0 -74
- evalscope/models/model.py +0 -229
- evalscope/models/server_adapter.py +0 -111
- evalscope/registry/__init__.py +0 -1
- evalscope/registry/config/cfg_arena.yaml +0 -77
- evalscope/registry/config/cfg_arena_zhihu.yaml +0 -63
- evalscope/registry/config/cfg_pairwise_baseline.yaml +0 -83
- evalscope/registry/config/cfg_single.yaml +0 -78
- evalscope/registry/data/prompt_template/lmsys_v2.jsonl +0 -8
- evalscope/registry/data/prompt_template/prompt_templates.jsonl +0 -8
- evalscope/registry/data/qa_browser/battle.jsonl +0 -634
- evalscope/registry/data/qa_browser/category_mapping.yaml +0 -10
- evalscope/registry/data/question.jsonl +0 -80
- evalscope/registry/tasks/arc.yaml +0 -28
- evalscope/registry/tasks/bbh.yaml +0 -26
- evalscope/registry/tasks/bbh_mini.yaml +0 -26
- evalscope/registry/tasks/ceval.yaml +0 -27
- evalscope/registry/tasks/ceval_mini.yaml +0 -26
- evalscope/registry/tasks/cmmlu.yaml +0 -27
- evalscope/registry/tasks/eval_qwen-7b-chat_v100.yaml +0 -28
- evalscope/registry/tasks/general_qa.yaml +0 -27
- evalscope/registry/tasks/gsm8k.yaml +0 -29
- evalscope/registry/tasks/mmlu.yaml +0 -29
- evalscope/registry/tasks/mmlu_mini.yaml +0 -27
- evalscope/report/app.py +0 -506
- evalscope/report/utils.py +0 -133
- evalscope/run_arena.py +0 -202
- evalscope/utils/arena_utils.py +0 -217
- evalscope/utils/completion_parsers.py +0 -82
- evalscope/utils/utils.py +0 -301
- evalscope-0.10.0.dist-info/METADATA +0 -565
- evalscope-0.10.0.dist-info/RECORD +0 -286
- tests/__init__.py +0 -1
- tests/cli/__init__.py +0 -1
- tests/cli/test_collection.py +0 -57
- tests/cli/test_run.py +0 -165
- tests/perf/__init__.py +0 -1
- tests/perf/test_perf.py +0 -101
- tests/rag/test_clip_benchmark.py +0 -85
- tests/rag/test_mteb.py +0 -138
- tests/rag/test_ragas.py +0 -120
- tests/swift/__init__.py +0 -1
- tests/swift/test_run_swift_eval.py +0 -145
- tests/swift/test_run_swift_vlm_eval.py +0 -127
- tests/swift/test_run_swift_vlm_jugde_eval.py +0 -156
- tests/test_run_all.py +0 -12
- tests/vlm/__init__.py +0 -1
- tests/vlm/test_vlmeval.py +0 -60
- {tests/rag → evalscope/api}/__init__.py +0 -0
- {evalscope-0.10.0.dist-info → evalscope-1.2.0.dist-info}/entry_points.txt +0 -0
- {evalscope-0.10.0.dist-info → evalscope-1.2.0.dist-info/licenses}/LICENSE +0 -0
evalscope/utils/io_utils.py
CHANGED
@@ -1,7 +1,18 @@
+import base64
+import csv
+import hashlib
+import io
 import json
 import jsonlines as jsonl
 import os
+import re
+import string
+import unicodedata
 import yaml
+from datetime import datetime
+from io import BytesIO
+from PIL import Image
+from typing import Tuple
 
 from evalscope.constants import DumpMode
 from evalscope.utils.logger import get_logger
@@ -27,7 +38,7 @@ class OutputsStructure:
             'configs_dir': None
         }
 
-    def _get_dir(self, attr_name, dir_name):
+    def _get_dir(self, attr_name, dir_name) -> str:
         if self._dirs[attr_name] is None:
             dir_path = os.path.join(self.outputs_dir, dir_name)
             if self.is_make:
@@ -66,10 +77,20 @@ def jsonl_to_list(jsonl_file):
     Returns:
         list: list of lines. Each line is a dict.
     """
-
-
-
-
+    try:
+        res_list = []
+        with jsonl.open(jsonl_file, mode='r') as reader:
+            for line in reader.iter(type=dict, allow_none=True, skip_invalid=False):
+                res_list.append(line)
+    except Exception:
+        # Fallback to reading line by line
+        res_list = []
+        with open(jsonl_file, 'r', encoding='utf-8') as f:
+            for line in f:
+                if line.strip():  # Skip empty lines
+                    res_list.append(json.loads(line.strip()))
+    if not res_list:
+        logger.warning(f'No data found in {jsonl_file}.')
     return res_list
 
 
@@ -104,6 +125,9 @@ def dump_jsonl_data(data_list, jsonl_file, dump_mode=DumpMode.OVERWRITE):
     if not isinstance(data_list, list):
         data_list = [data_list]
 
+    # Convert non-serializable types to serializable ones
+    data_list = convert_normal_types(data_list)
+
     if dump_mode == DumpMode.OVERWRITE:
         dump_mode = 'w'
     elif dump_mode == DumpMode.APPEND:
@@ -112,8 +136,76 @@ def dump_jsonl_data(data_list, jsonl_file, dump_mode=DumpMode.OVERWRITE):
         writer.write_all(data_list)
 
 
-def jsonl_to_csv():
-
+def jsonl_to_csv(jsonl_file, csv_file):
+    """
+    Convert jsonl file to csv file.
+
+    Args:
+        jsonl_file: jsonl file path.
+        csv_file: csv file path.
+    """
+    data = jsonl_to_list(jsonl_file)
+    if not data:
+        logger.warning(f'No data found in {jsonl_file}.')
+        return
+
+    with open(csv_file, 'w', newline='', encoding='utf-8') as f:
+        writer = csv.writer(f)
+        writer.writerow(data[0].keys())  # Write header
+        for item in data:
+            writer.writerow(item.values())
+
+
+def csv_to_list(csv_file) -> list:
+    """
+    Read csv file to list.
+
+    Args:
+        csv_file: csv file path.
+
+    Returns:
+        list: list of lines. Each line is a dict.
+    """
+    res_list = []
+    with open(csv_file, 'r', encoding='utf-8') as f:
+        reader = csv.DictReader(f)
+        for row in reader:
+            res_list.append(row)
+    return res_list
+
+
+def tsv_to_list(tsv_file) -> list:
+    """
+    Read tsv file to list.
+
+    Args:
+        tsv_file: tsv file path.
+
+    Returns:
+        list: list of lines. Each line is a dict.
+    """
+    res_list = []
+    with open(tsv_file, 'r', encoding='utf-8') as f:
+        reader = csv.DictReader(f, delimiter='\t')
+        for row in reader:
+            res_list.append(row)
+    return res_list
+
+
+def csv_to_jsonl(csv_file, jsonl_file):
+    """
+    Convert csv file to jsonl file.
+
+    Args:
+        csv_file: csv file path.
+        jsonl_file: jsonl file path.
+    """
+    data = csv_to_list(csv_file)
+    if not data:
+        logger.warning(f'No data found in {csv_file}.')
+        return
+
+    dump_jsonl_data(data, jsonl_file, dump_mode=DumpMode.OVERWRITE)
 
 
 def yaml_to_dict(yaml_file) -> dict:
@@ -135,7 +227,7 @@ def dict_to_yaml(d: dict, yaml_file: str):
     Dump dict to yaml file.
     """
     with open(yaml_file, 'w') as f:
-        yaml.dump(d, f, default_flow_style=False)
+        yaml.dump(d, f, default_flow_style=False, allow_unicode=True)
 
 
 def json_to_dict(json_file) -> dict:
@@ -168,3 +260,239 @@ def dict_to_json(d: dict, json_file: str):
     """
     with open(json_file, 'w') as f:
        json.dump(d, f, indent=4, ensure_ascii=False)
+
+
+def get_latest_folder_path(work_dir):
+    from datetime import datetime
+
+    # Get all subdirectories in the work_dir
+    folders = [f for f in os.listdir(work_dir) if os.path.isdir(os.path.join(work_dir, f))]
+
+    # Get the timestamp(YYYYMMDD_HHMMSS)
+    timestamp_pattern = re.compile(r'^\d{8}_\d{6}$')
+
+    # Filter out the folders
+    timestamped_folders = [f for f in folders if timestamp_pattern.match(f)]
+
+    if not timestamped_folders:
+        print(f'>> No timestamped folders found in {work_dir}!')
+        return None
+
+    # timestamp parser
+    def parse_timestamp(folder_name):
+        return datetime.strptime(folder_name, '%Y%m%d_%H%M%S')
+
+    # Find the latest folder
+    latest_folder = max(timestamped_folders, key=parse_timestamp)
+
+    return os.path.join(work_dir, latest_folder)
+
+
+def gen_hash(name: str, bits: int = 32):
+    return hashlib.md5(name.encode(encoding='UTF-8')).hexdigest()[:bits]
+
+
+def get_valid_list(input_list, candidate_list):
+    """
+    Get the valid and invalid list from input_list based on candidate_list.
+    Args:
+        input_list: The input list.
+        candidate_list: The candidate list.
+
+    Returns:
+        valid_list: The valid list.
+        invalid_list: The invalid list.
+    """
+    return [i for i in input_list if i in candidate_list], \
+        [i for i in input_list if i not in candidate_list]
+
+
+def PIL_to_base64(image: Image.Image, format: str = 'JPEG', add_header: bool = False) -> str:
+    """
+    Convert a PIL Image to a base64 encoded string.
+
+    Args:
+        image (Image.Image): The PIL Image to convert.
+        format (str): The format to save the image in. Default is 'JPEG'.
+        add_header (bool): Whether to add the base64 header. Default is False.
+
+    Returns:
+        str: Base64 encoded string of the image.
+    """
+    buffered = BytesIO()
+    image.save(buffered, format=format)
+    img_str = base64.b64encode(buffered.getvalue()).decode('utf-8')
+    if add_header:
+        img_str = f'data:image/{format.lower()};base64,{img_str}'
+    return img_str
+
+
+def bytes_to_base64(bytes_data: bytes, *, format: str = 'png', add_header: bool = False, content_type='image') -> str:
+    """Convert bytes to a base64 encoded string.
+
+    Args:
+        bytes_data (bytes): The bytes to convert.
+        format (str): The format of the image. Default is 'png'.
+        add_header (bool): Whether to add the base64 header. Default is False.
+        content_type (str): The type of the data, 'image' or 'audio'. Default is 'image'.
+
+    Returns:
+        str: Base64 encoded string of the bytes.
+    """
+    base64_str = base64.b64encode(bytes_data).decode('utf-8')
+    if add_header:
+        base64_str = f'data:{content_type}/{format};base64,{base64_str}'
+    return base64_str
+
+
+def base64_to_PIL(base64_str):
+    """Convert a base64 encoded string to a PIL Image.
+
+    Args:
+        base64_str (str): The base64 encoded string.
+
+    Returns:
+        Image.Image: The decoded PIL Image.
+    """
+    # remove header
+    if ',' in base64_str:
+        base64_str = base64_str.split(',', 1)[1]
+
+    # decode
+    img_data = base64.b64decode(base64_str)
+    img_file = io.BytesIO(img_data)
+    img = Image.open(img_file)
+    return img
+
+
+def safe_filename(s: str, max_length: int = 255) -> str:
+    """
+    Convert a string into a safe filename by removing or replacing unsafe characters.
+
+    Args:
+        s (str): The input string to convert
+        max_length (int): Maximum length of the resulting filename (default 255)
+
+    Returns:
+        str: A safe filename string
+
+    Examples:
+        >>> safe_filename("Hello/World?.txt")
+        'Hello_World.txt'
+    """
+    # normalize unicode characters
+    s = unicodedata.normalize('NFKD', s)
+    s = s.encode('ASCII', 'ignore').decode('ASCII')
+
+    # remove or replace unsafe characters
+    # Keep only alphanumeric characters, dots, dashes, and underscores
+    safe_chars = string.ascii_letters + string.digits + '.-_'
+    s = ''.join(c if c in safe_chars else '_' for c in s)
+
+    # remove consecutive underscores
+    s = re.sub(r'_+', '_', s)
+
+    # remove leading/trailing periods and underscores
+    s = s.strip('._')
+
+    # handle empty string case
+    if not s:
+        s = 'untitled'
+
+    # handle starting with a period (hidden files)
+    if s.startswith('.'):
+        s = '_' + s
+
+    # enforce length limit
+    if len(s) > max_length:
+        # If we need to truncate, preserve the file extension if present
+        name, ext = os.path.splitext(s)
+        ext_len = len(ext)
+        if ext_len > 0:
+            max_name_length = max_length - ext_len
+            s = name[:max_name_length] + ext
+        else:
+            s = s[:max_length]
+
+    return s
+
+
+def convert_normal_types(obj):
+    """Recursively convert numpy types and datetime objects to native Python types for JSON serialization."""
+    import numpy as np
+
+    if isinstance(obj, datetime):
+        return obj.isoformat()
+    elif isinstance(obj, np.bool_):
+        return bool(obj)
+    elif isinstance(obj, np.integer):
+        return int(obj)
+    elif isinstance(obj, np.floating):
+        return float(obj)
+    elif isinstance(obj, np.ndarray):
+        return obj.tolist()
+    elif isinstance(obj, dict):
+        return {key: convert_normal_types(value) for key, value in obj.items()}
+    elif isinstance(obj, list):
+        return [convert_normal_types(item) for item in obj]
+    elif isinstance(obj, tuple):
+        return tuple(convert_normal_types(item) for item in obj)
+    elif isinstance(obj, os.PathLike):
+        return str(obj)
+    else:
+        return obj
+
+
+def compress_image_to_limit(image_bytes: bytes, max_bytes: int = 10_000_000) -> Tuple[bytes, str]:
+    """
+    Ensure image bytes are under max_bytes by re-encoding to JPEG with quality reduction
+    and optional downscaling. Returns (processed_bytes, format_str).
+    If the original bytes are already below the limit, returns them as PNG.
+    """
+    if len(image_bytes) <= max_bytes:
+        return image_bytes, 'png'
+
+    try:
+        img = Image.open(BytesIO(image_bytes))
+    except Exception as exc:
+        logger.warning(f'Failed to open image bytes with PIL, sending original image; may exceed API limit: {exc}')
+        return image_bytes, 'png'
+
+    # Convert to RGB for JPEG if needed
+    if img.mode not in ('RGB', 'L'):
+        img = img.convert('RGB')
+
+    def encode_jpeg(source: Image.Image, quality: int) -> bytes:
+        buf = BytesIO()
+        source.save(buf, format='JPEG', quality=quality, optimize=True, progressive=True)
+        return buf.getvalue()
+
+    # Start with moderate quality and reduce
+    quality: int = 85
+    out: bytes = encode_jpeg(img, quality)
+    quality_floor: int = 40
+
+    while len(out) > max_bytes and quality > quality_floor:
+        quality -= 10
+        out = encode_jpeg(img, quality)
+
+    # If still too large, progressively downscale
+    min_side_floor: int = 256
+    scale: float = 0.9
+    while len(out) > max_bytes and min(img.size) > min_side_floor:
+        new_w = max(min_side_floor, int(img.width * scale))
+        new_h = max(min_side_floor, int(img.height * scale))
+        if (new_w, new_h) == img.size:
+            break
+        img = img.resize((new_w, new_h), Image.LANCZOS)
+        out = encode_jpeg(img, quality)
+
+    if len(out) > max_bytes:
+        logger.warning(f'Image remains above limit after compression: size={len(out)} bytes (limit={max_bytes}).')
+    else:
+        logger.info(
+            f'Compressed image from {len(image_bytes)} to {len(out)} bytes; '
+            f'quality={quality}, size={img.width}x{img.height}.'
+        )
+
+    return out, 'jpeg'
evalscope/utils/json_schema.py
ADDED
@@ -0,0 +1,231 @@
+import types
+import typing
+from copy import deepcopy
+from dataclasses import is_dataclass
+from datetime import date, datetime, time
+from enum import EnumMeta
+from pydantic import BaseModel, Field, field_validator, model_validator
+from typing import (
+    Any,
+    Dict,
+    List,
+    Literal,
+    Optional,
+    Set,
+    Tuple,
+    Type,
+    Union,
+    cast,
+    get_args,
+    get_origin,
+    get_type_hints,
+    is_typeddict,
+)
+
+JSONType = Literal['string', 'integer', 'number', 'boolean', 'array', 'object', 'null']
+"""Valid types within JSON schema."""
+
+
+class JSONSchema(BaseModel):
+    """JSON Schema for type."""
+
+    type: Optional[JSONType] = Field(default=None)
+    """JSON type of tool parameter."""
+
+    format: Optional[str] = Field(default=None)
+    """Format of the parameter (e.g. date-time)."""
+
+    description: Optional[str] = Field(default=None)
+    """Parameter description."""
+
+    default: Any = Field(default=None)
+    """Default value for parameter."""
+
+    enum: Optional[List[Any]] = Field(default=None)
+    """Valid values for enum parameters."""
+
+    items: Optional['JSONSchema'] = Field(default=None)
+    """Valid type for array parameters."""
+
+    properties: Optional[Dict[str, 'JSONSchema']] = Field(default=None)
+    """Valid fields for object parametrs."""
+
+    additionalProperties: Optional[Union['JSONSchema', bool]] = Field(default=None)
+    """Are additional properties allowed?"""
+
+    anyOf: Optional[List['JSONSchema']] = Field(default=None)
+    """Valid types for union parameters."""
+
+    required: Optional[List[str]] = Field(default=None)
+    """Required fields for object parameters."""
+
+    @model_validator(mode='before')
+    def convert_type_before_validation(cls, values):
+        values = deepcopy(values)
+
+        def recursive_convert_type(obj):
+            if isinstance(obj, dict):
+                # Convert 'type' field if it's a string
+                if 'type' in obj and isinstance(obj['type'], str):
+                    try:
+                        obj['type'] = python_type_to_json_type(obj['type'])
+                    except ValueError:
+                        # If conversion fails, leave it as is
+                        pass
+                # Recursively process nested structures
+                for k, v in obj.items():
+                    obj[k] = recursive_convert_type(v)
+            elif isinstance(obj, list):
+                return [recursive_convert_type(item) for item in obj]
+            return obj
+
+        return recursive_convert_type(values)
+
+
+def json_schema(t: Type[Any]) -> JSONSchema:
+    """Provide a JSON Schema for the specified type.
+
+    Schemas can be automatically inferred for a wide variety of
+    Python class types including Pydantic BaseModel, dataclasses,
+    and typed dicts.
+
+    Args:
+        t: Python type
+
+    Returns:
+        JSON Schema for type.
+    """
+    origin = get_origin(t)
+    args = get_args(t)
+
+    if origin is None:
+        if t is int:
+            return JSONSchema(type='integer')
+        elif t is float:
+            return JSONSchema(type='number')
+        elif t is str:
+            return JSONSchema(type='string')
+        elif t is bool:
+            return JSONSchema(type='boolean')
+        elif t is datetime:
+            return JSONSchema(type='string', format='date-time')
+        elif t is date:
+            return JSONSchema(type='string', format='date')
+        elif t is time:
+            return JSONSchema(type='string', format='time')
+        elif t is list or t is set:
+            return JSONSchema(type='array', items=JSONSchema())
+        elif t is dict:
+            return JSONSchema(type='object', additionalProperties=JSONSchema())
+        elif (is_dataclass(t) or is_typeddict(t) or (isinstance(t, type) and issubclass(t, BaseModel))):
+            return cls_json_schema(t)
+        elif isinstance(t, EnumMeta):
+            return JSONSchema(enum=[item.value for item in t])
+        elif t is type(None):
+            return JSONSchema(type='null')
+        else:
+            return JSONSchema()
+    elif (origin is list or origin is List or origin is tuple or origin is Tuple or origin is set or origin is Set):
+        return JSONSchema(type='array', items=json_schema(args[0]) if args else JSONSchema())
+    elif origin is dict or origin is Dict:
+        return JSONSchema(
+            type='object',
+            additionalProperties=json_schema(args[1]) if len(args) > 1 else JSONSchema(),
+        )
+    elif origin is Union or origin is types.UnionType:
+        return JSONSchema(anyOf=[json_schema(arg) for arg in args])
+    elif origin is Optional:
+        return JSONSchema(anyOf=[json_schema(arg) for arg in args] + [JSONSchema(type='null')])
+    elif origin is typing.Literal:
+        return JSONSchema(enum=list(args))
+
+    return JSONSchema()  # Default case if we can't determine the type
+
+
+def cls_json_schema(cls: Type[Any]) -> JSONSchema:
+    properties: Dict[str, JSONSchema] = {}
+    required: List[str] = []
+
+    if is_dataclass(cls):
+        fields = cls.__dataclass_fields__  # type: ignore
+        for name, field in fields.items():
+            properties[name] = json_schema(field.type)  # type: ignore
+            if field.default == field.default_factory:
+                required.append(name)
+    elif isinstance(cls, type) and issubclass(cls, BaseModel):
+        schema = cls.model_json_schema()
+        schema = resolve_schema_references(schema)
+        for name, prop in schema.get('properties', {}).items():
+            properties[name] = JSONSchema(**prop)
+        required = schema.get('required', [])
+    elif is_typeddict(cls):
+        annotations = get_type_hints(cls)
+        for name, type_hint in annotations.items():
+            properties[name] = json_schema(type_hint)
+            if name in cls.__required_keys__:
+                required.append(name)
+
+    return JSONSchema(
+        type='object',
+        properties=properties,
+        required=required if required else None,
+        additionalProperties=False,
+    )
+
+
+def python_type_to_json_type(python_type: Optional[str]) -> JSONType:
+    if python_type is not None and python_type in get_args(JSONType):
+        return python_type
+    if python_type == 'str':
+        return 'string'
+    elif python_type == 'int':
+        return 'integer'
+    elif python_type == 'float':
+        return 'number'
+    elif python_type == 'bool':
+        return 'boolean'
+    elif python_type == 'list':
+        return 'array'
+    elif python_type == 'dict':
+        return 'object'
+    elif python_type == 'None':
+        return 'null'
+    elif python_type is None:
+        # treat 'unknown' as string as anything can be converted to string
+        return 'string'
+    else:
+        raise ValueError(f'Unsupported type: {python_type} for Python to JSON conversion.')
+
+
+def resolve_schema_references(schema: Dict[str, Any]) -> Dict[str, Any]:
+    """Resolves all $ref references in a JSON schema by inlining the definitions."""
+    schema = deepcopy(schema)
+    definitions = schema.pop('$defs', {})
+
+    def _resolve_refs(obj: Any) -> Any:
+        if isinstance(obj, dict):
+            if '$ref' in obj and obj['$ref'].startswith('#/$defs/'):
+                ref_key = obj['$ref'].split('/')[-1]
+                if ref_key in definitions:
+                    # Replace with a deep copy of the definition
+                    resolved = deepcopy(definitions[ref_key])
+                    # Process any nested references in the definition
+                    resolved = _resolve_refs(resolved)
+
+                    # Merge in the current object fields, which should take priority
+                    # This means that if you have e.g.
+                    # {"$ref": "#/$defs/SubType", "description": "subtype of type SubType"},
+                    # and SubType resolves to
+                    # {"description": "The SubType Class", "parameters": {"param1": {"type": "string"}}},
+                    # the final result will be:
+                    # {"description": "subtype of type SubType", "parameters": {"param1": {"type": "string"}}}
+                    return resolved | {k: o for k, o in obj.items() if k != '$ref'}
+
+            # Process all entries in the dictionary
+            return {k: _resolve_refs(v) for k, v in obj.items()}
+        elif isinstance(obj, list):
+            return [_resolve_refs(item) for item in obj]
+        else:
+            return obj
+
+    return cast(Dict[str, Any], _resolve_refs(schema))