evalscope 1.0.0__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324) hide show
  1. evalscope/api/benchmark/__init__.py +9 -1
  2. evalscope/api/benchmark/adapters/__init__.py +4 -0
  3. evalscope/api/benchmark/adapters/agent_adapter.py +8 -0
  4. evalscope/api/benchmark/adapters/default_data_adapter.py +75 -4
  5. evalscope/api/benchmark/adapters/image_edit_adapter.py +82 -0
  6. evalscope/api/benchmark/adapters/multi_choice_adapter.py +5 -2
  7. evalscope/api/benchmark/adapters/ner_adapter.py +212 -0
  8. evalscope/api/benchmark/adapters/text2image_adapter.py +12 -10
  9. evalscope/api/benchmark/adapters/vision_language_adapter.py +8 -0
  10. evalscope/api/benchmark/benchmark.py +85 -2
  11. evalscope/api/benchmark/meta.py +10 -1
  12. evalscope/api/dataset/dataset.py +27 -6
  13. evalscope/api/dataset/loader.py +8 -3
  14. evalscope/api/evaluator/cache.py +31 -4
  15. evalscope/api/evaluator/evaluator.py +5 -0
  16. evalscope/api/evaluator/state.py +17 -1
  17. evalscope/api/messages/__init__.py +1 -0
  18. evalscope/api/messages/chat_message.py +52 -2
  19. evalscope/api/metric/__init__.py +1 -1
  20. evalscope/api/metric/metric.py +6 -1
  21. evalscope/api/metric/scorer.py +15 -7
  22. evalscope/api/mixin/__init__.py +1 -1
  23. evalscope/api/mixin/llm_judge_mixin.py +2 -0
  24. evalscope/api/mixin/sandbox_mixin.py +182 -0
  25. evalscope/api/model/generate_config.py +10 -6
  26. evalscope/api/model/model.py +5 -2
  27. evalscope/api/tool/tool_info.py +1 -1
  28. evalscope/app/app.py +3 -0
  29. evalscope/app/ui/multi_model.py +6 -1
  30. evalscope/app/ui/single_model.py +11 -5
  31. evalscope/app/utils/data_utils.py +8 -7
  32. evalscope/app/utils/env_utils.py +12 -0
  33. evalscope/app/utils/text_utils.py +14 -12
  34. evalscope/app/utils/visualization.py +2 -2
  35. evalscope/arguments.py +8 -4
  36. evalscope/backend/opencompass/backend_manager.py +0 -2
  37. evalscope/backend/rag_eval/utils/embedding.py +9 -1
  38. evalscope/benchmarks/aa_lcr/aa_lcr_adapter.py +205 -0
  39. evalscope/benchmarks/ai2d/ai2d_adapter.py +54 -0
  40. evalscope/benchmarks/aime/aime24_adapter.py +5 -0
  41. evalscope/benchmarks/aime/aime25_adapter.py +136 -1
  42. evalscope/benchmarks/aime/grader.py +307 -0
  43. evalscope/benchmarks/aime/math_normalize.py +189 -0
  44. evalscope/benchmarks/amc/amc_adapter.py +51 -0
  45. evalscope/benchmarks/arena_hard/arena_hard_adapter.py +1 -0
  46. evalscope/benchmarks/bbh/bbh_adapter.py +43 -17
  47. evalscope/benchmarks/bfcl/{bfcl_adapter.py → v3/bfcl_v3_adapter.py} +131 -19
  48. evalscope/benchmarks/bfcl/{generation.py → v3/generation.py} +9 -9
  49. evalscope/benchmarks/bfcl/v3/utils.py +23 -0
  50. evalscope/benchmarks/bfcl/v4/__init__.py +0 -0
  51. evalscope/benchmarks/bfcl/v4/bfcl_v4_adapter.py +229 -0
  52. evalscope/benchmarks/bfcl/v4/utils.py +410 -0
  53. evalscope/benchmarks/biomix_qa/__init__.py +0 -0
  54. evalscope/benchmarks/biomix_qa/biomix_qa_adapter.py +36 -0
  55. evalscope/benchmarks/blink/__init__.py +0 -0
  56. evalscope/benchmarks/blink/blink_adapter.py +61 -0
  57. evalscope/benchmarks/ceval/ceval_adapter.py +1 -2
  58. evalscope/benchmarks/chartqa/__init__.py +0 -0
  59. evalscope/benchmarks/chartqa/chartqa_adapter.py +80 -0
  60. evalscope/benchmarks/chartqa/utils.py +38 -0
  61. evalscope/benchmarks/coin_flip/__init__.py +0 -0
  62. evalscope/benchmarks/coin_flip/coin_flip_adapter.py +128 -0
  63. evalscope/benchmarks/commonsense_qa/__init__.py +0 -0
  64. evalscope/benchmarks/commonsense_qa/commonsense_qa_adapter.py +32 -0
  65. evalscope/benchmarks/competition_math/competition_math_adapter.py +5 -0
  66. evalscope/benchmarks/data_collection/data_collection_adapter.py +24 -19
  67. evalscope/benchmarks/docvqa/__init__.py +0 -0
  68. evalscope/benchmarks/docvqa/docvqa_adapter.py +67 -0
  69. evalscope/benchmarks/drivelology/__init__.py +0 -0
  70. evalscope/benchmarks/drivelology/drivelology_binary_adapter.py +170 -0
  71. evalscope/benchmarks/drivelology/drivelology_multilabel_adapter.py +254 -0
  72. evalscope/benchmarks/drivelology/drivelology_selection_adapter.py +49 -0
  73. evalscope/benchmarks/drivelology/drivelology_writing_adapter.py +218 -0
  74. evalscope/benchmarks/drop/drop_adapter.py +15 -44
  75. evalscope/benchmarks/drop/utils.py +97 -0
  76. evalscope/benchmarks/frames/frames_adapter.py +2 -1
  77. evalscope/benchmarks/general_arena/general_arena_adapter.py +7 -2
  78. evalscope/benchmarks/general_arena/utils.py +2 -1
  79. evalscope/benchmarks/general_mcq/general_mcq_adapter.py +1 -1
  80. evalscope/benchmarks/general_qa/general_qa_adapter.py +1 -1
  81. evalscope/benchmarks/gsm8k/gsm8k_adapter.py +25 -9
  82. evalscope/benchmarks/hallusion_bench/__init__.py +0 -0
  83. evalscope/benchmarks/hallusion_bench/hallusion_bench_adapter.py +159 -0
  84. evalscope/benchmarks/halu_eval/__init__.py +0 -0
  85. evalscope/benchmarks/halu_eval/halu_eval_adapter.py +128 -0
  86. evalscope/benchmarks/halu_eval/halu_eval_instructions.py +84 -0
  87. evalscope/benchmarks/healthbench/__init__.py +0 -0
  88. evalscope/benchmarks/healthbench/healthbench_adapter.py +282 -0
  89. evalscope/benchmarks/healthbench/utils.py +102 -0
  90. evalscope/benchmarks/hle/hle_adapter.py +3 -2
  91. evalscope/benchmarks/humaneval/humaneval_adapter.py +24 -52
  92. evalscope/benchmarks/humaneval/utils.py +235 -0
  93. evalscope/benchmarks/ifeval/instructions_util.py +2 -3
  94. evalscope/benchmarks/image_edit/__init__.py +0 -0
  95. evalscope/benchmarks/image_edit/gedit/__init__.py +0 -0
  96. evalscope/benchmarks/image_edit/gedit/gedit_adapter.py +138 -0
  97. evalscope/benchmarks/image_edit/gedit/utils.py +372 -0
  98. evalscope/benchmarks/image_edit/gedit/vie_prompts.py +406 -0
  99. evalscope/benchmarks/infovqa/__init__.py +0 -0
  100. evalscope/benchmarks/infovqa/infovqa_adapter.py +66 -0
  101. evalscope/benchmarks/live_code_bench/evaluate_utils.py +13 -6
  102. evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py +66 -54
  103. evalscope/benchmarks/live_code_bench/sandbox_evaluate_utils.py +220 -0
  104. evalscope/benchmarks/logi_qa/__int__.py +0 -0
  105. evalscope/benchmarks/logi_qa/logi_qa_adapter.py +41 -0
  106. evalscope/benchmarks/math_500/math_500_adapter.py +5 -1
  107. evalscope/benchmarks/math_qa/__init__.py +0 -0
  108. evalscope/benchmarks/math_qa/math_qa_adapter.py +35 -0
  109. evalscope/benchmarks/math_verse/__init__.py +0 -0
  110. evalscope/benchmarks/math_verse/math_verse_adapter.py +105 -0
  111. evalscope/benchmarks/math_vision/__init__.py +0 -0
  112. evalscope/benchmarks/math_vision/math_vision_adapter.py +116 -0
  113. evalscope/benchmarks/math_vista/__init__.py +0 -0
  114. evalscope/benchmarks/math_vista/math_vista_adapter.py +114 -0
  115. evalscope/benchmarks/med_mcqa/__init__.py +0 -0
  116. evalscope/benchmarks/med_mcqa/med_mcqa_adapter.py +32 -0
  117. evalscope/benchmarks/minerva_math/__init__.py +0 -0
  118. evalscope/benchmarks/minerva_math/minerva_math_adapter.py +53 -0
  119. evalscope/benchmarks/mm_bench/__init__.py +0 -0
  120. evalscope/benchmarks/mm_bench/mm_bench_adapter.py +99 -0
  121. evalscope/benchmarks/mm_star/__init__.py +0 -0
  122. evalscope/benchmarks/mm_star/mm_star_adapter.py +73 -0
  123. evalscope/benchmarks/mmlu_pro/mmlu_pro_adapter.py +1 -1
  124. evalscope/benchmarks/mmmu/__init__.py +0 -0
  125. evalscope/benchmarks/mmmu/mmmu_adapter.py +159 -0
  126. evalscope/benchmarks/mmmu_pro/__init__.py +0 -0
  127. evalscope/benchmarks/mmmu_pro/mmmu_pro_adapter.py +124 -0
  128. evalscope/benchmarks/mri_mcqa/__init__.py +0 -0
  129. evalscope/benchmarks/mri_mcqa/mri_mcqa_adapter.py +34 -0
  130. evalscope/benchmarks/multi_if/__init__.py +0 -0
  131. evalscope/benchmarks/multi_if/ifeval.py +3354 -0
  132. evalscope/benchmarks/multi_if/metrics.py +120 -0
  133. evalscope/benchmarks/multi_if/multi_if_adapter.py +161 -0
  134. evalscope/benchmarks/music_trivia/__init__.py +0 -0
  135. evalscope/benchmarks/music_trivia/music_trivia_adapter.py +36 -0
  136. evalscope/benchmarks/needle_haystack/needle_haystack_adapter.py +7 -6
  137. evalscope/benchmarks/ner/__init__.py +0 -0
  138. evalscope/benchmarks/ner/broad_twitter_corpus_adapter.py +52 -0
  139. evalscope/benchmarks/ner/conll2003_adapter.py +48 -0
  140. evalscope/benchmarks/ner/copious_adapter.py +85 -0
  141. evalscope/benchmarks/ner/cross_ner_adapter.py +120 -0
  142. evalscope/benchmarks/ner/cross_ner_entities/__init__.py +0 -0
  143. evalscope/benchmarks/ner/cross_ner_entities/ai.py +54 -0
  144. evalscope/benchmarks/ner/cross_ner_entities/literature.py +36 -0
  145. evalscope/benchmarks/ner/cross_ner_entities/music.py +39 -0
  146. evalscope/benchmarks/ner/cross_ner_entities/politics.py +37 -0
  147. evalscope/benchmarks/ner/cross_ner_entities/science.py +58 -0
  148. evalscope/benchmarks/ner/genia_ner_adapter.py +66 -0
  149. evalscope/benchmarks/ner/harvey_ner_adapter.py +58 -0
  150. evalscope/benchmarks/ner/mit_movie_trivia_adapter.py +74 -0
  151. evalscope/benchmarks/ner/mit_restaurant_adapter.py +66 -0
  152. evalscope/benchmarks/ner/ontonotes5_adapter.py +87 -0
  153. evalscope/benchmarks/ner/wnut2017_adapter.py +61 -0
  154. evalscope/benchmarks/ocr_bench/__init__.py +0 -0
  155. evalscope/benchmarks/ocr_bench/ocr_bench/__init__.py +0 -0
  156. evalscope/benchmarks/ocr_bench/ocr_bench/ocr_bench_adapter.py +101 -0
  157. evalscope/benchmarks/ocr_bench/ocr_bench_v2/IoUscore_metric.py +87 -0
  158. evalscope/benchmarks/ocr_bench/ocr_bench_v2/TEDS_metric.py +963 -0
  159. evalscope/benchmarks/ocr_bench/ocr_bench_v2/__init__.py +0 -0
  160. evalscope/benchmarks/ocr_bench/ocr_bench_v2/ocr_bench_v2_adapter.py +161 -0
  161. evalscope/benchmarks/ocr_bench/ocr_bench_v2/page_ocr_metric.py +50 -0
  162. evalscope/benchmarks/ocr_bench/ocr_bench_v2/parallel.py +46 -0
  163. evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_eval/__init__.py +0 -0
  164. evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_eval/readme.txt +26 -0
  165. evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_eval/rrc_evaluation_funcs_1_1.py +537 -0
  166. evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_eval/script.py +481 -0
  167. evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_metric.py +179 -0
  168. evalscope/benchmarks/ocr_bench/ocr_bench_v2/utils.py +433 -0
  169. evalscope/benchmarks/ocr_bench/ocr_bench_v2/vqa_metric.py +254 -0
  170. evalscope/benchmarks/olympiad_bench/__init__.py +0 -0
  171. evalscope/benchmarks/olympiad_bench/olympiad_bench_adapter.py +163 -0
  172. evalscope/benchmarks/olympiad_bench/utils.py +565 -0
  173. evalscope/benchmarks/omni_bench/__init__.py +0 -0
  174. evalscope/benchmarks/omni_bench/omni_bench_adapter.py +86 -0
  175. evalscope/benchmarks/omnidoc_bench/__init__.py +0 -0
  176. evalscope/benchmarks/omnidoc_bench/end2end_eval.py +349 -0
  177. evalscope/benchmarks/omnidoc_bench/metrics.py +547 -0
  178. evalscope/benchmarks/omnidoc_bench/omnidoc_bench_adapter.py +135 -0
  179. evalscope/benchmarks/omnidoc_bench/utils.py +1937 -0
  180. evalscope/benchmarks/piqa/__init__.py +0 -0
  181. evalscope/benchmarks/piqa/piqa_adapter.py +32 -0
  182. evalscope/benchmarks/poly_math/__init__.py +0 -0
  183. evalscope/benchmarks/poly_math/poly_math_adapter.py +132 -0
  184. evalscope/benchmarks/poly_math/utils/instruction.py +105 -0
  185. evalscope/benchmarks/pope/__init__.py +0 -0
  186. evalscope/benchmarks/pope/pope_adapter.py +112 -0
  187. evalscope/benchmarks/process_bench/process_bench_adapter.py +1 -0
  188. evalscope/benchmarks/pumed_qa/__init__.py +0 -0
  189. evalscope/benchmarks/pumed_qa/pubmed_qa_adapter.py +175 -0
  190. evalscope/benchmarks/qasc/__init__.py +0 -0
  191. evalscope/benchmarks/qasc/qasc_adapter.py +35 -0
  192. evalscope/benchmarks/real_world_qa/__init__.py +0 -0
  193. evalscope/benchmarks/real_world_qa/real_world_qa_adapter.py +64 -0
  194. evalscope/benchmarks/sciq/__init__.py +0 -0
  195. evalscope/benchmarks/sciq/sciq_adapter.py +36 -0
  196. evalscope/benchmarks/seed_bench_2_plus/__init__.py +0 -0
  197. evalscope/benchmarks/seed_bench_2_plus/seed_bench_2_plus_adapter.py +72 -0
  198. evalscope/benchmarks/simple_qa/simple_qa_adapter.py +1 -1
  199. evalscope/benchmarks/simple_vqa/__init__.py +0 -0
  200. evalscope/benchmarks/simple_vqa/simple_vqa_adapter.py +169 -0
  201. evalscope/benchmarks/siqa/__init__.py +0 -0
  202. evalscope/benchmarks/siqa/siqa_adapter.py +39 -0
  203. evalscope/benchmarks/tau_bench/tau2_bench/__init__.py +0 -0
  204. evalscope/benchmarks/tau_bench/tau2_bench/generation.py +158 -0
  205. evalscope/benchmarks/tau_bench/tau2_bench/tau2_bench_adapter.py +146 -0
  206. evalscope/benchmarks/tau_bench/tau_bench/__init__.py +0 -0
  207. evalscope/benchmarks/tau_bench/{generation.py → tau_bench/generation.py} +1 -1
  208. evalscope/benchmarks/tau_bench/{tau_bench_adapter.py → tau_bench/tau_bench_adapter.py} +29 -29
  209. evalscope/benchmarks/text2image/__init__.py +0 -0
  210. evalscope/benchmarks/{aigc/t2i → text2image}/evalmuse_adapter.py +3 -1
  211. evalscope/benchmarks/{aigc/t2i → text2image}/genai_bench_adapter.py +2 -2
  212. evalscope/benchmarks/{aigc/t2i → text2image}/general_t2i_adapter.py +1 -1
  213. evalscope/benchmarks/{aigc/t2i → text2image}/hpdv2_adapter.py +7 -2
  214. evalscope/benchmarks/{aigc/t2i → text2image}/tifa_adapter.py +1 -0
  215. evalscope/benchmarks/tool_bench/tool_bench_adapter.py +3 -3
  216. evalscope/benchmarks/truthful_qa/truthful_qa_adapter.py +1 -2
  217. evalscope/benchmarks/visu_logic/__init__.py +0 -0
  218. evalscope/benchmarks/visu_logic/visu_logic_adapter.py +75 -0
  219. evalscope/benchmarks/wmt/__init__.py +0 -0
  220. evalscope/benchmarks/wmt/wmt24_adapter.py +294 -0
  221. evalscope/benchmarks/zerobench/__init__.py +0 -0
  222. evalscope/benchmarks/zerobench/zerobench_adapter.py +64 -0
  223. evalscope/cli/start_app.py +7 -1
  224. evalscope/cli/start_perf.py +7 -1
  225. evalscope/config.py +103 -18
  226. evalscope/constants.py +18 -0
  227. evalscope/evaluator/evaluator.py +138 -82
  228. evalscope/metrics/bert_score/__init__.py +0 -0
  229. evalscope/metrics/bert_score/scorer.py +338 -0
  230. evalscope/metrics/bert_score/utils.py +697 -0
  231. evalscope/metrics/llm_judge.py +19 -7
  232. evalscope/metrics/math_parser.py +14 -0
  233. evalscope/metrics/metric.py +317 -13
  234. evalscope/metrics/metrics.py +37 -0
  235. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/config.py +0 -0
  236. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/dist_utils.py +0 -0
  237. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/gradcam.py +0 -0
  238. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/logger.py +0 -0
  239. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/optims.py +0 -0
  240. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/registry.py +0 -0
  241. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/utils.py +0 -0
  242. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/__init__.py +0 -0
  243. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa.py +0 -0
  244. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa_eval.py +0 -0
  245. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py +2 -6
  246. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py +2 -6
  247. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py +2 -6
  248. evalscope/models/image_edit_model.py +125 -0
  249. evalscope/models/model_apis.py +22 -0
  250. evalscope/models/openai_compatible.py +21 -0
  251. evalscope/models/text2image_model.py +2 -2
  252. evalscope/models/utils/openai.py +16 -6
  253. evalscope/perf/arguments.py +26 -4
  254. evalscope/perf/benchmark.py +76 -89
  255. evalscope/perf/http_client.py +31 -16
  256. evalscope/perf/main.py +15 -2
  257. evalscope/perf/plugin/api/base.py +9 -7
  258. evalscope/perf/plugin/api/custom_api.py +13 -58
  259. evalscope/perf/plugin/api/default_api.py +188 -79
  260. evalscope/perf/plugin/api/openai_api.py +85 -20
  261. evalscope/perf/plugin/datasets/base.py +21 -0
  262. evalscope/perf/plugin/datasets/custom.py +2 -3
  263. evalscope/perf/plugin/datasets/flickr8k.py +2 -2
  264. evalscope/perf/plugin/datasets/kontext_bench.py +2 -2
  265. evalscope/perf/plugin/datasets/line_by_line.py +2 -3
  266. evalscope/perf/plugin/datasets/longalpaca.py +2 -3
  267. evalscope/perf/plugin/datasets/openqa.py +2 -4
  268. evalscope/perf/plugin/datasets/random_dataset.py +1 -3
  269. evalscope/perf/plugin/datasets/random_vl_dataset.py +2 -2
  270. evalscope/perf/utils/benchmark_util.py +43 -27
  271. evalscope/perf/utils/db_util.py +14 -19
  272. evalscope/perf/utils/local_server.py +3 -44
  273. evalscope/perf/utils/log_utils.py +21 -6
  274. evalscope/report/__init__.py +13 -3
  275. evalscope/report/combinator.py +91 -20
  276. evalscope/report/generator.py +8 -87
  277. evalscope/report/report.py +8 -4
  278. evalscope/run.py +13 -5
  279. evalscope/third_party/toolbench_static/llm/swift_infer.py +0 -4
  280. evalscope/utils/argument_utils.py +1 -1
  281. evalscope/utils/chat_service.py +1 -1
  282. evalscope/utils/function_utils.py +249 -12
  283. evalscope/utils/import_utils.py +73 -1
  284. evalscope/utils/io_utils.py +132 -7
  285. evalscope/utils/json_schema.py +25 -2
  286. evalscope/utils/logger.py +69 -18
  287. evalscope/utils/model_utils.py +4 -3
  288. evalscope/utils/multi_choices.py +39 -7
  289. evalscope/utils/ner.py +377 -0
  290. evalscope/version.py +2 -2
  291. {evalscope-1.0.0.dist-info → evalscope-1.2.0.dist-info}/METADATA +252 -408
  292. {evalscope-1.0.0.dist-info → evalscope-1.2.0.dist-info}/RECORD +290 -154
  293. {evalscope-1.0.0.dist-info → evalscope-1.2.0.dist-info}/WHEEL +1 -1
  294. {evalscope-1.0.0.dist-info → evalscope-1.2.0.dist-info}/top_level.txt +0 -1
  295. evalscope/api/mixin/dataset_mixin.py +0 -105
  296. evalscope/benchmarks/aigc/i2i/general_i2i_adapter.py +0 -44
  297. tests/__init__.py +0 -1
  298. tests/aigc/__init__.py +0 -1
  299. tests/aigc/test_t2i.py +0 -142
  300. tests/benchmark/__init__.py +0 -1
  301. tests/benchmark/test_eval.py +0 -386
  302. tests/cli/__init__.py +0 -1
  303. tests/cli/test_all.py +0 -229
  304. tests/cli/test_collection.py +0 -96
  305. tests/cli/test_custom.py +0 -268
  306. tests/perf/__init__.py +0 -1
  307. tests/perf/test_perf.py +0 -176
  308. tests/rag/test_clip_benchmark.py +0 -90
  309. tests/rag/test_mteb.py +0 -213
  310. tests/rag/test_ragas.py +0 -128
  311. tests/swift/__init__.py +0 -1
  312. tests/swift/test_run_swift_eval.py +0 -146
  313. tests/swift/test_run_swift_vlm_eval.py +0 -128
  314. tests/swift/test_run_swift_vlm_jugde_eval.py +0 -157
  315. tests/test_run_all.py +0 -12
  316. tests/utils.py +0 -13
  317. tests/vlm/__init__.py +0 -1
  318. tests/vlm/test_vlmeval.py +0 -102
  319. /evalscope/benchmarks/{aigc → aa_lcr}/__init__.py +0 -0
  320. /evalscope/benchmarks/{aigc/i2i → ai2d}/__init__.py +0 -0
  321. /evalscope/benchmarks/{aigc/t2i → amc}/__init__.py +0 -0
  322. {tests/rag → evalscope/benchmarks/bfcl/v3}/__init__.py +0 -0
  323. {evalscope-1.0.0.dist-info → evalscope-1.2.0.dist-info}/entry_points.txt +0 -0
  324. {evalscope-1.0.0.dist-info → evalscope-1.2.0.dist-info/licenses}/LICENSE +0 -0
@@ -1,29 +1,266 @@
1
+ import asyncio
1
2
  import threading
3
+ import time
4
+ from concurrent.futures import ThreadPoolExecutor, wait
5
+ from contextlib import contextmanager
2
6
  from functools import wraps
7
+ from tqdm import tqdm
8
+ from typing import Any, Awaitable, Callable, List, Optional, Sequence, TypeVar, Union
3
9
 
10
+ from evalscope.utils.logger import get_logger
4
11
 
5
- def run_once(func):
6
- """Decorator to ensure a function is only run once."""
7
- has_run = False
8
- result = None
12
+ logger = get_logger()
9
13
 
14
T = TypeVar('T')
R = TypeVar('R')

# Global lock to safely create per-instance locks in decorators
_THREAD_SAFE_GLOBAL_LOCK = threading.RLock()


def thread_safe(func: Callable[..., T]) -> Callable[..., T]:
    """Serialize calls to ``func`` with a re-entrant lock.

    Lock selection:
    - If the first positional argument has a ``__dict__`` (i.e. it looks like ``self``),
      a lock is created lazily and stored on that object under ``__lock_<func name>``,
      which gives one lock per object and per decorated callable.
    - Otherwise a single lock shared by every call of this decorated function is used.
      The same shared lock is the fallback when the first argument refuses attribute
      assignment (built-in types, frozen dataclasses, ...), so such calls are still
      serialized instead of failing.

    Args:
        func: The function or method to protect.

    Returns:
        A wrapper with the same signature that runs ``func`` while holding the lock.
    """
    func_lock = threading.RLock()
    lock_attr_name = f'__lock_{func.__name__}'

    def _select_lock(args):
        """Return the lock guarding this call (per-object when possible)."""
        if not args or not hasattr(args[0], '__dict__'):
            return func_lock

        self_obj = args[0]
        lock = getattr(self_obj, lock_attr_name, None)
        if lock is not None:
            return lock

        # Creation is guarded globally so two threads cannot attach different locks.
        with _THREAD_SAFE_GLOBAL_LOCK:
            lock = getattr(self_obj, lock_attr_name, None)
            if lock is None:
                lock = threading.RLock()
                try:
                    setattr(self_obj, lock_attr_name, lock)
                except (AttributeError, TypeError):
                    # The object has a __dict__ but does not accept new attributes.
                    lock = func_lock
        return lock

    @wraps(func)
    def wrapper(*args, **kwargs):
        with _select_lock(args):
            return func(*args, **kwargs)

    return wrapper
18
48
 
19
49
 
20
- def thread_safe(func):
21
- """Thread-safe decorator for functions that need to be executed in a thread-safe manner."""
50
def run_once(func: Callable[..., T]) -> Callable[..., T]:
    """Wrap ``func`` so that it is executed at most once, even across threads.

    The first successful call stores its return value; every later call returns that
    stored value and ignores its own arguments. A call that raises stores nothing,
    so the next call runs ``func`` again.
    """
    guard = threading.RLock()
    executed: bool = False
    cached: Optional[T] = None

    @wraps(func)
    def wrapper(*args, **kwargs):
        nonlocal executed, cached
        # Unlocked check first: once executed, callers never touch the lock again.
        if not executed:
            with guard:
                # Re-check under the lock; another thread may have finished meanwhile.
                if not executed:
                    cached = func(*args, **kwargs)
                    executed = True
        return cached

    return wrapper
69
+
70
+
71
def retry_func(retries=3, sleep_interval=0):
    """Build a decorator that calls a function again when it raises.

    Args:
        retries: Total number of attempts. Values below 1 are treated as 1, so the
            decorated function is always called at least once.
        sleep_interval: Seconds to sleep between two attempts. No sleep happens when
            the value is not positive, and never after the final attempt.

    Returns:
        A decorator. The decorated function returns the result of the first attempt
        that succeeds.

    Raises:
        Exception: The exception raised by the final attempt when every attempt fails.
    """
    attempts = max(1, retries)

    def decorator(func):

        @wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(1, attempts + 1):
                try:
                    return func(*args, **kwargs)
                except Exception:
                    if attempt == attempts:
                        # Out of attempts: surface the last failure unchanged.
                        raise
                    if sleep_interval > 0:
                        time.sleep(sleep_interval)

        return wrapper

    return decorator
91
+
92
+
93
@contextmanager
def retry_context(retries=3, sleep_interval=0):
    """Context manager kept for API compatibility; it runs the ``with`` body once.

    A generator-based context manager may yield only a single time, so it has no way
    to execute the ``with`` body again after a failure. Yielding again after catching
    the exception makes ``contextlib`` raise
    ``RuntimeError("generator didn't stop after throw()")`` and hides the real error.
    This implementation therefore lets the original exception propagate unchanged.

    Args:
        retries: Accepted for backward compatibility; has no effect.
        sleep_interval: Accepted for backward compatibility; has no effect.

    Raises:
        Exception: Whatever the ``with`` body raises.

    Note:
        To actually retry, move the code into a function and decorate it with
        ``retry_func``.
    """
    # Single yield: exceptions from the body pass straight through to the caller.
    yield
108
+
109
+
110
class AsyncioLoopRunner:
    """Singleton that owns a background event loop for calling coroutines from sync code.

    The loop runs forever in a daemon thread named ``AsyncioLoopRunner``. Synchronous
    callers hand coroutines to it through :meth:`run` and block until the result is
    available. Do not call :meth:`run` from a coroutine that is itself executing on
    this loop: the loop thread would block waiting for itself.
    """
    # Lazily created shared runner; guarded by _inst_lock.
    _instance: Optional['AsyncioLoopRunner'] = None
    _inst_lock = threading.Lock()

    def __init__(self) -> None:
        self._loop: Optional[asyncio.AbstractEventLoop] = None
        self._thread: Optional[threading.Thread] = None
        self._start_loop()

    def _start_loop(self) -> None:
        """Create a new event loop and start the daemon thread that runs it."""
        loop = asyncio.new_event_loop()
        self._loop = loop

        def run_loop() -> None:
            asyncio.set_event_loop(loop)
            loop.run_forever()

        self._thread = threading.Thread(target=run_loop, daemon=True, name='AsyncioLoopRunner')
        self._thread.start()

    @classmethod
    def instance(cls) -> 'AsyncioLoopRunner':
        """Return the shared runner, creating it on first use."""
        # Read once into a local so a concurrent stop() cannot turn the value into None
        # between the check and the return.
        inst = cls._instance
        if inst is not None:
            return inst
        with cls._inst_lock:
            if cls._instance is None:
                cls._instance = AsyncioLoopRunner()
            return cls._instance

    @classmethod
    def run(cls, coro: Awaitable[T], timeout: Optional[float] = None) -> T:
        """Submit a coroutine to the background loop and wait for its result.

        Args:
            coro: The coroutine to execute on the background loop.
            timeout: Maximum number of seconds to wait; ``None`` waits indefinitely.

        Returns:
            The value returned by the coroutine.

        Raises:
            TimeoutError: If the coroutine does not finish within ``timeout``.
            Exception: Any exception raised by the coroutine.
        """
        inst = cls.instance()
        fut = asyncio.run_coroutine_threadsafe(coro, inst._loop)
        try:
            return fut.result(timeout=timeout)
        finally:
            # On timeout or interruption nobody will read the result any more, so do
            # not leave the coroutine running on the shared loop.
            if not fut.done():
                fut.cancel()

    @property
    def loop(self) -> Optional[asyncio.AbstractEventLoop]:
        """Access the underlying event loop (read-only use); ``None`` after shutdown."""
        return self._loop

    def stop(self, join_timeout: float = 5.0) -> None:
        """Shut down the background loop (generally not needed).

        The runner is removed from the singleton slot, so the next :meth:`instance` or
        :meth:`run` call starts a fresh loop instead of waiting on a stopped one.

        Args:
            join_timeout: Maximum number of seconds to wait for the loop thread to exit.
        """
        loop = self._loop
        if not loop:
            return

        owner = type(self)
        with owner._inst_lock:
            if owner._instance is self:
                owner._instance = None

        # Scheduling the stop also covers a loop that has not begun running yet: the
        # callback executes as soon as run_forever() starts.
        loop.call_soon_threadsafe(loop.stop)

        thread = self._thread
        if thread is not None and thread is not threading.current_thread():
            thread.join(timeout=join_timeout)

        # Closing is only legal once the loop thread has really finished.
        if thread is None or not thread.is_alive():
            if not loop.is_closed():
                loop.close()
            self._loop = None
            self._thread = None
159
+
160
+
161
def run_in_threads_with_progress(
    items: Sequence[T],
    worker: Callable[[T], R],
    *,
    desc: str,
    max_workers: int,
    heartbeat_sec: int,
    on_result: Optional[Callable[[T, R], None]] = None,
    on_error: Optional[Callable[[T, Exception], None]] = None,
    filter_none_results: bool = False,
) -> List[R]:
    """
    Execute a collection of tasks concurrently with a ThreadPoolExecutor while
    displaying a tqdm progress bar and emitting periodic heartbeat logs.

    Key behaviors:
        - Concurrency: Uses up to `min(len(items), max_workers)` threads.
        - Progress: The tqdm bar advances as soon as a task finishes (success or failure).
        - Heartbeat: If no task finishes within `heartbeat_sec`, a status line is logged.
        - Ordering: Results are returned in input order, regardless of completion order.
        - Error handling:
            * If `on_error` is provided, it is called for each failed item; execution
              continues unless `on_error` itself raises.
            * If `on_error` is None, the first exception is raised. Tasks that have not
              started yet are cancelled; tasks already running are awaited while the
              pool shuts down.
        - Callbacks:
            * `on_result(item, result)` is called after a successful result is obtained.
              An exception raised by `on_result` is handled like a task failure.
            * Both callbacks run in the calling thread (not in worker threads).

    Args:
        items: The inputs to process. Any iterable is accepted; it is copied to a list once.
        worker: A callable executed in threads to process a single item and return a result.
        desc: A short text shown as the tqdm progress bar description.
        max_workers: Upper bound on the number of concurrent threads.
        heartbeat_sec: Interval (in seconds) to wait before emitting a heartbeat log if
            no tasks complete in that window.
        on_result: Optional callback invoked as on_result(item, result) after success.
        on_error: Optional callback invoked as on_error(item, exception) on failure. If
            omitted, the exception is propagated and the function terminates early.
        filter_none_results: If True, `None` entries (failed items, but also workers that
            legitimately returned None) are dropped from the returned list.

    Returns:
        A list with one entry per input item, in input order. Entries of items that
        failed (with `on_error` provided) are `None` unless `filter_none_results` is True.

    Raises:
        Exception: Propagates the first task exception if `on_error` is not provided, or
            if `on_error` re-raises.
    """
    # Local import: FIRST_COMPLETED is not part of the module-level imports.
    from concurrent.futures import FIRST_COMPLETED

    # Materialize once so generators are consumed a single time and can be indexed.
    work_items: List[T] = list(items)
    if not work_items:
        return []

    total = len(work_items)
    results: List[Optional[R]] = [None] * total  # Filled by input index

    # Bound the pool by actual workload size for efficiency.
    with ThreadPoolExecutor(max_workers=min(total, max_workers)) as executor:
        # Submit all tasks up-front and map futures back to the index of their item.
        future_to_index = {executor.submit(worker, item): index for index, item in enumerate(work_items)}

        with tqdm(total=total, desc=desc, mininterval=1, dynamic_ncols=True) as pbar:
            pending = set(future_to_index)
            try:
                while pending:
                    # Wake up on the first completion so progress and callbacks are
                    # timely; the timeout only serves the heartbeat.
                    done, pending = wait(pending, timeout=heartbeat_sec, return_when=FIRST_COMPLETED)
                    if not done:
                        # Heartbeat when nothing has completed within the window.
                        logger.info(f'{desc} still processing... pending={len(pending)}')
                        continue

                    for future in done:
                        index = future_to_index[future]
                        try:
                            res = future.result()
                            results[index] = res  # Store result at the correct index
                            # Invoke success callback in caller thread (not in worker).
                            if on_result is not None:
                                on_result(work_items[index], res)
                        except Exception as exc:
                            # Delegate failure handling to on_error if provided; otherwise bubble up.
                            if on_error is not None:
                                on_error(work_items[index], exc)
                            else:
                                raise
                        finally:
                            # Always advance progress for completed futures (success or failure).
                            pbar.update(1)
            except BaseException:
                # Fail fast: drop queued work so the pool shutdown does not run it.
                for future in pending:
                    future.cancel()
                raise

    if filter_none_results:
        # Drop placeholders left by failed tasks (and any genuine None results).
        results = [res for res in results if res is not None]
    return results
@@ -5,13 +5,85 @@ import importlib
5
5
  import os
6
6
  from itertools import chain
7
7
  from types import ModuleType
8
- from typing import Any
8
+ from typing import Any, Optional, Union
9
9
 
10
+ from evalscope.constants import IS_BUILD_DOC
10
11
  from .logger import get_logger
11
12
 
12
13
  logger = get_logger() # pylint: disable=invalid-name
13
14
 
14
15
 
16
def check_import(
    module_name: Union[str, list[str]],
    package: Optional[Union[str, list[str]]] = None,
    raise_warning: bool = True,
    raise_error: bool = False,
    feature_name: Optional[str] = 'this feature',
) -> bool:
    """Check if a module or list of modules can be imported.

    Args:
        module_name (Union[str, list[str]]): The name(s) of the module(s) to check.
        package (Union[str, list[str]], optional): The package(s) to install if the module(s) are not found.
            A single string applies to every module; a list is matched to the modules by position.
            The list passed in is never modified. Defaults to None.
        raise_warning (bool, optional): Whether to log a warning if any module is not found. Defaults to True.
        raise_error (bool, optional): Whether to raise an error if any module is not found. The error is
            suppressed while `IS_BUILD_DOC` is set. Defaults to False.
        feature_name (str, optional): The feature name that requires the module(s). Used in the warning/error
            message. Defaults to 'this feature'.

    Returns:
        bool: True if all modules can be imported, False otherwise.

    Raises:
        ImportError: If a module is missing, `raise_error` is True and `IS_BUILD_DOC` is not set.
    """
    # Convert single strings to lists for uniform processing
    module_names = [module_name] if isinstance(module_name, str) else list(module_name)

    if package is None:
        packages = [None] * len(module_names)
    elif isinstance(package, str):
        packages = [package] * len(module_names)
    else:
        # Work on a copy: padding must not leak into the caller's list.
        packages = list(package)
        packages.extend([None] * (len(module_names) - len(packages)))

    missing_modules = []
    missing_packages = []

    # zip() stops at the shorter sequence, so surplus package names are ignored.
    for mod_name, pkg in zip(module_names, packages):
        try:
            importlib.import_module(mod_name)
        except ImportError:
            missing_modules.append(mod_name)
            if pkg:
                missing_packages.append(pkg)

    if not missing_modules:
        return True

    if len(missing_modules) == 1:
        error_msg = f'`{missing_modules[0]}` not found.'
    else:
        error_msg = f'The following modules are not found: {", ".join(f"`{mod}`" for mod in missing_modules)}.'

    if missing_packages:
        unique_packages = list(dict.fromkeys(missing_packages))  # Remove duplicates while preserving order
        error_msg += f' Please run `pip install {" ".join(unique_packages)}` to use {feature_name}.'

    if raise_warning:
        logger.warning(error_msg)

    if raise_error and not IS_BUILD_DOC:
        raise ImportError(error_msg)
    return False
85
+
86
+
15
87
  class _LazyModule(ModuleType):
16
88
  """
17
89
  Module class that surfaces all objects but only performs associated imports when the objects are requested.
@@ -1,6 +1,7 @@
1
1
  import base64
2
2
  import csv
3
3
  import hashlib
4
+ import io
4
5
  import json
5
6
  import jsonlines as jsonl
6
7
  import os
@@ -8,8 +9,10 @@ import re
8
9
  import string
9
10
  import unicodedata
10
11
  import yaml
12
+ from datetime import datetime
11
13
  from io import BytesIO
12
14
  from PIL import Image
15
+ from typing import Tuple
13
16
 
14
17
  from evalscope.constants import DumpMode
15
18
  from evalscope.utils.logger import get_logger
@@ -122,6 +125,9 @@ def dump_jsonl_data(data_list, jsonl_file, dump_mode=DumpMode.OVERWRITE):
122
125
  if not isinstance(data_list, list):
123
126
  data_list = [data_list]
124
127
 
128
+ # Convert non-serializable types to serializable ones
129
+ data_list = convert_normal_types(data_list)
130
+
125
131
  if dump_mode == DumpMode.OVERWRITE:
126
132
  dump_mode = 'w'
127
133
  elif dump_mode == DumpMode.APPEND:
@@ -168,6 +174,24 @@ def csv_to_list(csv_file) -> list:
168
174
  return res_list
169
175
 
170
176
 
177
def tsv_to_list(tsv_file) -> list:
    """
    Read tsv file to list.

    Args:
        tsv_file: tsv file path.

    Returns:
        list: list of rows. Each row is a dict keyed by the column names
            taken from the first line of the file.
    """
    # newline='' hands line-ending handling to the csv module, so line breaks
    # embedded in quoted fields are kept verbatim instead of being rewritten
    # by universal-newline translation.
    with open(tsv_file, 'r', encoding='utf-8', newline='') as f:
        return list(csv.DictReader(f, delimiter='\t'))
193
+
194
+
171
195
  def csv_to_jsonl(csv_file, jsonl_file):
172
196
  """
173
197
  Convert csv file to jsonl file.
@@ -283,22 +307,64 @@ def get_valid_list(input_list, candidate_list):
283
307
  [i for i in input_list if i not in candidate_list]
284
308
 
285
309
 
286
def PIL_to_base64(image: Image.Image, format: str = 'JPEG', add_header: bool = False) -> str:
    """
    Serialize a PIL Image into a base64 string.

    Args:
        image (Image.Image): The PIL Image to encode.
        format (str): Image format passed to ``image.save``. Default is 'JPEG'.
        add_header (bool): When True, prefix the result with
            ``data:image/<format lower-cased>;base64,``. Default is False.

    Returns:
        str: The base64 text, optionally preceded by the data header.
    """
    # Render the image into an in-memory buffer, then encode the raw bytes.
    with BytesIO() as stream:
        image.save(stream, format=format)
        raw = stream.getvalue()

    encoded = base64.b64encode(raw).decode('utf-8')
    if not add_header:
        return encoded
    return f'data:image/{format.lower()};base64,{encoded}'
300
328
 
301
329
 
330
def bytes_to_base64(bytes_data: bytes, *, format: str = 'png', add_header: bool = False, content_type='image') -> str:
    """Encode raw bytes as a base64 string, optionally with a data header.

    Args:
        bytes_data (bytes): The payload to encode.
        format (str): Sub-type written into the header. Default is 'png'.
        add_header (bool): When True, prefix the result with
            ``data:<content_type>/<format>;base64,``. Default is False.
        content_type (str): Main type written into the header, e.g. 'image'
            or 'audio'. Default is 'image'.

    Returns:
        str: The base64 text, optionally preceded by the data header.
    """
    encoded = base64.b64encode(bytes_data).decode('utf-8')
    if not add_header:
        return encoded
    header = f'data:{content_type}/{format};base64,'
    return header + encoded
346
+
347
+
348
def base64_to_PIL(base64_str):
    """Decode a base64 string into a PIL Image.

    Args:
        base64_str (str): Base64 text. If it contains a comma, everything up
            to and including the first comma is treated as a header and
            discarded before decoding.

    Returns:
        Image.Image: The image opened from the decoded bytes.
    """
    # Drop a leading header such as 'data:image/png;base64,' when present.
    _, comma, payload = base64_str.partition(',')
    if comma:
        base64_str = payload

    decoded = base64.b64decode(base64_str)
    return Image.open(io.BytesIO(decoded))
366
+
367
+
302
368
  def safe_filename(s: str, max_length: int = 255) -> str:
303
369
  """
304
370
  Convert a string into a safe filename by removing or replacing unsafe characters.
@@ -351,11 +417,13 @@ def safe_filename(s: str, max_length: int = 255) -> str:
351
417
  return s
352
418
 
353
419
 
354
- def convert_numpy_types(obj):
355
- """Recursively convert numpy types to native Python types for JSON serialization."""
420
+ def convert_normal_types(obj):
421
+ """Recursively convert numpy types and datetime objects to native Python types for JSON serialization."""
356
422
  import numpy as np
357
423
 
358
- if isinstance(obj, np.bool_):
424
+ if isinstance(obj, datetime):
425
+ return obj.isoformat()
426
+ elif isinstance(obj, np.bool_):
359
427
  return bool(obj)
360
428
  elif isinstance(obj, np.integer):
361
429
  return int(obj)
@@ -364,10 +432,67 @@ def convert_numpy_types(obj):
364
432
  elif isinstance(obj, np.ndarray):
365
433
  return obj.tolist()
366
434
  elif isinstance(obj, dict):
367
- return {key: convert_numpy_types(value) for key, value in obj.items()}
435
+ return {key: convert_normal_types(value) for key, value in obj.items()}
368
436
  elif isinstance(obj, list):
369
- return [convert_numpy_types(item) for item in obj]
437
+ return [convert_normal_types(item) for item in obj]
370
438
  elif isinstance(obj, tuple):
371
- return tuple(convert_numpy_types(item) for item in obj)
439
+ return tuple(convert_normal_types(item) for item in obj)
440
+ elif isinstance(obj, os.PathLike):
441
+ return str(obj)
372
442
  else:
373
443
  return obj
444
+
445
+
446
def compress_image_to_limit(image_bytes: bytes, max_bytes: int = 10_000_000) -> Tuple[bytes, str]:
    """
    Bring encoded image bytes under ``max_bytes`` by re-encoding them as JPEG.

    Input that is already at or below the limit is returned untouched and
    labelled 'png'; its actual format is not inspected. Larger input is
    re-encoded as JPEG, first lowering the quality in steps of 10 from 85
    down to a floor of 40, then shrinking both dimensions by 10% per step
    until the result fits or the shorter side reaches 256 pixels.

    Args:
        image_bytes (bytes): Encoded image data.
        max_bytes (int): Size limit in bytes. Default is 10_000_000.

    Returns:
        Tuple[bytes, str]: ``(processed_bytes, format_str)``. ``format_str``
            is 'png' when the input is returned unchanged (already small
            enough, or it could not be opened) and 'jpeg' otherwise. The
            JPEG result may still exceed ``max_bytes``; a warning is logged
            in that case.
    """
    if len(image_bytes) <= max_bytes:
        return image_bytes, 'png'

    try:
        img = Image.open(BytesIO(image_bytes))
    except Exception as exc:
        # Best effort: hand the original bytes back rather than failing the caller.
        logger.warning(f'Failed to open image bytes with PIL, sending original image; may exceed API limit: {exc}')
        return image_bytes, 'png'

    # Convert to RGB for JPEG if needed
    if img.mode not in ('RGB', 'L'):
        img = img.convert('RGB')

    def encode_jpeg(source: Image.Image, quality: int) -> bytes:
        buf = BytesIO()
        source.save(buf, format='JPEG', quality=quality, optimize=True, progressive=True)
        return buf.getvalue()

    # Start with moderate quality and reduce
    quality: int = 85
    quality_floor: int = 40
    out: bytes = encode_jpeg(img, quality)

    while len(out) > max_bytes and quality > quality_floor:
        # Clamp the step so the last reduction lands on the floor (45 -> 40)
        # instead of overshooting it (45 -> 35).
        quality = max(quality_floor, quality - 10)
        out = encode_jpeg(img, quality)

    # If still too large, progressively downscale
    min_side_floor: int = 256
    scale: float = 0.9
    while len(out) > max_bytes and min(img.size) > min_side_floor:
        new_w = max(min_side_floor, int(img.width * scale))
        new_h = max(min_side_floor, int(img.height * scale))
        if (new_w, new_h) == img.size:
            # No further shrinking is possible; avoid looping forever.
            break
        img = img.resize((new_w, new_h), Image.LANCZOS)
        out = encode_jpeg(img, quality)

    if len(out) > max_bytes:
        logger.warning(f'Image remains above limit after compression: size={len(out)} bytes (limit={max_bytes}).')
    else:
        logger.info(
            f'Compressed image from {len(image_bytes)} to {len(out)} bytes; '
            f'quality={quality}, size={img.width}x{img.height}.'
        )

    return out, 'jpeg'
@@ -4,7 +4,7 @@ from copy import deepcopy
4
4
  from dataclasses import is_dataclass
5
5
  from datetime import date, datetime, time
6
6
  from enum import EnumMeta
7
- from pydantic import BaseModel, Field
7
+ from pydantic import BaseModel, Field, field_validator, model_validator
8
8
  from typing import (
9
9
  Any,
10
10
  Dict,
@@ -59,6 +59,28 @@ class JSONSchema(BaseModel):
59
59
  required: Optional[List[str]] = Field(default=None)
60
60
  """Required fields for object parameters."""
61
61
 
62
@model_validator(mode='before')
def convert_type_before_validation(cls, values):
    """Normalize string 'type' entries in the raw input before validation.

    Works on a deep copy of ``values``. Every dict found in the copy, at any
    depth, has a string-valued 'type' entry passed through
    ``python_type_to_json_type``. A value that conversion rejects with
    ``ValueError`` is left unchanged. Lists are rebuilt with each item
    processed the same way; all other objects pass through untouched.
    """

    def _normalize(node):
        if isinstance(node, list):
            return [_normalize(element) for element in node]
        if not isinstance(node, dict):
            return node

        declared = node.get('type')
        if isinstance(declared, str):
            try:
                node['type'] = python_type_to_json_type(declared)
            except ValueError:
                # Unknown type names are kept as given.
                pass

        # Walk a snapshot of the keys; only values of existing keys are reassigned.
        for key in list(node):
            node[key] = _normalize(node[key])
        return node

    return _normalize(deepcopy(values))
83
+
62
84
 
63
85
  def json_schema(t: Type[Any]) -> JSONSchema:
64
86
  """Provide a JSON Schema for the specified type.
@@ -152,6 +174,8 @@ def cls_json_schema(cls: Type[Any]) -> JSONSchema:
152
174
 
153
175
 
154
176
  def python_type_to_json_type(python_type: Optional[str]) -> JSONType:
177
+ if python_type is not None and python_type in get_args(JSONType):
178
+ return python_type
155
179
  if python_type == 'str':
156
180
  return 'string'
157
181
  elif python_type == 'int':
@@ -205,4 +229,3 @@ def resolve_schema_references(schema: Dict[str, Any]) -> Dict[str, Any]:
205
229
  return obj
206
230
 
207
231
  return cast(Dict[str, Any], _resolve_refs(schema))
208
- return cast(Dict[str, Any], _resolve_refs(schema))