evalscope 0.10.0__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (606)
  1. evalscope/__init__.py +4 -1
  2. evalscope/api/benchmark/__init__.py +11 -0
  3. evalscope/api/benchmark/adapters/__init__.py +7 -0
  4. evalscope/api/benchmark/adapters/agent_adapter.py +8 -0
  5. evalscope/api/benchmark/adapters/default_data_adapter.py +754 -0
  6. evalscope/api/benchmark/adapters/image_edit_adapter.py +82 -0
  7. evalscope/api/benchmark/adapters/multi_choice_adapter.py +86 -0
  8. evalscope/api/benchmark/adapters/ner_adapter.py +212 -0
  9. evalscope/api/benchmark/adapters/text2image_adapter.py +157 -0
  10. evalscope/api/benchmark/adapters/vision_language_adapter.py +8 -0
  11. evalscope/api/benchmark/benchmark.py +404 -0
  12. evalscope/api/benchmark/meta.py +124 -0
  13. evalscope/api/dataset/__init__.py +2 -0
  14. evalscope/api/dataset/dataset.py +370 -0
  15. evalscope/api/dataset/loader.py +266 -0
  16. evalscope/api/dataset/utils.py +143 -0
  17. evalscope/api/evaluator/__init__.py +3 -0
  18. evalscope/api/evaluator/cache.py +382 -0
  19. evalscope/api/evaluator/evaluator.py +61 -0
  20. evalscope/api/evaluator/state.py +280 -0
  21. evalscope/api/filter/__init__.py +1 -0
  22. evalscope/api/filter/filter.py +72 -0
  23. evalscope/api/messages/__init__.py +12 -0
  24. evalscope/api/messages/chat_message.py +248 -0
  25. evalscope/api/messages/content.py +102 -0
  26. evalscope/api/messages/utils.py +35 -0
  27. evalscope/api/metric/__init__.py +2 -0
  28. evalscope/api/metric/metric.py +60 -0
  29. evalscope/api/metric/scorer.py +113 -0
  30. evalscope/api/mixin/__init__.py +2 -0
  31. evalscope/api/mixin/llm_judge_mixin.py +170 -0
  32. evalscope/api/mixin/sandbox_mixin.py +182 -0
  33. evalscope/api/model/__init__.py +12 -0
  34. evalscope/api/model/generate_config.py +161 -0
  35. evalscope/api/model/model.py +386 -0
  36. evalscope/api/model/model_output.py +285 -0
  37. evalscope/api/registry.py +182 -0
  38. evalscope/api/tool/__init__.py +3 -0
  39. evalscope/api/tool/tool_call.py +101 -0
  40. evalscope/api/tool/tool_info.py +173 -0
  41. evalscope/api/tool/utils.py +64 -0
  42. evalscope/app/__init__.py +28 -0
  43. evalscope/app/app.py +38 -0
  44. evalscope/app/arguments.py +11 -0
  45. evalscope/app/constants.py +22 -0
  46. evalscope/app/ui/__init__.py +20 -0
  47. evalscope/app/ui/app_ui.py +53 -0
  48. evalscope/app/ui/multi_model.py +353 -0
  49. evalscope/app/ui/sidebar.py +42 -0
  50. evalscope/app/ui/single_model.py +220 -0
  51. evalscope/app/ui/visualization.py +36 -0
  52. evalscope/app/utils/data_utils.py +195 -0
  53. evalscope/app/utils/env_utils.py +12 -0
  54. evalscope/app/utils/localization.py +221 -0
  55. evalscope/app/utils/text_utils.py +119 -0
  56. evalscope/app/utils/visualization.py +96 -0
  57. evalscope/arguments.py +32 -9
  58. evalscope/backend/opencompass/api_meta_template.py +2 -1
  59. evalscope/backend/opencompass/backend_manager.py +10 -7
  60. evalscope/backend/rag_eval/__init__.py +1 -1
  61. evalscope/backend/rag_eval/backend_manager.py +23 -6
  62. evalscope/backend/rag_eval/clip_benchmark/dataset_builder.py +33 -21
  63. evalscope/backend/rag_eval/clip_benchmark/task_template.py +8 -4
  64. evalscope/backend/rag_eval/cmteb/arguments.py +14 -1
  65. evalscope/backend/rag_eval/cmteb/task_template.py +19 -3
  66. evalscope/backend/rag_eval/cmteb/tasks/CustomTask.py +1 -1
  67. evalscope/backend/rag_eval/ragas/arguments.py +0 -1
  68. evalscope/backend/rag_eval/ragas/task_template.py +2 -1
  69. evalscope/backend/rag_eval/ragas/tasks/build_distribution.py +2 -1
  70. evalscope/backend/rag_eval/ragas/tasks/build_transform.py +7 -4
  71. evalscope/backend/rag_eval/ragas/tasks/testset_generation.py +9 -3
  72. evalscope/backend/rag_eval/ragas/tasks/translate_prompt.py +2 -6
  73. evalscope/backend/rag_eval/utils/embedding.py +125 -32
  74. evalscope/backend/rag_eval/utils/llm.py +16 -16
  75. evalscope/backend/vlm_eval_kit/backend_manager.py +8 -3
  76. evalscope/benchmarks/__init__.py +17 -5
  77. evalscope/benchmarks/aa_lcr/__init__.py +0 -0
  78. evalscope/benchmarks/aa_lcr/aa_lcr_adapter.py +205 -0
  79. evalscope/benchmarks/ai2d/__init__.py +0 -0
  80. evalscope/benchmarks/ai2d/ai2d_adapter.py +54 -0
  81. evalscope/benchmarks/aime/__init__.py +0 -0
  82. evalscope/benchmarks/aime/aime24_adapter.py +55 -0
  83. evalscope/benchmarks/aime/aime25_adapter.py +181 -0
  84. evalscope/benchmarks/aime/grader.py +307 -0
  85. evalscope/{metrics/math_accuracy.py → benchmarks/aime/math_normalize.py} +61 -72
  86. evalscope/benchmarks/alpaca_eval/__init__.py +0 -0
  87. evalscope/benchmarks/alpaca_eval/alpaca_eval_adapter.py +133 -0
  88. evalscope/benchmarks/amc/__init__.py +0 -0
  89. evalscope/benchmarks/amc/amc_adapter.py +51 -0
  90. evalscope/benchmarks/arc/arc_adapter.py +34 -149
  91. evalscope/benchmarks/arena_hard/__init__.py +0 -0
  92. evalscope/benchmarks/arena_hard/arena_hard_adapter.py +149 -0
  93. evalscope/benchmarks/arena_hard/utils.py +186 -0
  94. evalscope/benchmarks/bbh/bbh_adapter.py +117 -157
  95. evalscope/benchmarks/bfcl/__init__.py +0 -0
  96. evalscope/benchmarks/bfcl/v3/__init__.py +0 -0
  97. evalscope/benchmarks/bfcl/v3/bfcl_v3_adapter.py +370 -0
  98. evalscope/benchmarks/bfcl/v3/generation.py +222 -0
  99. evalscope/benchmarks/bfcl/v3/utils.py +23 -0
  100. evalscope/benchmarks/bfcl/v4/__init__.py +0 -0
  101. evalscope/benchmarks/bfcl/v4/bfcl_v4_adapter.py +229 -0
  102. evalscope/benchmarks/bfcl/v4/utils.py +410 -0
  103. evalscope/benchmarks/biomix_qa/__init__.py +0 -0
  104. evalscope/benchmarks/biomix_qa/biomix_qa_adapter.py +36 -0
  105. evalscope/benchmarks/blink/__init__.py +0 -0
  106. evalscope/benchmarks/blink/blink_adapter.py +61 -0
  107. evalscope/benchmarks/ceval/ceval_adapter.py +93 -174
  108. evalscope/benchmarks/chartqa/__init__.py +0 -0
  109. evalscope/benchmarks/chartqa/chartqa_adapter.py +80 -0
  110. evalscope/benchmarks/chartqa/utils.py +38 -0
  111. evalscope/benchmarks/chinese_simple_qa/__init__.py +0 -0
  112. evalscope/benchmarks/chinese_simple_qa/csimple_qa_adapter.py +170 -0
  113. evalscope/benchmarks/cmmlu/cmmlu_adapter.py +34 -140
  114. evalscope/benchmarks/coin_flip/__init__.py +0 -0
  115. evalscope/benchmarks/coin_flip/coin_flip_adapter.py +128 -0
  116. evalscope/benchmarks/commonsense_qa/__init__.py +0 -0
  117. evalscope/benchmarks/commonsense_qa/commonsense_qa_adapter.py +32 -0
  118. evalscope/benchmarks/competition_math/competition_math_adapter.py +64 -112
  119. evalscope/benchmarks/data_collection/__init__.py +0 -0
  120. evalscope/benchmarks/data_collection/data_collection_adapter.py +215 -0
  121. evalscope/benchmarks/docmath/__init__.py +0 -0
  122. evalscope/benchmarks/docmath/docmath_adapter.py +143 -0
  123. evalscope/benchmarks/docmath/utils.py +219 -0
  124. evalscope/benchmarks/docvqa/__init__.py +0 -0
  125. evalscope/benchmarks/docvqa/docvqa_adapter.py +67 -0
  126. evalscope/benchmarks/drivelology/__init__.py +0 -0
  127. evalscope/benchmarks/drivelology/drivelology_binary_adapter.py +170 -0
  128. evalscope/benchmarks/drivelology/drivelology_multilabel_adapter.py +254 -0
  129. evalscope/benchmarks/drivelology/drivelology_selection_adapter.py +49 -0
  130. evalscope/benchmarks/drivelology/drivelology_writing_adapter.py +218 -0
  131. evalscope/benchmarks/drop/__init__.py +0 -0
  132. evalscope/benchmarks/drop/drop_adapter.py +155 -0
  133. evalscope/benchmarks/drop/utils.py +156 -0
  134. evalscope/benchmarks/frames/__init__.py +0 -0
  135. evalscope/benchmarks/frames/frames_adapter.py +175 -0
  136. evalscope/benchmarks/frames/utils.py +37 -0
  137. evalscope/benchmarks/general_arena/__init__.py +0 -0
  138. evalscope/benchmarks/general_arena/general_arena_adapter.py +454 -0
  139. evalscope/benchmarks/general_arena/utils.py +223 -0
  140. evalscope/benchmarks/general_mcq/__init__.py +0 -0
  141. evalscope/benchmarks/general_mcq/general_mcq_adapter.py +58 -0
  142. evalscope/benchmarks/general_qa/general_qa_adapter.py +75 -107
  143. evalscope/benchmarks/gpqa/__init__.py +0 -0
  144. evalscope/benchmarks/gpqa/gpqa_adapter.py +90 -0
  145. evalscope/benchmarks/gpqa/prompt.py +88 -0
  146. evalscope/benchmarks/gsm8k/gsm8k_adapter.py +77 -144
  147. evalscope/benchmarks/hallusion_bench/__init__.py +0 -0
  148. evalscope/benchmarks/hallusion_bench/hallusion_bench_adapter.py +159 -0
  149. evalscope/benchmarks/halu_eval/__init__.py +0 -0
  150. evalscope/benchmarks/halu_eval/halu_eval_adapter.py +128 -0
  151. evalscope/benchmarks/halu_eval/halu_eval_instructions.py +84 -0
  152. evalscope/benchmarks/healthbench/__init__.py +0 -0
  153. evalscope/benchmarks/healthbench/healthbench_adapter.py +282 -0
  154. evalscope/benchmarks/healthbench/utils.py +102 -0
  155. evalscope/benchmarks/hellaswag/hellaswag_adapter.py +36 -134
  156. evalscope/benchmarks/hle/__init__.py +0 -0
  157. evalscope/benchmarks/hle/hle_adapter.py +153 -0
  158. evalscope/benchmarks/humaneval/humaneval_adapter.py +80 -88
  159. evalscope/benchmarks/humaneval/utils.py +235 -0
  160. evalscope/benchmarks/ifeval/ifeval_adapter.py +71 -45
  161. evalscope/benchmarks/ifeval/instructions.py +112 -68
  162. evalscope/benchmarks/ifeval/instructions_registry.py +1 -1
  163. evalscope/benchmarks/ifeval/instructions_util.py +2 -3
  164. evalscope/benchmarks/ifeval/utils.py +6 -7
  165. evalscope/benchmarks/image_edit/__init__.py +0 -0
  166. evalscope/benchmarks/image_edit/gedit/__init__.py +0 -0
  167. evalscope/benchmarks/image_edit/gedit/gedit_adapter.py +138 -0
  168. evalscope/benchmarks/image_edit/gedit/utils.py +372 -0
  169. evalscope/benchmarks/image_edit/gedit/vie_prompts.py +406 -0
  170. evalscope/benchmarks/infovqa/__init__.py +0 -0
  171. evalscope/benchmarks/infovqa/infovqa_adapter.py +66 -0
  172. evalscope/benchmarks/iquiz/iquiz_adapter.py +30 -58
  173. evalscope/benchmarks/live_code_bench/__init__.py +0 -0
  174. evalscope/benchmarks/live_code_bench/evaluate_utils.py +195 -0
  175. evalscope/benchmarks/live_code_bench/extract_utils.py +70 -0
  176. evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py +150 -0
  177. evalscope/benchmarks/live_code_bench/load_utils.py +63 -0
  178. evalscope/benchmarks/live_code_bench/pass_k_utils.py +56 -0
  179. evalscope/benchmarks/live_code_bench/prompts.py +207 -0
  180. evalscope/benchmarks/live_code_bench/sandbox_evaluate_utils.py +220 -0
  181. evalscope/benchmarks/live_code_bench/testing_util.py +544 -0
  182. evalscope/benchmarks/logi_qa/__int__.py +0 -0
  183. evalscope/benchmarks/logi_qa/logi_qa_adapter.py +41 -0
  184. evalscope/benchmarks/maritime_bench/__init__.py +0 -0
  185. evalscope/benchmarks/maritime_bench/maritime_bench_adapter.py +56 -0
  186. evalscope/benchmarks/math_500/__init__.py +0 -0
  187. evalscope/benchmarks/math_500/math_500_adapter.py +55 -0
  188. evalscope/benchmarks/math_qa/__init__.py +0 -0
  189. evalscope/benchmarks/math_qa/math_qa_adapter.py +35 -0
  190. evalscope/benchmarks/math_verse/__init__.py +0 -0
  191. evalscope/benchmarks/math_verse/math_verse_adapter.py +105 -0
  192. evalscope/benchmarks/math_vision/__init__.py +0 -0
  193. evalscope/benchmarks/math_vision/math_vision_adapter.py +116 -0
  194. evalscope/benchmarks/math_vista/__init__.py +0 -0
  195. evalscope/benchmarks/math_vista/math_vista_adapter.py +114 -0
  196. evalscope/benchmarks/med_mcqa/__init__.py +0 -0
  197. evalscope/benchmarks/med_mcqa/med_mcqa_adapter.py +32 -0
  198. evalscope/benchmarks/minerva_math/__init__.py +0 -0
  199. evalscope/benchmarks/minerva_math/minerva_math_adapter.py +53 -0
  200. evalscope/benchmarks/mm_bench/__init__.py +0 -0
  201. evalscope/benchmarks/mm_bench/mm_bench_adapter.py +99 -0
  202. evalscope/benchmarks/mm_star/__init__.py +0 -0
  203. evalscope/benchmarks/mm_star/mm_star_adapter.py +73 -0
  204. evalscope/benchmarks/mmlu/mmlu_adapter.py +32 -210
  205. evalscope/benchmarks/mmlu_pro/mmlu_pro_adapter.py +87 -103
  206. evalscope/benchmarks/mmlu_redux/__init__.py +0 -0
  207. evalscope/benchmarks/mmlu_redux/mmlu_redux_adapter.py +139 -0
  208. evalscope/benchmarks/mmmu/__init__.py +0 -0
  209. evalscope/benchmarks/mmmu/mmmu_adapter.py +159 -0
  210. evalscope/benchmarks/mmmu_pro/__init__.py +0 -0
  211. evalscope/benchmarks/mmmu_pro/mmmu_pro_adapter.py +124 -0
  212. evalscope/benchmarks/mri_mcqa/__init__.py +0 -0
  213. evalscope/benchmarks/mri_mcqa/mri_mcqa_adapter.py +34 -0
  214. evalscope/benchmarks/multi_if/__init__.py +0 -0
  215. evalscope/benchmarks/multi_if/ifeval.py +3354 -0
  216. evalscope/benchmarks/multi_if/metrics.py +120 -0
  217. evalscope/benchmarks/multi_if/multi_if_adapter.py +161 -0
  218. evalscope/benchmarks/music_trivia/__init__.py +0 -0
  219. evalscope/benchmarks/music_trivia/music_trivia_adapter.py +36 -0
  220. evalscope/benchmarks/musr/__init__.py +0 -0
  221. evalscope/benchmarks/musr/musr_adapter.py +43 -0
  222. evalscope/benchmarks/needle_haystack/__init__.py +0 -0
  223. evalscope/benchmarks/needle_haystack/needle_haystack_adapter.py +389 -0
  224. evalscope/benchmarks/needle_haystack/utils.py +79 -0
  225. evalscope/benchmarks/ner/__init__.py +0 -0
  226. evalscope/benchmarks/ner/broad_twitter_corpus_adapter.py +52 -0
  227. evalscope/benchmarks/ner/conll2003_adapter.py +48 -0
  228. evalscope/benchmarks/ner/copious_adapter.py +85 -0
  229. evalscope/benchmarks/ner/cross_ner_adapter.py +120 -0
  230. evalscope/benchmarks/ner/cross_ner_entities/__init__.py +0 -0
  231. evalscope/benchmarks/ner/cross_ner_entities/ai.py +54 -0
  232. evalscope/benchmarks/ner/cross_ner_entities/literature.py +36 -0
  233. evalscope/benchmarks/ner/cross_ner_entities/music.py +39 -0
  234. evalscope/benchmarks/ner/cross_ner_entities/politics.py +37 -0
  235. evalscope/benchmarks/ner/cross_ner_entities/science.py +58 -0
  236. evalscope/benchmarks/ner/genia_ner_adapter.py +66 -0
  237. evalscope/benchmarks/ner/harvey_ner_adapter.py +58 -0
  238. evalscope/benchmarks/ner/mit_movie_trivia_adapter.py +74 -0
  239. evalscope/benchmarks/ner/mit_restaurant_adapter.py +66 -0
  240. evalscope/benchmarks/ner/ontonotes5_adapter.py +87 -0
  241. evalscope/benchmarks/ner/wnut2017_adapter.py +61 -0
  242. evalscope/benchmarks/ocr_bench/__init__.py +0 -0
  243. evalscope/benchmarks/ocr_bench/ocr_bench/__init__.py +0 -0
  244. evalscope/benchmarks/ocr_bench/ocr_bench/ocr_bench_adapter.py +101 -0
  245. evalscope/benchmarks/ocr_bench/ocr_bench_v2/IoUscore_metric.py +87 -0
  246. evalscope/benchmarks/ocr_bench/ocr_bench_v2/TEDS_metric.py +963 -0
  247. evalscope/benchmarks/ocr_bench/ocr_bench_v2/__init__.py +0 -0
  248. evalscope/benchmarks/ocr_bench/ocr_bench_v2/ocr_bench_v2_adapter.py +161 -0
  249. evalscope/benchmarks/ocr_bench/ocr_bench_v2/page_ocr_metric.py +50 -0
  250. evalscope/benchmarks/ocr_bench/ocr_bench_v2/parallel.py +46 -0
  251. evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_eval/__init__.py +0 -0
  252. evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_eval/readme.txt +26 -0
  253. evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_eval/rrc_evaluation_funcs_1_1.py +537 -0
  254. evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_eval/script.py +481 -0
  255. evalscope/benchmarks/ocr_bench/ocr_bench_v2/spotting_metric.py +179 -0
  256. evalscope/benchmarks/ocr_bench/ocr_bench_v2/utils.py +433 -0
  257. evalscope/benchmarks/ocr_bench/ocr_bench_v2/vqa_metric.py +254 -0
  258. evalscope/benchmarks/olympiad_bench/__init__.py +0 -0
  259. evalscope/benchmarks/olympiad_bench/olympiad_bench_adapter.py +163 -0
  260. evalscope/benchmarks/olympiad_bench/utils.py +565 -0
  261. evalscope/benchmarks/omni_bench/__init__.py +0 -0
  262. evalscope/benchmarks/omni_bench/omni_bench_adapter.py +86 -0
  263. evalscope/benchmarks/omnidoc_bench/__init__.py +0 -0
  264. evalscope/benchmarks/omnidoc_bench/end2end_eval.py +349 -0
  265. evalscope/benchmarks/omnidoc_bench/metrics.py +547 -0
  266. evalscope/benchmarks/omnidoc_bench/omnidoc_bench_adapter.py +135 -0
  267. evalscope/benchmarks/omnidoc_bench/utils.py +1937 -0
  268. evalscope/benchmarks/piqa/__init__.py +0 -0
  269. evalscope/benchmarks/piqa/piqa_adapter.py +32 -0
  270. evalscope/benchmarks/poly_math/__init__.py +0 -0
  271. evalscope/benchmarks/poly_math/poly_math_adapter.py +132 -0
  272. evalscope/benchmarks/poly_math/utils/instruction.py +105 -0
  273. evalscope/benchmarks/pope/__init__.py +0 -0
  274. evalscope/benchmarks/pope/pope_adapter.py +112 -0
  275. evalscope/benchmarks/process_bench/__init__.py +0 -0
  276. evalscope/benchmarks/process_bench/process_bench_adapter.py +171 -0
  277. evalscope/benchmarks/pumed_qa/__init__.py +0 -0
  278. evalscope/benchmarks/pumed_qa/pubmed_qa_adapter.py +175 -0
  279. evalscope/benchmarks/qasc/__init__.py +0 -0
  280. evalscope/benchmarks/qasc/qasc_adapter.py +35 -0
  281. evalscope/benchmarks/race/race_adapter.py +33 -120
  282. evalscope/benchmarks/real_world_qa/__init__.py +0 -0
  283. evalscope/benchmarks/real_world_qa/real_world_qa_adapter.py +64 -0
  284. evalscope/benchmarks/sciq/__init__.py +0 -0
  285. evalscope/benchmarks/sciq/sciq_adapter.py +36 -0
  286. evalscope/benchmarks/seed_bench_2_plus/__init__.py +0 -0
  287. evalscope/benchmarks/seed_bench_2_plus/seed_bench_2_plus_adapter.py +72 -0
  288. evalscope/benchmarks/simple_qa/__init__.py +0 -0
  289. evalscope/benchmarks/simple_qa/simple_qa_adapter.py +169 -0
  290. evalscope/benchmarks/simple_vqa/__init__.py +0 -0
  291. evalscope/benchmarks/simple_vqa/simple_vqa_adapter.py +169 -0
  292. evalscope/benchmarks/siqa/__init__.py +0 -0
  293. evalscope/benchmarks/siqa/siqa_adapter.py +39 -0
  294. evalscope/benchmarks/super_gpqa/__init__.py +0 -0
  295. evalscope/benchmarks/super_gpqa/prompt.py +88 -0
  296. evalscope/benchmarks/super_gpqa/super_gpqa_adapter.py +165 -0
  297. evalscope/benchmarks/super_gpqa/utils.py +86 -0
  298. evalscope/benchmarks/tau_bench/__init__.py +0 -0
  299. evalscope/benchmarks/tau_bench/tau2_bench/__init__.py +0 -0
  300. evalscope/benchmarks/tau_bench/tau2_bench/generation.py +158 -0
  301. evalscope/benchmarks/tau_bench/tau2_bench/tau2_bench_adapter.py +146 -0
  302. evalscope/benchmarks/tau_bench/tau_bench/__init__.py +0 -0
  303. evalscope/benchmarks/tau_bench/tau_bench/generation.py +147 -0
  304. evalscope/benchmarks/tau_bench/tau_bench/tau_bench_adapter.py +168 -0
  305. evalscope/benchmarks/text2image/__init__.py +0 -0
  306. evalscope/benchmarks/text2image/evalmuse_adapter.py +78 -0
  307. evalscope/benchmarks/text2image/genai_bench_adapter.py +53 -0
  308. evalscope/benchmarks/text2image/general_t2i_adapter.py +42 -0
  309. evalscope/benchmarks/text2image/hpdv2_adapter.py +52 -0
  310. evalscope/benchmarks/text2image/tifa_adapter.py +27 -0
  311. evalscope/benchmarks/tool_bench/__init__.py +0 -0
  312. evalscope/benchmarks/tool_bench/tool_bench_adapter.py +102 -0
  313. evalscope/benchmarks/tool_bench/utils.py +203 -0
  314. evalscope/benchmarks/trivia_qa/trivia_qa_adapter.py +56 -118
  315. evalscope/benchmarks/truthful_qa/truthful_qa_adapter.py +70 -270
  316. evalscope/benchmarks/visu_logic/__init__.py +0 -0
  317. evalscope/benchmarks/visu_logic/visu_logic_adapter.py +75 -0
  318. evalscope/benchmarks/winogrande/__init__.py +0 -0
  319. evalscope/benchmarks/winogrande/winogrande_adapter.py +34 -0
  320. evalscope/benchmarks/wmt/__init__.py +0 -0
  321. evalscope/benchmarks/wmt/wmt24_adapter.py +294 -0
  322. evalscope/benchmarks/zerobench/__init__.py +0 -0
  323. evalscope/benchmarks/zerobench/zerobench_adapter.py +64 -0
  324. evalscope/cli/cli.py +2 -0
  325. evalscope/cli/start_app.py +12 -2
  326. evalscope/cli/start_eval.py +4 -3
  327. evalscope/cli/start_perf.py +10 -2
  328. evalscope/cli/start_server.py +6 -3
  329. evalscope/collections/__init__.py +27 -3
  330. evalscope/collections/sampler.py +12 -11
  331. evalscope/collections/schema.py +13 -12
  332. evalscope/config.py +218 -147
  333. evalscope/constants.py +78 -82
  334. evalscope/evaluator/__init__.py +1 -1
  335. evalscope/evaluator/evaluator.py +334 -318
  336. evalscope/filters/__init__.py +2 -0
  337. evalscope/filters/extraction.py +126 -0
  338. evalscope/filters/selection.py +57 -0
  339. evalscope/metrics/__init__.py +59 -3
  340. evalscope/metrics/bert_score/__init__.py +0 -0
  341. evalscope/metrics/bert_score/scorer.py +338 -0
  342. evalscope/metrics/bert_score/utils.py +697 -0
  343. evalscope/metrics/bundled_rouge_score/rouge_scorer.py +20 -15
  344. evalscope/metrics/llm_judge.py +211 -0
  345. evalscope/metrics/math_parser.py +545 -0
  346. evalscope/metrics/metric.py +611 -0
  347. evalscope/metrics/metrics.py +112 -23
  348. evalscope/metrics/rouge_metric.py +11 -13
  349. evalscope/metrics/t2v_metrics/__init__.py +0 -0
  350. evalscope/metrics/t2v_metrics/clipscore.py +14 -0
  351. evalscope/metrics/t2v_metrics/constants.py +12 -0
  352. evalscope/metrics/t2v_metrics/itmscore.py +14 -0
  353. evalscope/metrics/t2v_metrics/models/__init__.py +0 -0
  354. evalscope/metrics/t2v_metrics/models/clipscore_models/__init__.py +30 -0
  355. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/__init__.py +0 -0
  356. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/base_model.py +6 -0
  357. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/clip_model.py +134 -0
  358. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/cross_modeling.py +282 -0
  359. evalscope/metrics/t2v_metrics/models/clipscore_models/clip_model.py +115 -0
  360. evalscope/metrics/t2v_metrics/models/clipscore_models/hpsv2_model.py +87 -0
  361. evalscope/metrics/t2v_metrics/models/clipscore_models/mps_model.py +86 -0
  362. evalscope/metrics/t2v_metrics/models/clipscore_models/pickscore_model.py +62 -0
  363. evalscope/metrics/t2v_metrics/models/itmscore_models/__init__.py +26 -0
  364. evalscope/metrics/t2v_metrics/models/itmscore_models/blip2_itm_model.py +85 -0
  365. evalscope/metrics/t2v_metrics/models/itmscore_models/fga_blip2_model.py +99 -0
  366. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/ImageReward.py +176 -0
  367. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/__init__.py +0 -0
  368. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/blip_pretrain.py +82 -0
  369. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward_model.py +74 -0
  370. evalscope/metrics/t2v_metrics/models/model.py +45 -0
  371. evalscope/metrics/t2v_metrics/models/utils.py +25 -0
  372. evalscope/metrics/t2v_metrics/models/vqascore_models/__init__.py +22 -0
  373. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/__init__.py +0 -0
  374. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/__init__.py +1 -0
  375. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/language_model/clip_t5.py +306 -0
  376. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/builder.py +12 -0
  377. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/clip_encoder.py +84 -0
  378. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_projector/builder.py +50 -0
  379. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5_model.py +223 -0
  380. evalscope/metrics/t2v_metrics/models/vqascore_models/gpt4v_model.py +153 -0
  381. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/__init__.py +26 -0
  382. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/config.py +465 -0
  383. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/dist_utils.py +141 -0
  384. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/gradcam.py +24 -0
  385. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/logger.py +190 -0
  386. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/optims.py +100 -0
  387. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/registry.py +313 -0
  388. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/utils.py +416 -0
  389. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/__init__.py +8 -0
  390. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa.py +192 -0
  391. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa_eval.py +320 -0
  392. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/default.yaml +10 -0
  393. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_flant5xl.yaml +42 -0
  394. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt2.7b.yaml +42 -0
  395. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt6.7b.yaml +42 -0
  396. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_coco.yaml +36 -0
  397. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xl.yaml +43 -0
  398. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xxl.yaml +43 -0
  399. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna13b.yaml +43 -0
  400. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna7b.yaml +43 -0
  401. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain.yaml +36 -0
  402. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl.yaml +42 -0
  403. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_iter_80k_total_100k_no_prefix.yaml +42 -0
  404. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_iter_80k_total_100k_prefix.yaml +42 -0
  405. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_vitL.yaml +43 -0
  406. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xxl.yaml +42 -0
  407. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_opt2.7b.yaml +42 -0
  408. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_opt6.7b.yaml +42 -0
  409. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_vitL.yaml +37 -0
  410. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna13b.yaml +43 -0
  411. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna7b.yaml +43 -0
  412. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config.json +21 -0
  413. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config_albef.json +22 -0
  414. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_large_config.json +21 -0
  415. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/__init__.py +212 -0
  416. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/base_model.py +231 -0
  417. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py +1111 -0
  418. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/__init__.py +0 -0
  419. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2.py +211 -0
  420. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_image_text_matching.py +109 -0
  421. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_qformer.py +457 -0
  422. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5.py +370 -0
  423. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5_instruct.py +765 -0
  424. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/fga_blip2.py +274 -0
  425. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_llama.py +896 -0
  426. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_t5.py +1876 -0
  427. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/__init__.py +83 -0
  428. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip.py +58 -0
  429. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_caption.py +212 -0
  430. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_classification.py +164 -0
  431. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_feature_extractor.py +202 -0
  432. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_image_text_matching.py +187 -0
  433. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_nlvr.py +179 -0
  434. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_outputs.py +115 -0
  435. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_pretrain.py +371 -0
  436. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_vqa.py +348 -0
  437. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py +870 -0
  438. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/clip_vit.py +273 -0
  439. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/eva_vit.py +514 -0
  440. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py +1291 -0
  441. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/vit.py +476 -0
  442. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/__init__.py +35 -0
  443. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/base_processor.py +27 -0
  444. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/blip_processors.py +233 -0
  445. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/randaugment.py +393 -0
  446. evalscope/metrics/t2v_metrics/models/vqascore_models/mm_utils.py +129 -0
  447. evalscope/metrics/t2v_metrics/models/vqascore_models/vqa_model.py +18 -0
  448. evalscope/metrics/t2v_metrics/score.py +78 -0
  449. evalscope/metrics/t2v_metrics/vqascore.py +14 -0
  450. evalscope/models/__init__.py +23 -13
  451. evalscope/models/image_edit_model.py +125 -0
  452. evalscope/models/mockllm.py +65 -0
  453. evalscope/models/model_apis.py +69 -0
  454. evalscope/models/modelscope.py +455 -0
  455. evalscope/models/openai_compatible.py +144 -0
  456. evalscope/models/text2image_model.py +124 -0
  457. evalscope/models/utils/openai.py +708 -0
  458. evalscope/perf/__init__.py +0 -1
  459. evalscope/perf/arguments.py +103 -69
  460. evalscope/perf/benchmark.py +114 -163
  461. evalscope/perf/http_client.py +59 -89
  462. evalscope/perf/main.py +91 -18
  463. evalscope/perf/plugin/__init__.py +3 -2
  464. evalscope/perf/plugin/api/__init__.py +4 -3
  465. evalscope/perf/plugin/api/base.py +27 -7
  466. evalscope/perf/plugin/api/custom_api.py +170 -57
  467. evalscope/perf/plugin/api/dashscope_api.py +4 -10
  468. evalscope/perf/plugin/api/default_api.py +214 -0
  469. evalscope/perf/plugin/api/openai_api.py +120 -41
  470. evalscope/perf/plugin/datasets/__init__.py +10 -6
  471. evalscope/perf/plugin/datasets/base.py +43 -1
  472. evalscope/perf/plugin/datasets/custom.py +22 -3
  473. evalscope/perf/plugin/datasets/flickr8k.py +5 -27
  474. evalscope/perf/plugin/datasets/kontext_bench.py +28 -0
  475. evalscope/perf/plugin/datasets/line_by_line.py +7 -3
  476. evalscope/perf/plugin/datasets/longalpaca.py +7 -3
  477. evalscope/perf/plugin/datasets/openqa.py +13 -14
  478. evalscope/perf/plugin/datasets/random_dataset.py +67 -0
  479. evalscope/perf/plugin/datasets/random_vl_dataset.py +80 -0
  480. evalscope/perf/plugin/datasets/speed_benchmark.py +11 -0
  481. evalscope/perf/plugin/registry.py +36 -16
  482. evalscope/perf/utils/analysis_result.py +24 -23
  483. evalscope/perf/utils/benchmark_util.py +95 -55
  484. evalscope/perf/utils/db_util.py +115 -78
  485. evalscope/perf/utils/local_server.py +12 -47
  486. evalscope/perf/utils/log_utils.py +63 -0
  487. evalscope/perf/utils/rich_display.py +192 -0
  488. evalscope/report/__init__.py +46 -3
  489. evalscope/report/combinator.py +143 -32
  490. evalscope/report/generator.py +74 -34
  491. evalscope/report/report.py +238 -0
  492. evalscope/run.py +71 -46
  493. evalscope/summarizer.py +5 -5
  494. evalscope/third_party/longbench_write/infer.py +1 -1
  495. evalscope/third_party/thinkbench/__init__.py +3 -0
  496. evalscope/third_party/thinkbench/eval.py +441 -0
  497. evalscope/third_party/thinkbench/infer.py +130 -0
  498. evalscope/third_party/thinkbench/resources/critique_template.txt +17 -0
  499. evalscope/third_party/thinkbench/resources/reformat_template.txt +31 -0
  500. evalscope/third_party/thinkbench/tools/__init__.py +0 -0
  501. evalscope/third_party/thinkbench/tools/llm.py +48 -0
  502. evalscope/third_party/thinkbench/tools/utils.py +13 -0
  503. evalscope/third_party/toolbench_static/llm/swift_infer.py +46 -20
  504. evalscope/third_party/toolbench_static/toolbench_static.py +2 -1
  505. evalscope/utils/__init__.py +82 -2
  506. evalscope/utils/argument_utils.py +64 -0
  507. evalscope/utils/chat_service.py +8 -6
  508. evalscope/utils/deprecation_utils.py +53 -0
  509. evalscope/utils/function_utils.py +266 -0
  510. evalscope/utils/import_utils.py +154 -0
  511. evalscope/utils/io_utils.py +336 -8
  512. evalscope/utils/json_schema.py +231 -0
  513. evalscope/utils/logger.py +121 -31
  514. evalscope/utils/model_utils.py +57 -1
  515. evalscope/utils/multi_choices.py +303 -0
  516. evalscope/utils/ner.py +377 -0
  517. evalscope/utils/url_utils.py +65 -0
  518. evalscope/version.py +2 -2
  519. evalscope-1.2.0.dist-info/METADATA +553 -0
  520. evalscope-1.2.0.dist-info/RECORD +628 -0
  521. {evalscope-0.10.0.dist-info → evalscope-1.2.0.dist-info}/WHEEL +1 -1
  522. {evalscope-0.10.0.dist-info → evalscope-1.2.0.dist-info}/top_level.txt +0 -1
  523. evalscope/backend/vlm_eval_kit/custom_dataset.py +0 -46
  524. evalscope/benchmarks/arc/ai2_arc.py +0 -151
  525. evalscope/benchmarks/benchmark.py +0 -76
  526. evalscope/benchmarks/ceval/ceval_exam.py +0 -146
  527. evalscope/benchmarks/ceval/samples.jsonl +0 -1
  528. evalscope/benchmarks/cmmlu/cmmlu.py +0 -161
  529. evalscope/benchmarks/cmmlu/samples.jsonl +0 -5
  530. evalscope/benchmarks/competition_math/competition_math.py +0 -79
  531. evalscope/benchmarks/data_adapter.py +0 -291
  532. evalscope/benchmarks/gsm8k/gsm8k.py +0 -121
  533. evalscope/benchmarks/hellaswag/hellaswag.py +0 -112
  534. evalscope/benchmarks/humaneval/humaneval.py +0 -79
  535. evalscope/benchmarks/mmlu/mmlu.py +0 -160
  536. evalscope/benchmarks/mmlu/samples.jsonl +0 -5
  537. evalscope/benchmarks/race/race.py +0 -104
  538. evalscope/benchmarks/race/samples.jsonl +0 -5
  539. evalscope/benchmarks/trivia_qa/trivia_qa.py +0 -89
  540. evalscope/benchmarks/truthful_qa/truthful_qa.py +0 -163
  541. evalscope/collections/evaluator.py +0 -198
  542. evalscope/evaluator/rating_eval.py +0 -157
  543. evalscope/evaluator/reviewer/__init__.py +0 -1
  544. evalscope/evaluator/reviewer/auto_reviewer.py +0 -391
  545. evalscope/metrics/code_metric.py +0 -98
  546. evalscope/metrics/named_metrics.py +0 -17
  547. evalscope/metrics/resources/gpt2-zhcn3-v4.bpe +0 -58485
  548. evalscope/metrics/resources/gpt2-zhcn3-v4.json +0 -1
  549. evalscope/models/base_adapter.py +0 -52
  550. evalscope/models/chat_adapter.py +0 -138
  551. evalscope/models/choice_adapter.py +0 -211
  552. evalscope/models/custom/__init__.py +0 -3
  553. evalscope/models/custom/custom_model.py +0 -53
  554. evalscope/models/custom/dummy_model.py +0 -63
  555. evalscope/models/custom_adapter.py +0 -67
  556. evalscope/models/local_model.py +0 -74
  557. evalscope/models/model.py +0 -229
  558. evalscope/models/server_adapter.py +0 -111
  559. evalscope/registry/__init__.py +0 -1
  560. evalscope/registry/config/cfg_arena.yaml +0 -77
  561. evalscope/registry/config/cfg_arena_zhihu.yaml +0 -63
  562. evalscope/registry/config/cfg_pairwise_baseline.yaml +0 -83
  563. evalscope/registry/config/cfg_single.yaml +0 -78
  564. evalscope/registry/data/prompt_template/lmsys_v2.jsonl +0 -8
  565. evalscope/registry/data/prompt_template/prompt_templates.jsonl +0 -8
  566. evalscope/registry/data/qa_browser/battle.jsonl +0 -634
  567. evalscope/registry/data/qa_browser/category_mapping.yaml +0 -10
  568. evalscope/registry/data/question.jsonl +0 -80
  569. evalscope/registry/tasks/arc.yaml +0 -28
  570. evalscope/registry/tasks/bbh.yaml +0 -26
  571. evalscope/registry/tasks/bbh_mini.yaml +0 -26
  572. evalscope/registry/tasks/ceval.yaml +0 -27
  573. evalscope/registry/tasks/ceval_mini.yaml +0 -26
  574. evalscope/registry/tasks/cmmlu.yaml +0 -27
  575. evalscope/registry/tasks/eval_qwen-7b-chat_v100.yaml +0 -28
  576. evalscope/registry/tasks/general_qa.yaml +0 -27
  577. evalscope/registry/tasks/gsm8k.yaml +0 -29
  578. evalscope/registry/tasks/mmlu.yaml +0 -29
  579. evalscope/registry/tasks/mmlu_mini.yaml +0 -27
  580. evalscope/report/app.py +0 -506
  581. evalscope/report/utils.py +0 -133
  582. evalscope/run_arena.py +0 -202
  583. evalscope/utils/arena_utils.py +0 -217
  584. evalscope/utils/completion_parsers.py +0 -82
  585. evalscope/utils/utils.py +0 -301
  586. evalscope-0.10.0.dist-info/METADATA +0 -565
  587. evalscope-0.10.0.dist-info/RECORD +0 -286
  588. tests/__init__.py +0 -1
  589. tests/cli/__init__.py +0 -1
  590. tests/cli/test_collection.py +0 -57
  591. tests/cli/test_run.py +0 -165
  592. tests/perf/__init__.py +0 -1
  593. tests/perf/test_perf.py +0 -101
  594. tests/rag/test_clip_benchmark.py +0 -85
  595. tests/rag/test_mteb.py +0 -138
  596. tests/rag/test_ragas.py +0 -120
  597. tests/swift/__init__.py +0 -1
  598. tests/swift/test_run_swift_eval.py +0 -145
  599. tests/swift/test_run_swift_vlm_eval.py +0 -127
  600. tests/swift/test_run_swift_vlm_jugde_eval.py +0 -156
  601. tests/test_run_all.py +0 -12
  602. tests/vlm/__init__.py +0 -1
  603. tests/vlm/test_vlmeval.py +0 -60
  604. {tests/rag → evalscope/api}/__init__.py +0 -0
  605. {evalscope-0.10.0.dist-info → evalscope-1.2.0.dist-info}/entry_points.txt +0 -0
  606. {evalscope-0.10.0.dist-info → evalscope-1.2.0.dist-info/licenses}/LICENSE +0 -0
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_feature_extractor.py
@@ -0,0 +1,202 @@
+ """
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ """
+
+ import torch
+ import torch.nn.functional as F
+ import warnings
+ from torch import nn
+
+ from ...common.registry import registry
+ from ..med import XBertEncoder
+ from ..vit import VisionTransformerEncoder
+ from .blip import BlipBase
+ from .blip_outputs import BlipOutputFeatures
+
+
+ @registry.register_model('blip_feature_extractor')
+ class BlipFeatureExtractor(BlipBase):
+     """
+     Class for BLIP feature extractor.
+
+     Supported model types:
+         - base: BLIP base model with pre-trained weights from capfilt by BLIP large model.
+
+     Usage:
+         >>> from lavis.models import load_model
+         >>> model = load_model("blip_feature_extractor", "base")
+     """
+
+     PRETRAINED_MODEL_CONFIG_DICT = {
+         'base': 'configs/models/blip_feature_extractor_base.yaml',
+         # "large": "configs/models/blip_feature_extractor_large.yaml",
+     }
+
+     def __init__(self, image_encoder, text_encoder, embed_dim, max_txt_len=40):
+         super().__init__()
+
+         self.tokenizer = self.init_tokenizer()
+
+         self.visual_encoder = image_encoder
+         self.text_encoder = text_encoder
+
+         # creating projection layers for ITC
+         text_width = text_encoder.config.hidden_size
+         vision_width = image_encoder.vision_width
+
+         self.vision_proj = nn.Linear(vision_width, embed_dim)
+         self.text_proj = nn.Linear(text_width, embed_dim)
+
+         self.max_txt_len = max_txt_len
+
+         self.temp = nn.Parameter(0.07 * torch.ones([]))
+
+     @torch.no_grad()
+     def extract_features(self, samples, mode='multimodal'):
+         """
+         Extract features for multimodal or unimodal samples.
+
+         Args:
+             samples (dict): A dictionary of samples, containing the following keys:
+                 - image (torch.Tensor): A tensor of shape (B, C, H, W) containing the image.
+                     Raw images should be preprocessed before being passed to feature extractor.
+                 - text_input (list): A list of strings containing the text, length B.
+             mode (str): The mode of feature extraction. Can be either "multimodal", "text" or "image".
+                 If "multimodal", return image features and multimodal features;
+                 if "text", return text features;
+                 if "image", return image features.
+                 Default: "multimodal".
+
+         Returns:
+             BlipOutputFeatures: A BlipOutputFeatures object containing the features.
+             See lavis/models/blip_models/blip_outputs.py for more details.
+
+         Examples:
+         ```python
+             >>> from PIL import Image
+             >>> from lavis.models import load_model_and_preprocess
+             >>> raw_image = Image.open("docs/data/merlion.png").convert("RGB")
+             >>> caption = "a large fountain spewing water into the air"
+             >>> model, vis_processors, txt_processors = load_model_and_preprocess("blip_feature_extractor", is_eval=True)
+             >>> image = vis_processors["eval"](raw_image).unsqueeze(0)
+             >>> text_input = txt_processors["eval"](caption)
+
+             >>> sample = {"image": image, "text_input": [text_input]}
+
+             >>> features_multimodal = model.extract_features(sample)
+             >>> features_multimodal.keys()
+             odict_keys(['image_embeds', 'multimodal_embeds'])
+             >>> features_multimodal.image_embeds.shape
+             torch.Size([1, 197, 768])
+             >>> features_multimodal.multimodal_embeds.shape
+             torch.Size([1, 12, 768])
+
+             >>> features_text = model.extract_features(sample, mode="text")
+             >>> features_text.keys()
+             odict_keys(['text_embeds', 'text_features'])
+             >>> features_text.text_embeds.shape
+             torch.Size([1, 12, 768])
+             >>> features_text.text_features.shape
+             torch.Size([1, 12, 256])
+
+             >>> features_image = model.extract_features(sample, mode="image")
+             >>> features_image.keys()
+             odict_keys(['image_embeds', 'image_features'])
+             >>> features_image.image_embeds.shape
+             torch.Size([1, 197, 768])
+             >>> features_image.image_features.shape
+             torch.Size([1, 197, 256])
+         ```
+         """
+         image = samples.get('image')
+         caption = samples.get('text_input')
+
+         # assert mode is one of "image", "text", "multimodal"
+         assert mode in [
+             'image',
+             'text',
+             'multimodal',
+         ], "mode must be one of 'image', 'text', 'multimodal'"
+
+         # initalize output
+         image_embeds, text_embeds, multimodal_embeds = None, None, None
+         image_features, text_features = None, None
+
+         if mode == 'image':
+             assert (image is not None), "Image is not provided for mode 'image' or 'multimodal'"
+             # return image features
+             image_embeds = self.visual_encoder.forward_features(image)
+
+             image_features = self.vision_proj(image_embeds)
+             image_features = F.normalize(image_features, dim=-1)
+
+         elif mode == 'text':
+             assert (caption is not None), "text input is None for mode 'text' or 'multimodal'"
+
+             text = self.tokenizer(caption, return_tensors='pt', padding=True).to(self.device)
+
+             # return text features
+             text_output = self.text_encoder(
+                 text.input_ids,
+                 attention_mask=text.attention_mask,
+                 return_dict=True,
+                 mode='text',
+             )
+             text_embeds = text_output.last_hidden_state
+
+             text_features = self.text_proj(text_embeds)
+             text_features = F.normalize(text_features, dim=-1)
+
+         elif mode == 'multimodal':
+             # return multimodel features
+             image_embeds = self.visual_encoder.forward_features(image)
+             image_atts = torch.ones(image_embeds.size()[:-1], dtype=torch.long).to(self.device)
+
+             text = self.tokenizer(caption, return_tensors='pt', padding=True).to(self.device)
+             text.input_ids[:, 0] = self.tokenizer.enc_token_id
+
+             output = self.text_encoder(
+                 text.input_ids,
+                 attention_mask=text.attention_mask,
+                 encoder_hidden_states=image_embeds,
+                 encoder_attention_mask=image_atts,
+                 return_dict=True,
+             )
+
+             multimodal_embeds = output.last_hidden_state
+
+         return BlipOutputFeatures(
+             image_embeds=image_embeds,
+             image_embeds_proj=image_features,
+             text_embeds=text_embeds,
+             text_embeds_proj=text_features,
+             multimodal_embeds=multimodal_embeds,
+         )
+
+     @classmethod
+     def from_config(cls, cfg=None):
+         # set from_pretrained=True to load weights for 'bert-base-uncased'
+         image_encoder = VisionTransformerEncoder.from_config(cfg)
+         text_encoder = XBertEncoder.from_config(cfg)
+
+         embed_dim = cfg.get('embed_dim', 256)
+         max_txt_len = cfg.get('max_txt_len', 30)
+
+         model = cls(
+             image_encoder=image_encoder,
+             text_encoder=text_encoder,
+             embed_dim=embed_dim,
+             max_txt_len=max_txt_len,
+         )
+
+         # load pre-trained weights
+         pretrain_path = cfg.get('pretrained', None)
+         if pretrain_path is not None:
+             msg = model.load_from_pretrained(url_or_filename=pretrain_path)
+         else:
+             warnings.warn('No pretrained weights are loaded.')
+
+         return model
evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_image_text_matching.py
@@ -0,0 +1,187 @@
+ """
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ """
+
+ import torch
+ import torch.nn.functional as F
+ from torch import nn
+
+ from ...common.registry import registry
+ from ..med import XBertEncoder
+ from ..vit import VisionTransformerEncoder
+ from .blip import BlipBase
+
+
+ @registry.register_model('blip_image_text_matching')
+ class BlipITM(BlipBase):
+     """
+     BLIP Image-Text Matching (ITM) model.
+
+     Supported model types:
+         - base: fine-tuned BLIP retrieval weights on COCO dataset (Karpathy split).
+         - large: fine-tuned BLIP retrieval weights on COCO dataset (Karpathy split).
+
+     Usage:
+         >>> from lavis.models import load_model
+         >>> model = load_model("blip_image_text_matching", "base")
+         >>> model = load_model("blip_image_text_matching", "large")
+     """
+
+     PRETRAINED_MODEL_CONFIG_DICT = {
+         'base': 'configs/models/blip_itm_base.yaml',
+         'large': 'configs/models/blip_itm_large.yaml',
+     }
+
+     def __init__(self, image_encoder, text_encoder, embed_dim=256, max_txt_len=35):
+         super().__init__()
+
+         self.tokenizer = self.init_tokenizer()
+
+         self.text_encoder = text_encoder
+
+         self.visual_encoder = image_encoder
+
+         self.max_txt_len = max_txt_len
+
+         # creating projection layers for ITC
+         text_width = text_encoder.config.hidden_size
+         vision_width = image_encoder.vision_width
+
+         self.vision_proj = nn.Linear(vision_width, embed_dim)
+         self.text_proj = nn.Linear(text_width, embed_dim)
+
+         self.itm_head = nn.Linear(text_width, 2)
+
+     def forward(self, samples, match_head='itm'):
+         image = samples['image']
+         caption = samples['text_input']
+
+         image_embeds = self.visual_encoder.forward_features(image)
+         image_atts = torch.ones(image_embeds.size()[:-1], dtype=torch.long).to(image.device)
+
+         text = self.tokenizer(
+             caption,
+             padding='longest',
+             truncation=True,
+             max_length=self.max_txt_len,
+             return_tensors='pt',
+         ).to(image.device)
+         if match_head == 'itm':
+             encoder_input_ids = text.input_ids.clone()
+             encoder_input_ids[:, 0] = self.tokenizer.enc_token_id  # extra code
+             output = self.text_encoder(
+                 encoder_input_ids,
+                 attention_mask=text.attention_mask,
+                 encoder_hidden_states=image_embeds,
+                 encoder_attention_mask=image_atts,
+                 return_dict=True,
+             )
+             itm_output = self.itm_head(output.last_hidden_state[:, 0, :])
+             return itm_output
+
+         elif match_head == 'itc':
+             text_output = self.text_encoder(
+                 text.input_ids,
+                 attention_mask=text.attention_mask,
+                 return_dict=True,
+                 mode='text',
+             )
+             image_feat = F.normalize(self.vision_proj(image_embeds[:, 0, :]), dim=-1)
+             text_feat = F.normalize(self.text_proj(text_output.last_hidden_state[:, 0, :]), dim=-1)
+
+             sim = image_feat @ text_feat.t()
+             return sim
+
+     def itm_rank(self, image_embeds, image_atts, encoder_input_ids, match_head='itm'):
+         # breakpoint()
+         encoder_input_ids = encoder_input_ids.clone()
+         encoder_input_ids = encoder_input_ids[:, 3:]
+         text_attention_mask = (encoder_input_ids != self.tokenizer.pad_token_id).long()
+
+         if match_head == 'itm':
+             # encoder_input_ids = encoder_input_ids.clone()
+             encoder_input_ids[:, 0] = self.tokenizer.enc_token_id
+             output = self.text_encoder(
+                 encoder_input_ids,
+                 attention_mask=text_attention_mask,
+                 encoder_hidden_states=image_embeds,
+                 encoder_attention_mask=image_atts,
+                 return_dict=True,
+             )
+             # print(output.last_hidden_state.shape)
+             itm_output = self.itm_head(output.last_hidden_state[:, 0, :])
+             itm_output = F.softmax(itm_output, dim=1)[:, 1]
+             return itm_output  #, mask, token_length
+
+         elif match_head == 'itc':
+             encoder_input_ids[:, 0] = self.tokenizer.cls_token_id
+             text_output = self.text_encoder(
+                 encoder_input_ids, attention_mask=text_attention_mask, return_dict=True, mode='text'
+             )
+             image_feat = F.normalize(self.vision_proj(image_embeds[:, 0, :]), dim=-1)
+             text_feat = F.normalize(self.text_proj(text_output.last_hidden_state[:, 0, :]), dim=-1)
+
+             sim = image_feat @ text_feat.t()
+             return sim
+
+     @classmethod
+     def from_config(cls, cfg=None):
+         image_encoder = VisionTransformerEncoder.from_config(cfg)
+         text_encoder = XBertEncoder.from_config(cfg)
+
+         embed_dim = cfg.get('embed_dim', 256)
+         max_txt_len = cfg.get('max_txt_len', 35)
+
+         model = cls(
+             image_encoder=image_encoder,
+             text_encoder=text_encoder,
+             embed_dim=embed_dim,
+             max_txt_len=max_txt_len,
+         )
+
+         model.load_checkpoint_from_config(cfg)
+
+         return model
+
+
+ def compute_gradcam(model, visual_input, text_input, tokenized_text, block_num=6):
+     model.text_encoder.base_model.base_model.encoder.layer[block_num].crossattention.self.save_attention = True
+
+     output = model({'image': visual_input, 'text_input': text_input}, match_head='itm')
+     loss = output[:, 1].sum()
+
+     model.zero_grad()
+     loss.backward()
+     with torch.no_grad():
+         mask = tokenized_text.attention_mask.view(
+             tokenized_text.attention_mask.size(0), 1, -1, 1, 1
+         )  # (bsz,1,token_len, 1,1)
+         token_length = tokenized_text.attention_mask.sum(dim=-1) - 2
+         token_length = token_length.cpu()
+         # grads and cams [bsz, num_head, seq_len, image_patch]
+         grads = model.text_encoder.base_model.base_model.encoder.layer[block_num
+                                                                        ].crossattention.self.get_attn_gradients()
+         cams = model.text_encoder.base_model.base_model.encoder.layer[block_num].crossattention.self.get_attention_map()
+
+         # assume using vit with 576 num image patch
+         cams = cams[:, :, :, 1:].reshape(visual_input.size(0), 12, -1, 24, 24) * mask
+         grads = (grads[:, :, :, 1:].clamp(0).reshape(visual_input.size(0), 12, -1, 24, 24) * mask)
+
+         gradcams = cams * grads
+         gradcam_list = []
+
+         for ind in range(visual_input.size(0)):
+             token_length_ = token_length[ind]
+             gradcam = gradcams[ind].mean(0).cpu().detach()
+             # [enc token gradcam, average gradcam across token, gradcam for individual token]
+             gradcam = torch.cat((
+                 gradcam[0:1, :],
+                 gradcam[1:token_length_ + 1, :].sum(dim=0, keepdim=True) / token_length_,
+                 gradcam[1:, :],
+             ))
+             gradcam_list.append(gradcam)
+
+     return gradcam_list, output
@@ -0,0 +1,179 @@
1
+ """
2
+ Copyright (c) 2022, salesforce.com, inc.
3
+ All rights reserved.
4
+ SPDX-License-Identifier: BSD-3-Clause
5
+ For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
6
+ """
7
+
8
+ import os
9
+ import torch
10
+ import torch.nn.functional as F
11
+ from torch import nn
12
+ from transformers import BertConfig
13
+
14
+ from ...common.dist_utils import download_cached_file
15
+ from ...common.registry import registry
16
+ from ...common.utils import get_abs_path, is_url
17
+ from ..base_model import MomentumDistilationMixin
18
+ from ..vit import VisionTransformerEncoder, interpolate_pos_embed
19
+ from .blip import BlipBase
20
+ from .blip_outputs import BlipIntermediateOutput, BlipOutput
21
+ from .nlvr_encoder import BertModel
22
+
23
+
24
+ @registry.register_model('blip_nlvr')
25
+ class BlipNLVR(BlipBase, MomentumDistilationMixin):
26
+ """
27
+ Class for BLIP NLVR model.
28
+
29
+ Supported model types:
30
+ - base: model with pre-trained BLIP weights, used as initialization for fine-tuning.
31
+ - nlvr: finetuned model on NLVR2 dataset.
32
+
33
+ Usage:
34
+ >>> from lavis.models import load_model
35
+ >>> model = load_model("blip_nlvr", "nlvr")
36
+ """
37
+
38
+ PRETRAINED_MODEL_CONFIG_DICT = {
39
+ 'nlvr': 'configs/models/blip_nlvr.yaml',
40
+ }
41
+
42
+ def __init__(self, image_encoder, text_encoder, num_classes):
43
+ super().__init__()
44
+
45
+ self.tokenizer = self.init_tokenizer()
46
+ self.visual_encoder = image_encoder
47
+ self.text_encoder = text_encoder
48
+
49
+ hidden_size = text_encoder.config.hidden_size
50
+ self.cls_head = nn.Sequential(
51
+ nn.Linear(hidden_size, hidden_size),
52
+ nn.ReLU(),
53
+ nn.Linear(hidden_size, num_classes),
54
+ )
55
+
56
+ def forward(self, samples, is_train=True):
57
+ """
58
+ Forward function for training and evaluation.
59
+
60
+ Args:
61
+ samples (dict): a dict of input samples, which contains the following keys:
62
+ - image0 (torch.Tensor): input image 0, shape (batch_size, 3, H, W), default H=384, W=384.
63
+ - image1 (torch.Tensor): input image 1, shape (batch_size, 3, H, W), default H=384, W=384.
64
+ - text_input (list): list of strings, each string is a natural language sentence.
65
+ - label (torch.LongTensor): ground truth label with shape (batch_size,).
66
+ is_train (bool): whether the model is in training mode.
67
+ If True, the model will return the loss;
68
+ If False, the model will return the prediction.
69
+
70
+ Examples:
71
+ >>> import torch
72
+ >>> from lavis.models import load_model
73
+ >>> model = load_model("blip_nlvr", "nlvr")
74
+ >>> samples = {
75
+ ... "image0": torch.randn(2, 3, 384, 384),
76
+ ... "image1": torch.randn(2, 3, 384, 384),
77
+ ... "text_input": ["there is a ferret in tall grass", "there are lips in one of the images"],
78
+ ... "label": torch.tensor([0, 1]),
79
+ ... }
80
+ >>> output = model(samples)
81
+ >>> output.keys()
82
+ odict_keys(['intermediate_output', 'loss'])
83
+ """
84
+         text = samples['text_input']
+         text = self.tokenizer(text, padding='longest', return_tensors='pt').to(self.device)
+         text.input_ids[:, 0] = self.tokenizer.enc_token_id
+
+         targets = samples['label']
+
+         image0 = samples['image0']
+         image1 = samples['image1']
+         images = torch.cat([image0, image1], dim=0)
+
+         image_embeds = self.visual_encoder.forward_features(images)
+         image_atts = torch.ones(image_embeds.size()[:-1], dtype=torch.long).to(self.device)
+         image0_embeds, image1_embeds = torch.split(image_embeds, targets.size(0))
+
+         encoder_output = self.text_encoder(
+             text.input_ids,
+             attention_mask=text.attention_mask,
+             encoder_hidden_states=[image0_embeds, image1_embeds],
+             encoder_attention_mask=[
+                 image_atts[:image0_embeds.size(0)],
+                 image_atts[image0_embeds.size(0):],
+             ],
+             return_dict=True,
+         )
+
+         prediction = self.cls_head(encoder_output.last_hidden_state[:, 0, :])
+
+         if is_train:
+             loss = F.cross_entropy(prediction, targets)
+             # return {"loss": loss}
+             return BlipOutput(
+                 loss=loss,
+                 intermediate_output=BlipIntermediateOutput(
+                     image_embeds=torch.stack([image0_embeds, image1_embeds], dim=0),
+                     encoder_output=encoder_output,
+                 ),
+             )
+         else:
+             return {'predictions': prediction, 'targets': targets}
+
+     def predict(self, samples):
+         output = self.forward(samples, is_train=False)
+         return output
+
+     @classmethod
+     def from_config(cls, cfg=None):
+         image_encoder = VisionTransformerEncoder.from_config(cfg)
+
+         # text encoder + multimodal encoder
+         bert_config = BertConfig.from_json_file(get_abs_path(cfg['med_config_path']))
+         text_encoder = BertModel(config=bert_config, add_pooling_layer=False)
+
+         num_classes = cfg.get('num_classes', 3)
+
+         assert num_classes > 1, 'Invalid number of classes provided, found {}'.format(num_classes)
+
+         model = cls(
+             image_encoder=image_encoder,
+             text_encoder=text_encoder,
+             num_classes=num_classes,
+         )
+
+         model.load_checkpoint_from_config(cfg)
+
+         return model
+
+     def load_from_pretrained(self, url_or_filename):
+         if is_url(url_or_filename):
+             cached_file = download_cached_file(url_or_filename, check_hash=False, progress=True)
+             checkpoint = torch.load(cached_file, map_location='cpu')
+         elif os.path.isfile(url_or_filename):
+             checkpoint = torch.load(url_or_filename, map_location='cpu')
+         else:
+             raise RuntimeError('checkpoint url or path is invalid')
+         state_dict = checkpoint['model']
+
+         state_dict['visual_encoder.pos_embed'] = interpolate_pos_embed(
+             state_dict['visual_encoder.pos_embed'], self.visual_encoder
+         )
+
+         for key in list(state_dict.keys()):
+             if 'crossattention.self.' in key:
+                 new_key0 = key.replace('self', 'self0')
+                 new_key1 = key.replace('self', 'self1')
+                 state_dict[new_key0] = state_dict[key]
+                 state_dict[new_key1] = state_dict[key]
+             elif 'crossattention.output.dense.' in key:
+                 new_key0 = key.replace('dense', 'dense0')
+                 new_key1 = key.replace('dense', 'dense1')
+                 state_dict[new_key0] = state_dict[key]
+                 state_dict[new_key1] = state_dict[key]
+
+         msg = self.load_state_dict(state_dict, strict=False)
+         print('load checkpoint from %s' % url_or_filename)
+         print(f'missing keys {msg.missing_keys}')
+         return msg
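Two details of this file are easy to miss when skimming the diff: forward() encodes both images with a single visual encoder by concatenating them along the batch dimension and splitting the embeddings back afterwards, and load_from_pretrained() adapts a standard single-image BLIP checkpoint to the two-stream NLVR encoder by duplicating every cross-attention weight into self0/self1 (and dense0/dense1) copies. Below is a minimal, self-contained sketch of that key-remapping step on a toy state dict; the key names and tensor shapes are illustrative only, not taken from a real checkpoint.

    import torch

    # Toy stand-in for checkpoint['model']; a real BLIP checkpoint has many more entries.
    state_dict = {
        'text_encoder.layer.8.crossattention.self.query.weight': torch.randn(4, 4),
        'text_encoder.layer.8.crossattention.output.dense.weight': torch.randn(4, 4),
    }

    # Duplicate each cross-attention weight so both image streams of the NLVR
    # encoder (self0/self1, dense0/dense1) start from the same pre-trained values.
    for key in list(state_dict.keys()):
        if 'crossattention.self.' in key:
            state_dict[key.replace('self', 'self0')] = state_dict[key]
            state_dict[key.replace('self', 'self1')] = state_dict[key]
        elif 'crossattention.output.dense.' in key:
            state_dict[key.replace('dense', 'dense0')] = state_dict[key]
            state_dict[key.replace('dense', 'dense1')] = state_dict[key]

    print(sorted(state_dict.keys()))

Note that the original keys are left in place; the strict=False load in the method above then reports them as unexpected or missing rather than failing.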
@@ -0,0 +1,115 @@
+ """
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ """
+
+ import torch
+ from dataclasses import dataclass
+ from transformers.modeling_outputs import (
+     BaseModelOutputWithPoolingAndCrossAttentions,
+     CausalLMOutputWithCrossAttentions,
+     ModelOutput,
+ )
+ from typing import Optional
+
+
+ @dataclass
+ class BlipSimilarity(ModelOutput):
+     sim_i2t: torch.FloatTensor = None
+     sim_t2i: torch.FloatTensor = None
+
+     sim_i2t_m: Optional[torch.FloatTensor] = None
+     sim_t2i_m: Optional[torch.FloatTensor] = None
+
+     sim_i2t_targets: Optional[torch.FloatTensor] = None
+     sim_t2i_targets: Optional[torch.FloatTensor] = None
+
+
+ @dataclass
+ class BlipIntermediateOutput(ModelOutput):
+     """
+     Data class for intermediate outputs of BLIP models.
+
+     image_embeds (torch.FloatTensor): Image embeddings, shape (batch_size, num_patches, embed_dim).
+     text_embeds (torch.FloatTensor): Text embeddings, shape (batch_size, seq_len, embed_dim).
+
+     image_embeds_m (torch.FloatTensor): Image embeddings from momentum visual encoder, shape (batch_size, num_patches, embed_dim).
+     text_embeds_m (torch.FloatTensor): Text embeddings from momentum text encoder, shape (batch_size, seq_len, embed_dim).
+
+     encoder_output (BaseModelOutputWithPoolingAndCrossAttentions): output from the image-grounded text encoder.
+     encoder_output_neg (BaseModelOutputWithPoolingAndCrossAttentions): output from the image-grounded text encoder for negative pairs.
+
+     decoder_output (CausalLMOutputWithCrossAttentions): output from the image-grounded text decoder.
+     decoder_labels (torch.LongTensor): labels for the captioning loss.
+
+     itm_logits (torch.FloatTensor): logits for the image-text matching loss, shape (batch_size * 3, 2).
+     itm_labels (torch.LongTensor): labels for the image-text matching loss, shape (batch_size * 3,).
+     """
+
+     # uni-modal features
+     image_embeds: torch.FloatTensor = None
+     text_embeds: Optional[torch.FloatTensor] = None
+
+     image_embeds_m: Optional[torch.FloatTensor] = None
+     text_embeds_m: Optional[torch.FloatTensor] = None
+
+     # intermediate outputs of multimodal encoder
+     encoder_output: Optional[BaseModelOutputWithPoolingAndCrossAttentions] = None
+     encoder_output_neg: Optional[BaseModelOutputWithPoolingAndCrossAttentions] = None
+
+     itm_logits: Optional[torch.FloatTensor] = None
+     itm_labels: Optional[torch.LongTensor] = None
+
+     # intermediate outputs of multimodal decoder
+     decoder_output: Optional[CausalLMOutputWithCrossAttentions] = None
+     decoder_labels: Optional[torch.LongTensor] = None
+
+
+ @dataclass
+ class BlipOutput(ModelOutput):
+     # some finetuned models (e.g. BlipVQA) do not compute similarity, thus optional.
+     sims: Optional[BlipSimilarity] = None
+
+     intermediate_output: BlipIntermediateOutput = None
+
+     loss: Optional[torch.FloatTensor] = None
+
+     loss_itc: Optional[torch.FloatTensor] = None
+
+     loss_itm: Optional[torch.FloatTensor] = None
+
+     loss_lm: Optional[torch.FloatTensor] = None
+
+
+ @dataclass
+ class BlipOutputWithLogits(BlipOutput):
+     logits: torch.FloatTensor = None
+     logits_m: torch.FloatTensor = None
+
+
+ @dataclass
+ class BlipOutputFeatures(ModelOutput):
+     """
+     Data class of features from BlipFeatureExtractor.
+
+     Args:
+         image_embeds: (torch.FloatTensor) of shape (batch_size, num_patches+1, embed_dim), optional
+         image_embeds_proj: (torch.FloatTensor) of shape (batch_size, num_patches+1, feature_dim), optional
+         text_embeds: (torch.FloatTensor) of shape (batch_size, sequence_length+1, embed_dim), optional
+         text_embeds_proj: (torch.FloatTensor) of shape (batch_size, sequence_length+1, feature_dim), optional
+
+     The first embedding or feature is for the [CLS] token.
+
+     Features are obtained by projecting the corresponding embedding into a normalized low-dimensional space.
+     """
+
+     image_embeds: Optional[torch.FloatTensor] = None
+     image_embeds_proj: Optional[torch.FloatTensor] = None
+
+     text_embeds: Optional[torch.FloatTensor] = None
+     text_embeds_proj: Optional[torch.FloatTensor] = None
+
+     multimodal_embeds: Optional[torch.FloatTensor] = None
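These ModelOutput dataclasses are what BlipNLVR.forward() returns during training: the scalar loss sits on BlipOutput, and the stacked per-image embeddings plus the encoder output sit on BlipIntermediateOutput. The sketch below shows how a caller might consume such an output; the import path and tensor shapes are placeholders and depend on where this module ends up inside the package.

    import torch
    # Hypothetical import path; adjust to wherever blip_outputs.py lives in this package.
    from blip_outputs import BlipIntermediateOutput, BlipOutput

    output = BlipOutput(
        loss=torch.tensor(0.7),
        intermediate_output=BlipIntermediateOutput(
            # Stacked [image0_embeds, image1_embeds], as produced by BlipNLVR.forward().
            image_embeds=torch.randn(2, 2, 197, 768),
        ),
    )

    # ModelOutput behaves like an ordered dict; fields left as None are dropped from keys().
    print(output.keys())        # odict_keys(['intermediate_output', 'loss'])
    print(float(output.loss))   # attribute access also works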