evalscope 0.5.4__tar.gz → 0.5.5rc1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of evalscope might be problematic. Click here for more details.

Files changed (186)
  1. {evalscope-0.5.4 → evalscope-0.5.5rc1}/PKG-INFO +3 -3
  2. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/backend/opencompass/tasks/eval_datasets.py +2 -2
  3. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/metrics/bundled_rouge_score/rouge_scorer.py +19 -0
  4. evalscope-0.5.5rc1/evalscope/version.py +4 -0
  5. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope.egg-info/PKG-INFO +3 -3
  6. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope.egg-info/requires.txt +2 -2
  7. evalscope-0.5.4/evalscope/version.py +0 -4
  8. {evalscope-0.5.4 → evalscope-0.5.5rc1}/README.md +0 -0
  9. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/__init__.py +0 -0
  10. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/backend/__init__.py +0 -0
  11. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/backend/base.py +0 -0
  12. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/backend/opencompass/__init__.py +0 -0
  13. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/backend/opencompass/api_meta_template.py +0 -0
  14. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/backend/opencompass/backend_manager.py +0 -0
  15. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/backend/opencompass/tasks/__init__.py +0 -0
  16. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/backend/opencompass/tasks/eval_api.py +0 -0
  17. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/backend/vlm_eval_kit/__init__.py +0 -0
  18. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/backend/vlm_eval_kit/backend_manager.py +0 -0
  19. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/backend/vlm_eval_kit/custom_dataset.py +0 -0
  20. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/__init__.py +0 -0
  21. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/arc/__init__.py +0 -0
  22. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/arc/ai2_arc.py +0 -0
  23. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/arc/arc_adapter.py +0 -0
  24. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/__init__.py +0 -0
  25. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/bbh_adapter.py +0 -0
  26. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/boolean_expressions.txt +0 -0
  27. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/causal_judgement.txt +0 -0
  28. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/date_understanding.txt +0 -0
  29. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/disambiguation_qa.txt +0 -0
  30. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/dyck_languages.txt +0 -0
  31. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/formal_fallacies.txt +0 -0
  32. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/geometric_shapes.txt +0 -0
  33. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/hyperbaton.txt +0 -0
  34. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/logical_deduction_five_objects.txt +0 -0
  35. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/logical_deduction_seven_objects.txt +0 -0
  36. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/logical_deduction_three_objects.txt +0 -0
  37. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/movie_recommendation.txt +0 -0
  38. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/multistep_arithmetic_two.txt +0 -0
  39. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/navigate.txt +0 -0
  40. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/object_counting.txt +0 -0
  41. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/penguins_in_a_table.txt +0 -0
  42. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/reasoning_about_colored_objects.txt +0 -0
  43. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/ruin_names.txt +0 -0
  44. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/salient_translation_error_detection.txt +0 -0
  45. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/snarks.txt +0 -0
  46. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/sports_understanding.txt +0 -0
  47. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/temporal_sequences.txt +0 -0
  48. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
  49. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
  50. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
  51. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/web_of_lies.txt +0 -0
  52. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/word_sorting.txt +0 -0
  53. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/benchmark.py +0 -0
  54. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/ceval/__init__.py +0 -0
  55. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/ceval/ceval_adapter.py +0 -0
  56. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/ceval/ceval_exam.py +0 -0
  57. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/cmmlu/__init__.py +0 -0
  58. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/cmmlu/cmmlu.py +0 -0
  59. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/cmmlu/cmmlu_adapter.py +0 -0
  60. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/competition_math/__init__.py +0 -0
  61. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/competition_math/competition_math.py +0 -0
  62. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/competition_math/competition_math_adapter.py +0 -0
  63. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/data_adapter.py +0 -0
  64. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/general_qa/__init__.py +0 -0
  65. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/general_qa/general_qa_adapter.py +0 -0
  66. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/gsm8k/__init__.py +0 -0
  67. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/gsm8k/gsm8k.py +0 -0
  68. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/gsm8k/gsm8k_adapter.py +0 -0
  69. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/hellaswag/__init__.py +0 -0
  70. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/hellaswag/hellaswag.py +0 -0
  71. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/hellaswag/hellaswag_adapter.py +0 -0
  72. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/humaneval/__init__.py +0 -0
  73. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/humaneval/humaneval.py +0 -0
  74. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/humaneval/humaneval_adapter.py +0 -0
  75. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/mmlu/__init__.py +0 -0
  76. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/mmlu/mmlu.py +0 -0
  77. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/mmlu/mmlu_adapter.py +0 -0
  78. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/race/__init__.py +0 -0
  79. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/race/race.py +0 -0
  80. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/race/race_adapter.py +0 -0
  81. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/trivia_qa/__init__.py +0 -0
  82. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/trivia_qa/trivia_qa.py +0 -0
  83. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/trivia_qa/trivia_qa_adapter.py +0 -0
  84. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/truthful_qa/__init__.py +0 -0
  85. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/truthful_qa/truthful_qa.py +0 -0
  86. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/truthful_qa/truthful_qa_adapter.py +0 -0
  87. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/cache.py +0 -0
  88. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/cli/__init__.py +0 -0
  89. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/cli/base.py +0 -0
  90. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/cli/cli.py +0 -0
  91. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/cli/start_perf.py +0 -0
  92. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/cli/start_server.py +0 -0
  93. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/config.py +0 -0
  94. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/constants.py +0 -0
  95. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/evaluator/__init__.py +0 -0
  96. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/evaluator/evaluator.py +0 -0
  97. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/evaluator/rating_eval.py +0 -0
  98. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/evaluator/reviewer/__init__.py +0 -0
  99. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/evaluator/reviewer/auto_reviewer.py +0 -0
  100. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/metrics/__init__.py +0 -0
  101. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/metrics/bundled_rouge_score/__init__.py +0 -0
  102. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/metrics/code_metric.py +0 -0
  103. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/metrics/math_accuracy.py +0 -0
  104. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/metrics/metrics.py +0 -0
  105. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/metrics/rouge_metric.py +0 -0
  106. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/models/__init__.py +0 -0
  107. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/models/api/__init__.py +0 -0
  108. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/models/api/openai_api.py +0 -0
  109. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/models/custom/__init__.py +0 -0
  110. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/models/custom/custom_model.py +0 -0
  111. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/models/dummy_chat_model.py +0 -0
  112. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/models/model.py +0 -0
  113. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/models/model_adapter.py +0 -0
  114. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/models/openai_model.py +0 -0
  115. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/models/template.py +0 -0
  116. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/perf/__init__.py +0 -0
  117. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/perf/_logging.py +0 -0
  118. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/perf/api_plugin_base.py +0 -0
  119. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/perf/custom_api.py +0 -0
  120. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/perf/dashscope_api.py +0 -0
  121. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/perf/dataset_plugin_base.py +0 -0
  122. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/perf/datasets/__init__.py +0 -0
  123. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/perf/datasets/line_by_line.py +0 -0
  124. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/perf/datasets/longalpaca_12k.py +0 -0
  125. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/perf/datasets/openqa.py +0 -0
  126. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/perf/how_to_analysis_result.py +0 -0
  127. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/perf/http_client.py +0 -0
  128. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/perf/openai_api.py +0 -0
  129. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/perf/plugin_registry.py +0 -0
  130. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/perf/query_parameters.py +0 -0
  131. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/perf/server_sent_event.py +0 -0
  132. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/preprocess/__init__.py +0 -0
  133. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/preprocess/tokenizers/__init__.py +0 -0
  134. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/preprocess/tokenizers/gpt2_tokenizer.py +0 -0
  135. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/registry/__init__.py +0 -0
  136. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/registry/tasks/arc.yaml +0 -0
  137. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/registry/tasks/bbh.yaml +0 -0
  138. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/registry/tasks/bbh_mini.yaml +0 -0
  139. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/registry/tasks/ceval.yaml +0 -0
  140. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/registry/tasks/ceval_mini.yaml +0 -0
  141. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/registry/tasks/cmmlu.yaml +0 -0
  142. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/registry/tasks/eval_qwen-7b-chat_v100.yaml +0 -0
  143. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/registry/tasks/general_qa.yaml +0 -0
  144. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/registry/tasks/gsm8k.yaml +0 -0
  145. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/registry/tasks/mmlu.yaml +0 -0
  146. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/registry/tasks/mmlu_mini.yaml +0 -0
  147. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/run.py +0 -0
  148. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/run_arena.py +0 -0
  149. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/run_ms.py +0 -0
  150. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/summarizer.py +0 -0
  151. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/__init__.py +0 -0
  152. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/longbench_write/__init__.py +0 -0
  153. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/longbench_write/eval.py +0 -0
  154. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/longbench_write/infer.py +0 -0
  155. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/longbench_write/longbench_write.py +0 -0
  156. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/longbench_write/resources/__init__.py +0 -0
  157. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/longbench_write/resources/judge.txt +0 -0
  158. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/longbench_write/resources/longbench_write.jsonl +0 -0
  159. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/longbench_write/resources/longbench_write_en.jsonl +0 -0
  160. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/longbench_write/resources/longwrite_ruler.jsonl +0 -0
  161. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/longbench_write/tools/__init__.py +0 -0
  162. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/longbench_write/tools/data_etl.py +0 -0
  163. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/longbench_write/utils.py +0 -0
  164. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/toolbench_static/__init__.py +0 -0
  165. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/toolbench_static/eval.py +0 -0
  166. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/toolbench_static/infer.py +0 -0
  167. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/toolbench_static/llm/__init__.py +0 -0
  168. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/toolbench_static/llm/swift_infer.py +0 -0
  169. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/toolbench_static/toolbench_static.py +0 -0
  170. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/tools/__init__.py +0 -0
  171. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/tools/combine_reports.py +0 -0
  172. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/tools/gen_mmlu_subject_mapping.py +0 -0
  173. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/tools/rewrite_eval_results.py +0 -0
  174. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/utils/__init__.py +0 -0
  175. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/utils/arena_utils.py +0 -0
  176. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/utils/completion_parsers.py +0 -0
  177. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/utils/logger.py +0 -0
  178. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/utils/task_cfg_parser.py +0 -0
  179. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/utils/task_utils.py +0 -0
  180. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/utils/utils.py +0 -0
  181. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope.egg-info/SOURCES.txt +0 -0
  182. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope.egg-info/dependency_links.txt +0 -0
  183. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope.egg-info/entry_points.txt +0 -0
  184. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope.egg-info/not-zip-safe +0 -0
  185. {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope.egg-info/top_level.txt +0 -0
  186. {evalscope-0.5.4 → evalscope-0.5.5rc1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: evalscope
3
- Version: 0.5.4
3
+ Version: 0.5.5rc1
4
4
  Summary: EvalScope: Lightweight LLMs Evaluation Framework
5
5
  Home-page: https://github.com/modelscope/evalscope
6
6
  Author: ModelScope team
@@ -48,7 +48,7 @@ Requires-Dist: transformers_stream_generator
48
48
  Requires-Dist: jieba
49
49
  Requires-Dist: rouge-chinese
50
50
  Provides-Extra: opencompass
51
- Requires-Dist: ms-opencompass>=0.1.0; extra == "opencompass"
51
+ Requires-Dist: ms-opencompass>=0.1.1; extra == "opencompass"
52
52
  Provides-Extra: vlmeval
53
53
  Requires-Dist: ms-vlmeval>=0.0.5; extra == "vlmeval"
54
54
  Provides-Extra: inner
@@ -111,7 +111,7 @@ Requires-Dist: transformers>=4.33; extra == "all"
111
111
  Requires-Dist: transformers_stream_generator; extra == "all"
112
112
  Requires-Dist: jieba; extra == "all"
113
113
  Requires-Dist: rouge-chinese; extra == "all"
114
- Requires-Dist: ms-opencompass>=0.1.0; extra == "all"
114
+ Requires-Dist: ms-opencompass>=0.1.1; extra == "all"
115
115
  Requires-Dist: ms-vlmeval>=0.0.5; extra == "all"
116
116
 
117
117
  English | [简体中文](README_zh.md)
@@ -7,7 +7,7 @@ with read_base():
7
7
  from opencompass.configs.datasets.agieval.agieval_gen_64afd3 import agieval_datasets
8
8
  from opencompass.configs.datasets.GaokaoBench.GaokaoBench_gen_5cfe9e import GaokaoBench_datasets
9
9
  from opencompass.configs.datasets.humaneval.humaneval_gen_8e312c import humaneval_datasets
10
- from opencompass.configs.datasets.mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets
10
+ from opencompass.configs.datasets.mbpp.mbpp_gen_830460 import mbpp_datasets
11
11
  from opencompass.configs.datasets.CLUE_C3.CLUE_C3_gen_8c358f import C3_datasets
12
12
  from opencompass.configs.datasets.CLUE_CMRC.CLUE_CMRC_gen_1bd3c8 import CMRC_datasets
13
13
  from opencompass.configs.datasets.CLUE_DRCD.CLUE_DRCD_gen_1bd3c8 import DRCD_datasets
@@ -45,7 +45,7 @@ with read_base():
45
45
  from opencompass.configs.datasets.piqa.piqa_gen_1194eb import piqa_datasets
46
46
  from opencompass.configs.datasets.siqa.siqa_gen_e78df3 import siqa_datasets
47
47
  from opencompass.configs.datasets.strategyqa.strategyqa_gen_1180a7 import strategyqa_datasets
48
- from opencompass.configs.datasets.winogrande.deprecated_winogrande_gen_a9ede5 import winogrande_datasets
48
+ from opencompass.configs.datasets.winogrande.winogrande_gen_458220 import winogrande_datasets
49
49
  from opencompass.configs.datasets.obqa.obqa_gen_9069e4 import obqa_datasets
50
50
  from opencompass.configs.datasets.nq.nq_gen_c788f6 import nq_datasets
51
51
  from opencompass.configs.datasets.triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
@@ -31,6 +31,7 @@ In these examples settings.xml lists input files and formats.
31
31
  from __future__ import absolute_import, division, print_function
32
32
  import collections
33
33
  import re
34
+ import os
34
35
 
35
36
  import nltk
36
37
  import numpy as np
@@ -38,6 +39,24 @@ import six
38
39
  from absl import logging
39
40
  from rouge_score import scoring, tokenizers
40
41
  from six.moves import map, range
42
+ from evalscope.utils import get_logger
43
+
44
+ logger = get_logger()
45
+
46
+ # Deal with nltk punkt_tab.zip tokenizer file to avoid downloading issue
47
+ try:
48
+ nltk_dir = os.path.join(os.path.expanduser('~'), 'nltk_data/tokenizers')
49
+ os.makedirs(nltk_dir, exist_ok=True)
50
+ punkt_path = os.path.join(nltk_dir, 'punkt_tab.zip')
51
+ punkt_tab_url = 'https://modelscope-open.oss-cn-hangzhou.aliyuncs.com/open_data/nltk_data/punkt_tab.zip'
52
+
53
+ if not os.path.exists(punkt_path):
54
+ os.system(f'wget -P {nltk_dir} {punkt_tab_url}')
55
+ os.system(f'unzip {punkt_path} -d {nltk_dir}')
56
+ else:
57
+ logger.info(f'{punkt_path} already exists, skipping download')
58
+ except Exception as e:
59
+ logger.error(f'Try to download punkt_tab.zip for nltk failed: {e}')
41
60
 
42
61
 
43
62
  class RougeScorer(scoring.BaseScorer):
@@ -0,0 +1,4 @@
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+
3
+ __version__ = '0.5.5rc1'
4
+ __release_datetime__ = '2024-09-29 08:00:00'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: evalscope
3
- Version: 0.5.4
3
+ Version: 0.5.5rc1
4
4
  Summary: EvalScope: Lightweight LLMs Evaluation Framework
5
5
  Home-page: https://github.com/modelscope/evalscope
6
6
  Author: ModelScope team
@@ -48,7 +48,7 @@ Requires-Dist: transformers_stream_generator
48
48
  Requires-Dist: jieba
49
49
  Requires-Dist: rouge-chinese
50
50
  Provides-Extra: opencompass
51
- Requires-Dist: ms-opencompass>=0.1.0; extra == "opencompass"
51
+ Requires-Dist: ms-opencompass>=0.1.1; extra == "opencompass"
52
52
  Provides-Extra: vlmeval
53
53
  Requires-Dist: ms-vlmeval>=0.0.5; extra == "vlmeval"
54
54
  Provides-Extra: inner
@@ -111,7 +111,7 @@ Requires-Dist: transformers>=4.33; extra == "all"
111
111
  Requires-Dist: transformers_stream_generator; extra == "all"
112
112
  Requires-Dist: jieba; extra == "all"
113
113
  Requires-Dist: rouge-chinese; extra == "all"
114
- Requires-Dist: ms-opencompass>=0.1.0; extra == "all"
114
+ Requires-Dist: ms-opencompass>=0.1.1; extra == "all"
115
115
  Requires-Dist: ms-vlmeval>=0.0.5; extra == "all"
116
116
 
117
117
  English | [简体中文](README_zh.md)
@@ -64,7 +64,7 @@ transformers>=4.33
64
64
  transformers_stream_generator
65
65
  jieba
66
66
  rouge-chinese
67
- ms-opencompass>=0.1.0
67
+ ms-opencompass>=0.1.1
68
68
  ms-vlmeval>=0.0.5
69
69
 
70
70
  [inner]
@@ -95,7 +95,7 @@ transformers<4.43,>=4.33
95
95
  transformers_stream_generator
96
96
 
97
97
  [opencompass]
98
- ms-opencompass>=0.1.0
98
+ ms-opencompass>=0.1.1
99
99
 
100
100
  [vlmeval]
101
101
  ms-vlmeval>=0.0.5
@@ -1,4 +0,0 @@
1
- # Copyright (c) Alibaba, Inc. and its affiliates.
2
-
3
- __version__ = '0.5.4'
4
- __release_datetime__ = '2024-09-19 08:00:00'
File without changes
File without changes
File without changes