evalscope 0.5.4__tar.gz → 0.5.5rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of evalscope might be problematic. Click here for more details.
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/PKG-INFO +3 -3
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/backend/opencompass/tasks/eval_datasets.py +2 -2
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/metrics/bundled_rouge_score/rouge_scorer.py +19 -0
- evalscope-0.5.5rc1/evalscope/version.py +4 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope.egg-info/PKG-INFO +3 -3
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope.egg-info/requires.txt +2 -2
- evalscope-0.5.4/evalscope/version.py +0 -4
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/README.md +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/backend/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/backend/base.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/backend/opencompass/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/backend/opencompass/api_meta_template.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/backend/opencompass/backend_manager.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/backend/opencompass/tasks/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/backend/opencompass/tasks/eval_api.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/backend/vlm_eval_kit/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/backend/vlm_eval_kit/backend_manager.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/backend/vlm_eval_kit/custom_dataset.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/arc/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/arc/ai2_arc.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/arc/arc_adapter.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/bbh_adapter.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/boolean_expressions.txt +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/causal_judgement.txt +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/date_understanding.txt +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/disambiguation_qa.txt +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/dyck_languages.txt +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/formal_fallacies.txt +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/geometric_shapes.txt +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/hyperbaton.txt +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/logical_deduction_five_objects.txt +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/logical_deduction_seven_objects.txt +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/logical_deduction_three_objects.txt +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/movie_recommendation.txt +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/multistep_arithmetic_two.txt +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/navigate.txt +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/object_counting.txt +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/penguins_in_a_table.txt +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/reasoning_about_colored_objects.txt +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/ruin_names.txt +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/salient_translation_error_detection.txt +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/snarks.txt +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/sports_understanding.txt +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/temporal_sequences.txt +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/tracking_shuffled_objects_five_objects.txt +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/tracking_shuffled_objects_seven_objects.txt +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/tracking_shuffled_objects_three_objects.txt +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/web_of_lies.txt +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/word_sorting.txt +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/benchmark.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/ceval/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/ceval/ceval_adapter.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/ceval/ceval_exam.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/cmmlu/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/cmmlu/cmmlu.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/cmmlu/cmmlu_adapter.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/competition_math/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/competition_math/competition_math.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/competition_math/competition_math_adapter.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/data_adapter.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/general_qa/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/general_qa/general_qa_adapter.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/gsm8k/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/gsm8k/gsm8k.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/gsm8k/gsm8k_adapter.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/hellaswag/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/hellaswag/hellaswag.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/hellaswag/hellaswag_adapter.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/humaneval/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/humaneval/humaneval.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/humaneval/humaneval_adapter.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/mmlu/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/mmlu/mmlu.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/mmlu/mmlu_adapter.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/race/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/race/race.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/race/race_adapter.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/trivia_qa/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/trivia_qa/trivia_qa.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/trivia_qa/trivia_qa_adapter.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/truthful_qa/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/truthful_qa/truthful_qa.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/truthful_qa/truthful_qa_adapter.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/cache.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/cli/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/cli/base.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/cli/cli.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/cli/start_perf.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/cli/start_server.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/config.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/constants.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/evaluator/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/evaluator/evaluator.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/evaluator/rating_eval.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/evaluator/reviewer/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/evaluator/reviewer/auto_reviewer.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/metrics/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/metrics/bundled_rouge_score/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/metrics/code_metric.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/metrics/math_accuracy.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/metrics/metrics.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/metrics/rouge_metric.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/models/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/models/api/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/models/api/openai_api.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/models/custom/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/models/custom/custom_model.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/models/dummy_chat_model.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/models/model.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/models/model_adapter.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/models/openai_model.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/models/template.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/perf/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/perf/_logging.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/perf/api_plugin_base.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/perf/custom_api.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/perf/dashscope_api.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/perf/dataset_plugin_base.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/perf/datasets/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/perf/datasets/line_by_line.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/perf/datasets/longalpaca_12k.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/perf/datasets/openqa.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/perf/how_to_analysis_result.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/perf/http_client.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/perf/openai_api.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/perf/plugin_registry.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/perf/query_parameters.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/perf/server_sent_event.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/preprocess/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/preprocess/tokenizers/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/preprocess/tokenizers/gpt2_tokenizer.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/registry/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/registry/tasks/arc.yaml +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/registry/tasks/bbh.yaml +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/registry/tasks/bbh_mini.yaml +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/registry/tasks/ceval.yaml +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/registry/tasks/ceval_mini.yaml +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/registry/tasks/cmmlu.yaml +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/registry/tasks/eval_qwen-7b-chat_v100.yaml +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/registry/tasks/general_qa.yaml +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/registry/tasks/gsm8k.yaml +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/registry/tasks/mmlu.yaml +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/registry/tasks/mmlu_mini.yaml +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/run.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/run_arena.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/run_ms.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/summarizer.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/longbench_write/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/longbench_write/eval.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/longbench_write/infer.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/longbench_write/longbench_write.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/longbench_write/resources/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/longbench_write/resources/judge.txt +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/longbench_write/resources/longbench_write.jsonl +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/longbench_write/resources/longbench_write_en.jsonl +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/longbench_write/resources/longwrite_ruler.jsonl +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/longbench_write/tools/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/longbench_write/tools/data_etl.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/longbench_write/utils.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/toolbench_static/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/toolbench_static/eval.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/toolbench_static/infer.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/toolbench_static/llm/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/toolbench_static/llm/swift_infer.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/toolbench_static/toolbench_static.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/tools/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/tools/combine_reports.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/tools/gen_mmlu_subject_mapping.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/tools/rewrite_eval_results.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/utils/__init__.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/utils/arena_utils.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/utils/completion_parsers.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/utils/logger.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/utils/task_cfg_parser.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/utils/task_utils.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/utils/utils.py +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope.egg-info/SOURCES.txt +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope.egg-info/dependency_links.txt +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope.egg-info/entry_points.txt +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope.egg-info/not-zip-safe +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope.egg-info/top_level.txt +0 -0
- {evalscope-0.5.4 → evalscope-0.5.5rc1}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: evalscope
|
|
3
|
-
Version: 0.5.4
|
|
3
|
+
Version: 0.5.5rc1
|
|
4
4
|
Summary: EvalScope: Lightweight LLMs Evaluation Framework
|
|
5
5
|
Home-page: https://github.com/modelscope/evalscope
|
|
6
6
|
Author: ModelScope team
|
|
@@ -48,7 +48,7 @@ Requires-Dist: transformers_stream_generator
|
|
|
48
48
|
Requires-Dist: jieba
|
|
49
49
|
Requires-Dist: rouge-chinese
|
|
50
50
|
Provides-Extra: opencompass
|
|
51
|
-
Requires-Dist: ms-opencompass>=0.1.
|
|
51
|
+
Requires-Dist: ms-opencompass>=0.1.1; extra == "opencompass"
|
|
52
52
|
Provides-Extra: vlmeval
|
|
53
53
|
Requires-Dist: ms-vlmeval>=0.0.5; extra == "vlmeval"
|
|
54
54
|
Provides-Extra: inner
|
|
@@ -111,7 +111,7 @@ Requires-Dist: transformers>=4.33; extra == "all"
|
|
|
111
111
|
Requires-Dist: transformers_stream_generator; extra == "all"
|
|
112
112
|
Requires-Dist: jieba; extra == "all"
|
|
113
113
|
Requires-Dist: rouge-chinese; extra == "all"
|
|
114
|
-
Requires-Dist: ms-opencompass>=0.1.
|
|
114
|
+
Requires-Dist: ms-opencompass>=0.1.1; extra == "all"
|
|
115
115
|
Requires-Dist: ms-vlmeval>=0.0.5; extra == "all"
|
|
116
116
|
|
|
117
117
|
English | [简体中文](README_zh.md)
|
|
@@ -7,7 +7,7 @@ with read_base():
|
|
|
7
7
|
from opencompass.configs.datasets.agieval.agieval_gen_64afd3 import agieval_datasets
|
|
8
8
|
from opencompass.configs.datasets.GaokaoBench.GaokaoBench_gen_5cfe9e import GaokaoBench_datasets
|
|
9
9
|
from opencompass.configs.datasets.humaneval.humaneval_gen_8e312c import humaneval_datasets
|
|
10
|
-
from opencompass.configs.datasets.mbpp.
|
|
10
|
+
from opencompass.configs.datasets.mbpp.mbpp_gen_830460 import mbpp_datasets
|
|
11
11
|
from opencompass.configs.datasets.CLUE_C3.CLUE_C3_gen_8c358f import C3_datasets
|
|
12
12
|
from opencompass.configs.datasets.CLUE_CMRC.CLUE_CMRC_gen_1bd3c8 import CMRC_datasets
|
|
13
13
|
from opencompass.configs.datasets.CLUE_DRCD.CLUE_DRCD_gen_1bd3c8 import DRCD_datasets
|
|
@@ -45,7 +45,7 @@ with read_base():
|
|
|
45
45
|
from opencompass.configs.datasets.piqa.piqa_gen_1194eb import piqa_datasets
|
|
46
46
|
from opencompass.configs.datasets.siqa.siqa_gen_e78df3 import siqa_datasets
|
|
47
47
|
from opencompass.configs.datasets.strategyqa.strategyqa_gen_1180a7 import strategyqa_datasets
|
|
48
|
-
from opencompass.configs.datasets.winogrande.
|
|
48
|
+
from opencompass.configs.datasets.winogrande.winogrande_gen_458220 import winogrande_datasets
|
|
49
49
|
from opencompass.configs.datasets.obqa.obqa_gen_9069e4 import obqa_datasets
|
|
50
50
|
from opencompass.configs.datasets.nq.nq_gen_c788f6 import nq_datasets
|
|
51
51
|
from opencompass.configs.datasets.triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
|
{evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/metrics/bundled_rouge_score/rouge_scorer.py
RENAMED
|
@@ -31,6 +31,7 @@ In these examples settings.xml lists input files and formats.
|
|
|
31
31
|
from __future__ import absolute_import, division, print_function
|
|
32
32
|
import collections
|
|
33
33
|
import re
|
|
34
|
+
import os
|
|
34
35
|
|
|
35
36
|
import nltk
|
|
36
37
|
import numpy as np
|
|
@@ -38,6 +39,24 @@ import six
|
|
|
38
39
|
from absl import logging
|
|
39
40
|
from rouge_score import scoring, tokenizers
|
|
40
41
|
from six.moves import map, range
|
|
42
|
+
from evalscope.utils import get_logger
|
|
43
|
+
|
|
44
|
+
logger = get_logger()
|
|
45
|
+
|
|
46
|
+
# Deal with nltk punkt_tab.zip tokenizer file to avoid downloading issue
|
|
47
|
+
try:
|
|
48
|
+
nltk_dir = os.path.join(os.path.expanduser('~'), 'nltk_data/tokenizers')
|
|
49
|
+
os.makedirs(nltk_dir, exist_ok=True)
|
|
50
|
+
punkt_path = os.path.join(nltk_dir, 'punkt_tab.zip')
|
|
51
|
+
punkt_tab_url = 'https://modelscope-open.oss-cn-hangzhou.aliyuncs.com/open_data/nltk_data/punkt_tab.zip'
|
|
52
|
+
|
|
53
|
+
if not os.path.exists(punkt_path):
|
|
54
|
+
os.system(f'wget -P {nltk_dir} {punkt_tab_url}')
|
|
55
|
+
os.system(f'unzip {punkt_path} -d {nltk_dir}')
|
|
56
|
+
else:
|
|
57
|
+
logger.info(f'{punkt_path} already exists, skipping download')
|
|
58
|
+
except Exception as e:
|
|
59
|
+
logger.error(f'Try to download punkt_tab.zip for nltk failed: {e}')
|
|
41
60
|
|
|
42
61
|
|
|
43
62
|
class RougeScorer(scoring.BaseScorer):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: evalscope
|
|
3
|
-
Version: 0.5.4
|
|
3
|
+
Version: 0.5.5rc1
|
|
4
4
|
Summary: EvalScope: Lightweight LLMs Evaluation Framework
|
|
5
5
|
Home-page: https://github.com/modelscope/evalscope
|
|
6
6
|
Author: ModelScope team
|
|
@@ -48,7 +48,7 @@ Requires-Dist: transformers_stream_generator
|
|
|
48
48
|
Requires-Dist: jieba
|
|
49
49
|
Requires-Dist: rouge-chinese
|
|
50
50
|
Provides-Extra: opencompass
|
|
51
|
-
Requires-Dist: ms-opencompass>=0.1.
|
|
51
|
+
Requires-Dist: ms-opencompass>=0.1.1; extra == "opencompass"
|
|
52
52
|
Provides-Extra: vlmeval
|
|
53
53
|
Requires-Dist: ms-vlmeval>=0.0.5; extra == "vlmeval"
|
|
54
54
|
Provides-Extra: inner
|
|
@@ -111,7 +111,7 @@ Requires-Dist: transformers>=4.33; extra == "all"
|
|
|
111
111
|
Requires-Dist: transformers_stream_generator; extra == "all"
|
|
112
112
|
Requires-Dist: jieba; extra == "all"
|
|
113
113
|
Requires-Dist: rouge-chinese; extra == "all"
|
|
114
|
-
Requires-Dist: ms-opencompass>=0.1.
|
|
114
|
+
Requires-Dist: ms-opencompass>=0.1.1; extra == "all"
|
|
115
115
|
Requires-Dist: ms-vlmeval>=0.0.5; extra == "all"
|
|
116
116
|
|
|
117
117
|
English | [简体中文](README_zh.md)
|
|
@@ -64,7 +64,7 @@ transformers>=4.33
|
|
|
64
64
|
transformers_stream_generator
|
|
65
65
|
jieba
|
|
66
66
|
rouge-chinese
|
|
67
|
-
ms-opencompass>=0.1.
|
|
67
|
+
ms-opencompass>=0.1.1
|
|
68
68
|
ms-vlmeval>=0.0.5
|
|
69
69
|
|
|
70
70
|
[inner]
|
|
@@ -95,7 +95,7 @@ transformers<4.43,>=4.33
|
|
|
95
95
|
transformers_stream_generator
|
|
96
96
|
|
|
97
97
|
[opencompass]
|
|
98
|
-
ms-opencompass>=0.1.
|
|
98
|
+
ms-opencompass>=0.1.1
|
|
99
99
|
|
|
100
100
|
[vlmeval]
|
|
101
101
|
ms-vlmeval>=0.0.5
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/boolean_expressions.txt
RENAMED
|
File without changes
|
{evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/causal_judgement.txt
RENAMED
|
File without changes
|
{evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/date_understanding.txt
RENAMED
|
File without changes
|
{evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/disambiguation_qa.txt
RENAMED
|
File without changes
|
{evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/dyck_languages.txt
RENAMED
|
File without changes
|
{evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/formal_fallacies.txt
RENAMED
|
File without changes
|
{evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/geometric_shapes.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/movie_recommendation.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/object_counting.txt
RENAMED
|
File without changes
|
{evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/penguins_in_a_table.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/sports_understanding.txt
RENAMED
|
File without changes
|
{evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/temporal_sequences.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/bbh/cot_prompts/word_sorting.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/competition_math/competition_math.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/general_qa/general_qa_adapter.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/benchmarks/truthful_qa/truthful_qa_adapter.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/longbench_write/longbench_write.py
RENAMED
|
File without changes
|
{evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/longbench_write/resources/__init__.py
RENAMED
|
File without changes
|
{evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/longbench_write/resources/judge.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/longbench_write/tools/__init__.py
RENAMED
|
File without changes
|
{evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/longbench_write/tools/data_etl.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/toolbench_static/llm/__init__.py
RENAMED
|
File without changes
|
{evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/toolbench_static/llm/swift_infer.py
RENAMED
|
File without changes
|
{evalscope-0.5.4 → evalscope-0.5.5rc1}/evalscope/third_party/toolbench_static/toolbench_static.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|