evalscope 0.6.0rc0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. evalscope/backend/opencompass/tasks/eval_datasets.py +1 -1
  2. evalscope/backend/rag_eval/clip_benchmark/utils/webdataset_convert.py +230 -0
  3. evalscope/backend/rag_eval/clip_benchmark/utils/webdatasets.txt +43 -0
  4. evalscope/backend/rag_eval/ragas/prompts/chinese/AnswerCorrectness/correctness_prompt_chinese.json +87 -0
  5. evalscope/backend/rag_eval/ragas/prompts/chinese/AnswerCorrectness/long_form_answer_prompt_chinese.json +36 -0
  6. evalscope/backend/rag_eval/ragas/prompts/chinese/AnswerRelevancy/question_generation_chinese.json +26 -0
  7. evalscope/backend/rag_eval/ragas/prompts/chinese/ContextPrecision/context_precision_prompt_chinese.json +41 -0
  8. evalscope/backend/rag_eval/ragas/prompts/chinese/Faithfulness/nli_statements_message_chinese.json +60 -0
  9. evalscope/backend/rag_eval/ragas/prompts/chinese/Faithfulness/statement_prompt_chinese.json +36 -0
  10. evalscope/backend/rag_eval/ragas/prompts/chinese/HeadlinesExtractor/prompt_chinese.json +22 -0
  11. evalscope/backend/rag_eval/ragas/prompts/chinese/MultiHopAbstractQuerySynthesizer/concept_combination_prompt_chinese.json +35 -0
  12. evalscope/backend/rag_eval/ragas/prompts/chinese/MultiHopAbstractQuerySynthesizer/generate_query_reference_prompt_chinese.json +7 -0
  13. evalscope/backend/rag_eval/ragas/prompts/chinese/MultiHopAbstractQuerySynthesizer/theme_persona_matching_prompt_chinese.json +39 -0
  14. evalscope/backend/rag_eval/ragas/prompts/chinese/MultiHopSpecificQuerySynthesizer/generate_query_reference_prompt_chinese.json +7 -0
  15. evalscope/backend/rag_eval/ragas/prompts/chinese/MultiHopSpecificQuerySynthesizer/theme_persona_matching_prompt_chinese.json +39 -0
  16. evalscope/backend/rag_eval/ragas/prompts/chinese/MultiModalFaithfulness/faithfulness_prompt_chinese.json +34 -0
  17. evalscope/backend/rag_eval/ragas/prompts/chinese/MultiModalRelevance/relevance_prompt_chinese.json +36 -0
  18. evalscope/backend/rag_eval/ragas/prompts/chinese/NERExtractor/prompt_chinese.json +25 -0
  19. evalscope/backend/rag_eval/ragas/prompts/chinese/SingleHopSpecificQuerySynthesizer/generate_query_reference_prompt_chinese.json +7 -0
  20. evalscope/backend/rag_eval/ragas/prompts/chinese/SingleHopSpecificQuerySynthesizer/theme_persona_matching_prompt_chinese.json +39 -0
  21. evalscope/backend/rag_eval/ragas/prompts/chinese/SummaryExtractor/prompt_chinese.json +16 -0
  22. evalscope/backend/rag_eval/ragas/prompts/chinese/ThemesExtractor/prompt_chinese.json +24 -0
  23. evalscope/backend/rag_eval/ragas/prompts/persona_prompt.py +18 -0
  24. evalscope/backend/rag_eval/ragas/tasks/testset_generation.py +120 -100
  25. evalscope/backend/rag_eval/utils/clip.py +149 -0
  26. evalscope/backend/rag_eval/utils/embedding.py +183 -0
  27. evalscope/backend/rag_eval/utils/llm.py +72 -0
  28. evalscope/backend/rag_eval/utils/tools.py +63 -0
  29. evalscope/backend/vlm_eval_kit/backend_manager.py +23 -21
  30. evalscope/benchmarks/ceval/samples.jsonl +1 -0
  31. evalscope/benchmarks/cmmlu/samples.jsonl +5 -0
  32. evalscope/benchmarks/mmlu/samples.jsonl +5 -0
  33. evalscope/benchmarks/race/samples.jsonl +5 -0
  34. evalscope/benchmarks/trivia_qa/samples.jsonl +5 -0
  35. evalscope/cli/start_perf.py +8 -11
  36. evalscope/metrics/bundled_rouge_score/rouge_scorer.py +1 -1
  37. evalscope/metrics/resources/gpt2-zhcn3-v4.bpe +58485 -0
  38. evalscope/metrics/resources/gpt2-zhcn3-v4.json +1 -0
  39. evalscope/metrics/rouge_metric.py +30 -15
  40. evalscope/perf/arguments.py +179 -0
  41. evalscope/perf/benchmark.py +245 -0
  42. evalscope/perf/http_client.py +127 -711
  43. evalscope/perf/main.py +35 -0
  44. evalscope/perf/plugin/__init__.py +2 -0
  45. evalscope/perf/plugin/api/__init__.py +3 -0
  46. evalscope/perf/{api_plugin_base.py → plugin/api/base.py} +17 -18
  47. evalscope/perf/{custom_api.py → plugin/api/custom_api.py} +25 -19
  48. evalscope/perf/{dashscope_api.py → plugin/api/dashscope_api.py} +28 -14
  49. evalscope/perf/{openai_api.py → plugin/api/openai_api.py} +51 -27
  50. evalscope/perf/plugin/datasets/__init__.py +6 -0
  51. evalscope/perf/{dataset_plugin_base.py → plugin/datasets/base.py} +13 -10
  52. evalscope/perf/plugin/datasets/custom.py +21 -0
  53. evalscope/perf/plugin/datasets/flickr8k.py +51 -0
  54. evalscope/perf/{datasets → plugin/datasets}/line_by_line.py +9 -5
  55. evalscope/perf/plugin/datasets/longalpaca.py +28 -0
  56. evalscope/perf/plugin/datasets/openqa.py +38 -0
  57. evalscope/perf/plugin/datasets/speed_benchmark.py +50 -0
  58. evalscope/perf/plugin/registry.py +54 -0
  59. evalscope/perf/{how_to_analysis_result.py → utils/analysis_result.py} +11 -5
  60. evalscope/perf/utils/benchmark_util.py +135 -0
  61. evalscope/perf/utils/chat_service.py +252 -0
  62. evalscope/perf/utils/db_util.py +200 -0
  63. evalscope/perf/utils/handler.py +46 -0
  64. evalscope/perf/utils/local_server.py +139 -0
  65. evalscope/registry/config/cfg_arena.yaml +77 -0
  66. evalscope/registry/config/cfg_arena_zhihu.yaml +63 -0
  67. evalscope/registry/config/cfg_pairwise_baseline.yaml +83 -0
  68. evalscope/registry/config/cfg_single.yaml +78 -0
  69. evalscope/registry/data/prompt_template/lmsys_v2.jsonl +8 -0
  70. evalscope/registry/data/prompt_template/prompt_templates.jsonl +8 -0
  71. evalscope/registry/data/qa_browser/battle.jsonl +634 -0
  72. evalscope/registry/data/qa_browser/category_mapping.yaml +10 -0
  73. evalscope/registry/data/question.jsonl +80 -0
  74. evalscope/third_party/longbench_write/README.md +118 -0
  75. evalscope/third_party/longbench_write/default_task.json +27 -0
  76. evalscope/third_party/longbench_write/default_task.yaml +24 -0
  77. evalscope/third_party/toolbench_static/README.md +118 -0
  78. evalscope/third_party/toolbench_static/config_default.json +15 -0
  79. evalscope/third_party/toolbench_static/config_default.yaml +12 -0
  80. evalscope/third_party/toolbench_static/requirements.txt +2 -0
  81. evalscope/utils/logger.py +18 -20
  82. evalscope/utils/utils.py +41 -42
  83. evalscope/version.py +2 -2
  84. evalscope-0.7.0.dist-info/LICENSE +203 -0
  85. {evalscope-0.6.0rc0.dist-info → evalscope-0.7.0.dist-info}/METADATA +162 -103
  86. {evalscope-0.6.0rc0.dist-info → evalscope-0.7.0.dist-info}/RECORD +107 -32
  87. {evalscope-0.6.0rc0.dist-info → evalscope-0.7.0.dist-info}/WHEEL +1 -1
  88. {evalscope-0.6.0rc0.dist-info → evalscope-0.7.0.dist-info}/top_level.txt +1 -0
  89. tests/cli/__init__.py +1 -0
  90. tests/cli/test_run.py +76 -0
  91. tests/perf/__init__.py +1 -0
  92. tests/perf/test_perf.py +96 -0
  93. tests/rag/__init__.py +0 -0
  94. tests/rag/test_clip_benchmark.py +85 -0
  95. tests/rag/test_mteb.py +136 -0
  96. tests/rag/test_ragas.py +120 -0
  97. tests/swift/__init__.py +1 -0
  98. tests/swift/test_run_swift_eval.py +146 -0
  99. tests/swift/test_run_swift_vlm_eval.py +128 -0
  100. tests/swift/test_run_swift_vlm_jugde_eval.py +157 -0
  101. tests/test_run_all.py +12 -0
  102. tests/vlm/__init__.py +1 -0
  103. tests/vlm/test_vlmeval.py +59 -0
  104. evalscope/perf/_logging.py +0 -32
  105. evalscope/perf/datasets/longalpaca_12k.py +0 -20
  106. evalscope/perf/datasets/openqa.py +0 -22
  107. evalscope/perf/plugin_registry.py +0 -35
  108. evalscope/perf/query_parameters.py +0 -42
  109. evalscope/perf/server_sent_event.py +0 -43
  110. evalscope/preprocess/tokenizers/gpt2_tokenizer.py +0 -221
  111. /evalscope/{perf/datasets → backend/rag_eval/utils}/__init__.py +0 -0
  112. /evalscope/{preprocess/tokenizers → perf/utils}/__init__.py +0 -0
  113. {evalscope-0.6.0rc0.dist-info → evalscope-0.7.0.dist-info}/entry_points.txt +0 -0
  114. {evalscope/preprocess → tests}/__init__.py +0 -0

There are too many changes on this page to be displayed.


The amount of changes on this page is too large to display and could crash your browser.

You can still verify the content by downloading the package file manually.