evalscope 0.14.0__py3-none-any.whl → 0.15.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of evalscope might be problematic.

Files changed (181)
  1. evalscope/arguments.py +2 -1
  2. evalscope/benchmarks/__init__.py +2 -2
  3. evalscope/benchmarks/aigc/__init__.py +0 -0
  4. evalscope/benchmarks/aigc/t2i/__init__.py +0 -0
  5. evalscope/benchmarks/aigc/t2i/base.py +56 -0
  6. evalscope/benchmarks/aigc/t2i/evalmuse_adapter.py +77 -0
  7. evalscope/benchmarks/aigc/t2i/genai_bench_adapter.py +58 -0
  8. evalscope/benchmarks/aigc/t2i/general_t2i_adapter.py +58 -0
  9. evalscope/benchmarks/aigc/t2i/hpdv2_adapter.py +57 -0
  10. evalscope/benchmarks/aigc/t2i/tifa_adapter.py +37 -0
  11. evalscope/benchmarks/aime/aime24_adapter.py +1 -1
  12. evalscope/benchmarks/aime/aime25_adapter.py +4 -4
  13. evalscope/benchmarks/alpaca_eval/alpaca_eval_adapter.py +1 -2
  14. evalscope/benchmarks/arc/arc_adapter.py +1 -1
  15. evalscope/benchmarks/arena_hard/arena_hard_adapter.py +1 -3
  16. evalscope/benchmarks/ceval/ceval_adapter.py +2 -2
  17. evalscope/benchmarks/chinese_simple_qa/csimple_qa_adapter.py +1 -3
  18. evalscope/benchmarks/cmmlu/cmmlu_adapter.py +1 -1
  19. evalscope/benchmarks/competition_math/competition_math_adapter.py +1 -2
  20. evalscope/benchmarks/data_adapter.py +16 -9
  21. evalscope/benchmarks/data_collection/data_collection_adapter.py +6 -4
  22. evalscope/benchmarks/general_mcq/general_mcq_adapter.py +2 -2
  23. evalscope/benchmarks/general_qa/general_qa_adapter.py +3 -3
  24. evalscope/benchmarks/live_code_bench/evaluate_utils.py +16 -21
  25. evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py +4 -1
  26. evalscope/benchmarks/live_code_bench/testing_util.py +6 -3
  27. evalscope/benchmarks/math_500/math_500_adapter.py +1 -1
  28. evalscope/benchmarks/mmlu/mmlu_adapter.py +3 -1
  29. evalscope/benchmarks/simple_qa/simple_qa_adapter.py +1 -2
  30. evalscope/benchmarks/utils.py +7 -16
  31. evalscope/cli/start_app.py +1 -1
  32. evalscope/collections/evaluator.py +16 -4
  33. evalscope/config.py +7 -3
  34. evalscope/constants.py +11 -0
  35. evalscope/evaluator/evaluator.py +9 -3
  36. evalscope/evaluator/reviewer/auto_reviewer.py +1 -1
  37. evalscope/metrics/__init__.py +49 -4
  38. evalscope/metrics/llm_judge.py +1 -1
  39. evalscope/metrics/named_metrics.py +13 -0
  40. evalscope/metrics/t2v_metrics/__init__.py +66 -0
  41. evalscope/metrics/t2v_metrics/clipscore.py +14 -0
  42. evalscope/metrics/t2v_metrics/constants.py +12 -0
  43. evalscope/metrics/t2v_metrics/itmscore.py +14 -0
  44. evalscope/metrics/t2v_metrics/models/__init__.py +0 -0
  45. evalscope/metrics/t2v_metrics/models/clipscore_models/__init__.py +30 -0
  46. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/__init__.py +0 -0
  47. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/base_model.py +6 -0
  48. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/clip_model.py +132 -0
  49. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/cross_modeling.py +286 -0
  50. evalscope/metrics/t2v_metrics/models/clipscore_models/clip_model.py +114 -0
  51. evalscope/metrics/t2v_metrics/models/clipscore_models/hpsv2_model.py +86 -0
  52. evalscope/metrics/t2v_metrics/models/clipscore_models/mps_model.py +85 -0
  53. evalscope/metrics/t2v_metrics/models/clipscore_models/pickscore_model.py +62 -0
  54. evalscope/metrics/t2v_metrics/models/itmscore_models/__init__.py +26 -0
  55. evalscope/metrics/t2v_metrics/models/itmscore_models/blip2_itm_model.py +84 -0
  56. evalscope/metrics/t2v_metrics/models/itmscore_models/fga_blip2_model.py +97 -0
  57. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/ImageReward.py +171 -0
  58. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/__init__.py +0 -0
  59. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/blip_pretrain.py +80 -0
  60. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward_model.py +73 -0
  61. evalscope/metrics/t2v_metrics/models/model.py +45 -0
  62. evalscope/metrics/t2v_metrics/models/utils.py +25 -0
  63. evalscope/metrics/t2v_metrics/models/vqascore_models/__init__.py +22 -0
  64. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/__init__.py +0 -0
  65. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/__init__.py +1 -0
  66. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/language_model/clip_t5.py +300 -0
  67. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/builder.py +12 -0
  68. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/clip_encoder.py +82 -0
  69. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_projector/builder.py +50 -0
  70. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5_model.py +218 -0
  71. evalscope/metrics/t2v_metrics/models/vqascore_models/gpt4v_model.py +150 -0
  72. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/__init__.py +26 -0
  73. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/config.py +465 -0
  74. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/dist_utils.py +141 -0
  75. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/gradcam.py +22 -0
  76. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/logger.py +188 -0
  77. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/optims.py +106 -0
  78. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/registry.py +307 -0
  79. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/utils.py +416 -0
  80. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/__init__.py +8 -0
  81. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa.py +191 -0
  82. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa_eval.py +318 -0
  83. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/default.yaml +10 -0
  84. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_flant5xl.yaml +42 -0
  85. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt2.7b.yaml +42 -0
  86. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_caption_opt6.7b.yaml +42 -0
  87. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_coco.yaml +36 -0
  88. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xl.yaml +43 -0
  89. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_flant5xxl.yaml +43 -0
  90. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna13b.yaml +43 -0
  91. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_instruct_vicuna7b.yaml +43 -0
  92. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain.yaml +36 -0
  93. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl.yaml +42 -0
  94. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_iter_80k_total_100k_no_prefix.yaml +42 -0
  95. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_iter_80k_total_100k_prefix.yaml +42 -0
  96. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xl_vitL.yaml +43 -0
  97. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_flant5xxl.yaml +42 -0
  98. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_opt2.7b.yaml +42 -0
  99. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_opt6.7b.yaml +42 -0
  100. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_pretrain_vitL.yaml +37 -0
  101. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna13b.yaml +43 -0
  102. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/blip2/blip2_vicuna7b.yaml +43 -0
  103. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config.json +21 -0
  104. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_config_albef.json +22 -0
  105. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/configs/models/med_large_config.json +21 -0
  106. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/__init__.py +208 -0
  107. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/base_model.py +231 -0
  108. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py +1093 -0
  109. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/__init__.py +0 -0
  110. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2.py +211 -0
  111. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_image_text_matching.py +109 -0
  112. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_qformer.py +452 -0
  113. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5.py +364 -0
  114. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5_instruct.py +755 -0
  115. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/fga_blip2.py +273 -0
  116. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_llama.py +880 -0
  117. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_t5.py +1844 -0
  118. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/__init__.py +81 -0
  119. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip.py +56 -0
  120. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_caption.py +212 -0
  121. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_classification.py +164 -0
  122. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_feature_extractor.py +202 -0
  123. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_image_text_matching.py +185 -0
  124. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_nlvr.py +178 -0
  125. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_outputs.py +112 -0
  126. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_pretrain.py +371 -0
  127. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_vqa.py +344 -0
  128. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py +858 -0
  129. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/clip_vit.py +271 -0
  130. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/eva_vit.py +503 -0
  131. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py +1270 -0
  132. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/vit.py +473 -0
  133. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/__init__.py +31 -0
  134. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/base_processor.py +27 -0
  135. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/blip_processors.py +233 -0
  136. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/randaugment.py +392 -0
  137. evalscope/metrics/t2v_metrics/models/vqascore_models/mm_utils.py +127 -0
  138. evalscope/metrics/t2v_metrics/models/vqascore_models/vqa_model.py +17 -0
  139. evalscope/metrics/t2v_metrics/score.py +78 -0
  140. evalscope/metrics/t2v_metrics/vqascore.py +14 -0
  141. evalscope/models/__init__.py +50 -14
  142. evalscope/models/adapters/__init__.py +17 -0
  143. evalscope/models/{base_adapter.py → adapters/base_adapter.py} +17 -17
  144. evalscope/models/{chat_adapter.py → adapters/chat_adapter.py} +10 -7
  145. evalscope/models/{choice_adapter.py → adapters/choice_adapter.py} +2 -6
  146. evalscope/models/{custom_adapter.py → adapters/custom_adapter.py} +2 -4
  147. evalscope/models/{server_adapter.py → adapters/server_adapter.py} +1 -3
  148. evalscope/models/adapters/t2i_adapter.py +76 -0
  149. evalscope/models/custom/__init__.py +2 -1
  150. evalscope/models/custom/dummy_model.py +11 -13
  151. evalscope/models/local_model.py +82 -33
  152. evalscope/models/model.py +2 -42
  153. evalscope/models/register.py +26 -0
  154. evalscope/perf/benchmark.py +4 -3
  155. evalscope/perf/main.py +4 -2
  156. evalscope/perf/plugin/datasets/flickr8k.py +2 -1
  157. evalscope/perf/utils/benchmark_util.py +2 -2
  158. evalscope/perf/utils/db_util.py +16 -8
  159. evalscope/report/__init__.py +1 -0
  160. evalscope/report/app.py +117 -67
  161. evalscope/report/app_arguments.py +11 -0
  162. evalscope/report/generator.py +1 -1
  163. evalscope/run.py +3 -3
  164. evalscope/third_party/thinkbench/eval.py +19 -7
  165. evalscope/utils/chat_service.py +2 -2
  166. evalscope/utils/import_utils.py +66 -0
  167. evalscope/utils/utils.py +12 -4
  168. evalscope/version.py +2 -2
  169. {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/METADATA +20 -3
  170. {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/RECORD +178 -66
  171. tests/aigc/__init__.py +1 -0
  172. tests/aigc/test_t2i.py +87 -0
  173. tests/cli/test_run.py +20 -7
  174. tests/perf/test_perf.py +6 -3
  175. evalscope/metrics/code_metric.py +0 -98
  176. evalscope/metrics/resources/gpt2-zhcn3-v4.bpe +0 -58485
  177. evalscope/metrics/resources/gpt2-zhcn3-v4.json +0 -1
  178. {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/LICENSE +0 -0
  179. {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/WHEEL +0 -0
  180. {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/entry_points.txt +0 -0
  181. {evalscope-0.14.0.dist-info → evalscope-0.15.1.dist-info}/top_level.txt +0 -0
evalscope/models/local_model.py CHANGED
@@ -1,7 +1,8 @@
- import torch
+ import importlib
+ from abc import ABC, abstractmethod
  from typing import TYPE_CHECKING, Optional

- from evalscope.constants import DEFAULT_MODEL_CACHE_DIR, DEFAULT_MODEL_REVISION, EvalType
+ from evalscope.constants import DEFAULT_MODEL_CACHE_DIR, DEFAULT_MODEL_REVISION, EvalType, ModelTask
  from evalscope.utils.logger import get_logger
  from evalscope.utils.model_utils import get_device

@@ -11,31 +12,55 @@ if TYPE_CHECKING:
  logger = get_logger()


- class LocalModel:
+ class LocalModel(ABC):

  def __init__(self,
  model_id: str,
- model_revision: str = DEFAULT_MODEL_REVISION,
- device_map: str = 'auto',
+ model_revision: str = None,
+ device_map: str = None,
  torch_dtype: str = 'auto',
  cache_dir: str = None,
  **kwargs):
- from modelscope import AutoModelForCausalLM, AutoTokenizer

- model_cache_dir = cache_dir or DEFAULT_MODEL_CACHE_DIR
+ self.model_id = model_id
+ self.model_revision = model_revision or DEFAULT_MODEL_REVISION
+ self.device = device_map or get_device()
+ self.cache_dir = cache_dir or DEFAULT_MODEL_CACHE_DIR
+ self.kwargs = kwargs
+ self.model = None
+ self.tokenizer = None

  if isinstance(torch_dtype, str) and torch_dtype != 'auto':
+ import torch
  torch_dtype = eval(torch_dtype)
+ self.torch_dtype = torch_dtype
+
+ self.model_cfg = {
+ 'model_id': self.model_id,
+ 'device_map': self.device,
+ 'torch_dtype': str(self.torch_dtype),
+ }
+
+ @abstractmethod
+ def load_model(self):
+ pass

- self.model_id = model_id
- self.model_revision = model_revision
- self.device = device_map
+
+ class LocalChatModel(LocalModel):
+
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
+
+ def load_model(self):
+ from modelscope import AutoModelForCausalLM, AutoTokenizer
+
+ logger.info(f'Loading model {self.model_id} ...')

  self.tokenizer = AutoTokenizer.from_pretrained(
  self.model_id,
- revision=model_revision,
+ revision=self.model_revision,
  trust_remote_code=True,
- cache_dir=model_cache_dir,
+ cache_dir=self.cache_dir,
  )

  # Fix no padding
@@ -44,18 +69,45 @@ class LocalModel:

  self.model = AutoModelForCausalLM.from_pretrained(
  self.model_id,
- revision=model_revision,
- device_map=device_map,
+ revision=self.model_revision,
+ device_map=self.device,
  trust_remote_code=True,
- torch_dtype=torch_dtype,
- cache_dir=model_cache_dir,
+ torch_dtype=self.torch_dtype,
+ cache_dir=self.cache_dir,
  )

- self.model_cfg = {
- 'model_id': model_id,
- 'device_map': device_map,
- 'torch_dtype': str(torch_dtype),
- }
+
+ class LocalImageModel(LocalModel):
+
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
+
+ self.pipeline_cls = kwargs.pop('pipeline_cls', None)
+ # default to DiffusionPipeline if not specified
+ if self.pipeline_cls is None:
+ if 'flux' in self.model_id.lower():
+ self.pipeline_cls = 'FluxPipeline'
+ else:
+ self.pipeline_cls = 'DiffusionPipeline'
+
+ def load_model(self):
+ # from modelscope import pipeline_cls
+ module = getattr(importlib.import_module('modelscope'), self.pipeline_cls)
+
+ logger.info(f'Loading model {self.model_id} with {self.pipeline_cls} ...')
+
+ self.model = module.from_pretrained(
+ self.model_id,
+ revision=self.model_revision,
+ torch_dtype=self.torch_dtype,
+ cache_dir=self.cache_dir,
+ **self.kwargs,
+ )
+
+ self.model.to(self.device)
+
+ def __call__(self, *args, **kwargs):
+ return self.model(*args, **kwargs)


  def get_local_model(task_cfg: 'TaskConfig') -> Optional[LocalModel]:
@@ -64,16 +116,13 @@ def get_local_model(task_cfg: 'TaskConfig') -> Optional[LocalModel]:
  """
  if task_cfg.eval_type != EvalType.CHECKPOINT:
  return None
- else:
- device_map = task_cfg.model_args.get('device_map', get_device())
- cache_dir = task_cfg.model_args.get('cache_dir', None)
- model_precision = task_cfg.model_args.get('precision', 'torch.float16')
- model_revision = task_cfg.model_args.get('revision', DEFAULT_MODEL_REVISION)
-
- base_model = LocalModel(
- model_id=task_cfg.model,
- model_revision=model_revision,
- device_map=device_map,
- torch_dtype=model_precision,
- cache_dir=cache_dir)
+ elif task_cfg.model_task == ModelTask.TEXT_GENERATION:
+ base_model = LocalChatModel(model_id=task_cfg.model, **task_cfg.model_args)
+ base_model.load_model()
+ return base_model
+ elif task_cfg.model_task == ModelTask.IMAGE_GENERATION:
+ base_model = LocalImageModel(model_id=task_cfg.model, **task_cfg.model_args)
+ base_model.load_model()
  return base_model
+ else:
+ raise ValueError(f'Unsupported model task: {task_cfg.model_task} for model checkpoint.')
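
The refactor above splits the old monolithic `LocalModel` into an abstract base plus task-specific subclasses, with `get_local_model` dispatching on `task_cfg.model_task`. Below is a minimal usage sketch, assuming the classes are imported from `evalscope.models.local_model`; the model IDs and prompt are illustrative placeholders, not values taken from this diff.

```python
# Hedged sketch only: model IDs and generation kwargs are placeholders.
from evalscope.models.local_model import LocalChatModel, LocalImageModel

# Text model: __init__ only records config; load_model() pulls the tokenizer
# and AutoModelForCausalLM from modelscope.
chat_model = LocalChatModel(model_id='Qwen/Qwen2.5-0.5B-Instruct')  # hypothetical model id
chat_model.load_model()

# Image model: the pipeline class is inferred from the model id ('flux' -> FluxPipeline),
# or can be pinned explicitly via pipeline_cls.
t2i_model = LocalImageModel(model_id='some-org/flux-model', pipeline_cls='FluxPipeline')  # hypothetical
t2i_model.load_model()                        # resolves the pipeline from modelscope, moves it to the device
images = t2i_model(prompt='a watercolor fox')  # __call__ forwards to the underlying diffusion pipeline
```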
evalscope/models/model.py CHANGED
@@ -1,9 +1,8 @@
  # Copyright (c) Alibaba, Inc. and its affiliates.
  import os
- import random
  import time
  from abc import ABC, abstractmethod
- from typing import Any
+ from typing import Any, List

  from evalscope.utils.logger import get_logger

@@ -95,6 +94,7 @@ class ChatBaseModel(BaseModel):
  raise NotImplementedError


+ # TODO: Remove this class after refactoring all models
  class OpenAIModel(ChatBaseModel):
  """
  APIs of OpenAI models.
@@ -187,43 +187,3 @@ class OpenAIModel(ChatBaseModel):
  time.sleep(3)
  logger.error(f'OpenAI API call failed after {self.MAX_RETRIES} retries')
  return res
-
-
- class DummyChatModel(ChatBaseModel):
-
- MODEL_ID = 'dummy_chat_model_0801'
- REVISION = 'v1.0.0'
-
- def __init__(self, model_cfg: dict, **kwargs):
- model_cfg['model_id'] = self.MODEL_ID
- model_cfg['revision'] = self.REVISION
- super(DummyChatModel, self).__init__(model_cfg=model_cfg)
-
- def predict(self, inputs: dict, **kwargs) -> dict:
-
- debug: bool = False
- if debug:
- messages = inputs['messages']
- history = inputs['history']
-
- logger.info(f'** messages: {messages}')
- logger.info(f'** history: {history}')
-
- choice = random.choice(['A', 'B', 'C', 'D'])
-
- # Build response
- res = {
- 'choices': [{
- 'index': 0,
- 'message': {
- 'content': choice,
- 'role': 'assistant'
- }
- }],
- 'created': time.time(),
- 'model': self.MODEL_ID + '-' + self.REVISION,
- 'object': 'chat.completion',
- 'usage': {}
- }
-
- return res
evalscope/models/register.py CHANGED
@@ -1,3 +1,6 @@
+ from evalscope.constants import OutputType
+ from .adapters import *
+
  MODEL_ADAPTERS = {}


@@ -26,3 +29,26 @@ def get_model_adapter(name):
  raise ValueError(
  f"Model adapter '{name}' is not registered. Available model adapters: {list(MODEL_ADAPTERS.keys())}")
  return MODEL_ADAPTERS[name]
+
+
+ def register_model_adapter_class(cls, name=None):
+ """
+ Register a model adapter class.
+ :param cls: The model adapter class to register
+ :param name: Optional name for the model adapter. If not provided, the class name will be used.
+ """
+ if name is None:
+ name = cls.__name__
+ if name in MODEL_ADAPTERS:
+ raise ValueError(f"Model adapter class '{name}' is already registered.")
+ MODEL_ADAPTERS[name] = cls
+
+
+ # register all model adapters
+ register_model_adapter_class(BaseModelAdapter, name='base')
+ register_model_adapter_class(ChatGenerationModelAdapter, name=OutputType.GENERATION)
+ register_model_adapter_class(ContinuationLogitsModelAdapter, name=OutputType.LOGITS)
+ register_model_adapter_class(MultiChoiceModelAdapter, name=OutputType.MULTIPLE_CHOICE)
+ register_model_adapter_class(CustomModelAdapter, name='custom')
+ register_model_adapter_class(ServerModelAdapter, name='server')
+ register_model_adapter_class(T2IModelAdapter, name=OutputType.IMAGE_GENERATION)
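
The registry above now supports registering adapter classes by name. A hedged sketch of how a custom adapter might be registered and looked up; `MyAdapter` is hypothetical, and the assumption is that `BaseModelAdapter` is re-exported from `evalscope.models.adapters` as the wildcard import suggests.

```python
# Sketch under assumptions: MyAdapter is a hypothetical subclass used only for illustration.
from evalscope.models.adapters import BaseModelAdapter
from evalscope.models.register import get_model_adapter, register_model_adapter_class


class MyAdapter(BaseModelAdapter):
    """Hypothetical custom adapter."""


# Register under an explicit name (the class name is used when name is omitted);
# registering the same name twice raises ValueError.
register_model_adapter_class(MyAdapter, name='my_adapter')

adapter_cls = get_model_adapter('my_adapter')  # -> MyAdapter
```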
evalscope/perf/benchmark.py CHANGED
@@ -9,7 +9,7 @@ import threading
  import time
  from http import HTTPStatus
  from tqdm import tqdm
- from typing import AsyncGenerator, List
+ from typing import AsyncGenerator, Dict, List, Tuple

  from evalscope.perf.arguments import Arguments
  from evalscope.perf.http_client import AioHttpClient, test_connection
@@ -180,7 +180,7 @@ async def connect_test(args: Arguments) -> bool:


  @exception_handler
- async def benchmark(args: Arguments) -> None:
+ async def benchmark(args: Arguments) -> Tuple[Dict, Dict]:
  if platform.system() != 'Windows':
  loop = asyncio.get_running_loop()
  add_signal_handlers(loop)
@@ -205,4 +205,5 @@ async def benchmark(args: Arguments) -> None:
  data_process_completed_event.set()

  metrics, result_db_path = await statistic_benchmark_metric_task
- summary_result(args, metrics, result_db_path)
+ metrics_result, percentile_result = summary_result(args, metrics, result_db_path)
+ return metrics_result, percentile_result
evalscope/perf/main.py CHANGED
@@ -36,9 +36,11 @@ def run_perf_benchmark(args):
  if platform.system() != 'Windows':
  add_signal_handlers(loop)

- loop.run_until_complete(benchmark(args))
+ return loop.run_until_complete(benchmark(args))


  if __name__ == '__main__':
  args = Arguments.from_args(parse_args())
- run_perf_benchmark(args)
+ metrics_result, percentile_result = run_perf_benchmark(args)
+ print(metrics_result)
+ print(percentile_result)
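
With this change `run_perf_benchmark` returns the summary and percentile tables instead of only logging them. A minimal sketch of consuming that return value; the `Arguments` fields, endpoint, and model name below are illustrative placeholders rather than a verified or exhaustive option list.

```python
# Hedged sketch: endpoint, model name and dataset are placeholders.
from evalscope.perf.arguments import Arguments
from evalscope.perf.main import run_perf_benchmark

args = Arguments(
    url='http://127.0.0.1:8000/v1/chat/completions',  # hypothetical endpoint
    model='my-model',                                  # hypothetical served model name
    api='openai',
    dataset='openqa',
    number=20,
    parallel=2,
)

metrics_result, percentile_result = run_perf_benchmark(args)
print(metrics_result['Expected number of requests'])  # same dict that is written to benchmark_summary.json
print(percentile_result)                              # per-percentile TTFT / TPOT / throughput values
```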
evalscope/perf/plugin/datasets/flickr8k.py CHANGED
@@ -30,6 +30,7 @@ class FlickrDatasetPlugin(DatasetPluginBase):

  for item in dataset:
  pil_image = item['jpg']
+ text = item['txt']
  base64_iamge = PIL_to_base64(pil_image)

  yield [{
@@ -38,7 +39,7 @@
  'content': [
  {
  'type': 'text',
- 'text': 'Describe the image'
+ 'text': text,
  },
  {
  'type': 'image_url',
evalscope/perf/utils/benchmark_util.py CHANGED
@@ -32,13 +32,13 @@ class BenchmarkData:
  self.query_latency = self.completed_time - self.start_time
  if len(self.chunk_times) > 1:
  self.first_chunk_latency = self.chunk_times[0] - self.start_time
- self.n_chunks = len(self.chunk_times) - 2
+ self.n_chunks = len(self.chunk_times) - 2 # remove last and first chunk
  self.n_chunks_time = self.chunk_times[-2] - self.chunk_times[0]
  else:
  self.first_chunk_latency = self.query_latency
  self.n_chunks = 1
  self.n_chunks_time = self.query_latency
- self.time_per_output_token = self.query_latency / self.completion_tokens
+ self.time_per_output_token = self.n_chunks_time / self.completion_tokens

  def _calculate_tokens(self, api_plugin):
  self.prompt_tokens, self.completion_tokens = \
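
The `time_per_output_token` change above switches the numerator from whole-request latency to the decode-only window (`n_chunks_time`), so time-to-first-token no longer inflates TPOT. A small worked example with invented timings:

```python
# Invented numbers, mirroring the fields in BenchmarkData above.
start_time = 0.0
completed_time = 3.0
chunk_times = [1.0, 1.5, 2.0, 2.5, 3.0]  # first token arrives at t=1.0s
completion_tokens = 40

query_latency = completed_time - start_time        # 3.0 s, includes 1.0 s time-to-first-token
n_chunks_time = chunk_times[-2] - chunk_times[0]   # 1.5 s, first and last chunk excluded

old_tpot = query_latency / completion_tokens   # 0.075  s/token (inflated by TTFT)
new_tpot = n_chunks_time / completion_tokens   # 0.0375 s/token (decode-only)
```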
evalscope/perf/utils/db_util.py CHANGED
@@ -7,7 +7,7 @@ import sqlite3
  import sys
  from datetime import datetime
  from tabulate import tabulate
- from typing import Dict, List
+ from typing import Dict, List, Tuple

  from evalscope.perf.arguments import Arguments
  from evalscope.perf.utils.benchmark_util import BenchmarkData, BenchmarkMetrics
@@ -165,6 +165,7 @@ def get_percentile_results(result_db_path: str) -> Dict[str, List[float]]:
  CHUNK_TIMES_INDEX = 1
  LATENCY_INDEX = 4
  FIRST_CHUNK_LATENCY_INDEX = 5
+ CHUNK_TIME_INDEX = 7
  PROMPT_TOKENS_INDEX = 8
  COMPLETION_TOKENS_INDEX = 9

@@ -177,12 +178,17 @@ def get_percentile_results(result_db_path: str) -> Dict[str, List[float]]:
  'TTFT (s)': [row[FIRST_CHUNK_LATENCY_INDEX] for row in rows],
  'ITL (s)':
  inter_token_latencies_all,
+ 'TPOT (s)':
+ [(row[CHUNK_TIME_INDEX] / row[COMPLETION_TOKENS_INDEX]) if row[COMPLETION_TOKENS_INDEX] > 0 else float('nan')
+ for row in rows],
  'Latency (s)': [row[LATENCY_INDEX] for row in rows],
  'Input tokens': [row[PROMPT_TOKENS_INDEX] for row in rows],
  'Output tokens': [row[COMPLETION_TOKENS_INDEX] for row in rows],
- 'Throughput(tokens/s)':
+ 'Output throughput(tok/s)':
  [(row[COMPLETION_TOKENS_INDEX] / row[LATENCY_INDEX]) if row[LATENCY_INDEX] > 0 else float('nan')
- for row in rows]
+ for row in rows],
+ 'Total throughput(tok/s)': [((row[PROMPT_TOKENS_INDEX] + row[COMPLETION_TOKENS_INDEX])
+ / row[LATENCY_INDEX]) if row[LATENCY_INDEX] > 0 else float('nan') for row in rows]
  }

  # Calculate percentiles for each metric
@@ -194,16 +200,16 @@ def get_percentile_results(result_db_path: str) -> Dict[str, List[float]]:
  return results


- def summary_result(args: Arguments, metrics: BenchmarkMetrics, result_db_path: str):
+ def summary_result(args: Arguments, metrics: BenchmarkMetrics, result_db_path: str) -> Tuple[Dict, Dict]:
  result_path = os.path.dirname(result_db_path)
  write_json_file(args.to_dict(), os.path.join(result_path, 'benchmark_args.json'))

- data = metrics.create_message()
- data.update({'Expected number of requests': args.number, 'Result DB path': result_db_path})
- write_json_file(data, os.path.join(result_path, 'benchmark_summary.json'))
+ metrics_result = metrics.create_message()
+ metrics_result.update({'Expected number of requests': args.number, 'Result DB path': result_db_path})
+ write_json_file(metrics_result, os.path.join(result_path, 'benchmark_summary.json'))

  # Print summary in a table
- table = tabulate(list(data.items()), headers=['Key', 'Value'], tablefmt='grid')
+ table = tabulate(list(metrics_result.items()), headers=['Key', 'Value'], tablefmt='grid')
  logger.info('\nBenchmarking summary:\n' + table)

  # Get percentile results
@@ -217,6 +223,8 @@ def summary_result(args: Arguments, metrics: BenchmarkMetrics, result_db_path: s
  if args.dataset.startswith('speed_benchmark'):
  speed_benchmark_result(result_db_path)

+ return metrics_result, percentile_result
+

  def speed_benchmark_result(result_db_path: str):
  query_sql = """
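
Alongside the renamed throughput column, the percentile table gains per-request TPOT and total-throughput entries. A hedged sketch of the per-request formulas, using the column indexes declared above; `row` stands for one row fetched from the result database and the helper function itself is hypothetical.

```python
import math

def per_request_metrics(row):
    # Indexes follow CHUNK_TIME_INDEX / LATENCY_INDEX / PROMPT_TOKENS_INDEX /
    # COMPLETION_TOKENS_INDEX in the diff above.
    chunk_time, latency = row[7], row[4]
    prompt_tokens, completion_tokens = row[8], row[9]

    tpot = chunk_time / completion_tokens if completion_tokens > 0 else math.nan
    output_tps = completion_tokens / latency if latency > 0 else math.nan
    total_tps = (prompt_tokens + completion_tokens) / latency if latency > 0 else math.nan
    return tpot, output_tps, total_tps
```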
evalscope/report/__init__.py CHANGED
@@ -1,5 +1,6 @@
  # Copyright (c) Alibaba, Inc. and its affiliates.

+ from evalscope.report.app_arguments import add_argument
  from evalscope.report.combinator import gen_table, get_data_frame, get_report_list
  from evalscope.report.generator import ReportGenerator
  from evalscope.report.utils import Category, Report, ReportKey, Subset
evalscope/report/app.py CHANGED
@@ -11,7 +11,7 @@ from dataclasses import dataclass
  from typing import Any, List, Union

  from evalscope.constants import DataCollection
- from evalscope.report import Report, ReportKey, get_data_frame, get_report_list
+ from evalscope.report import Report, ReportKey, add_argument, get_data_frame, get_report_list
  from evalscope.utils.io_utils import OutputsStructure, yaml_to_dict
  from evalscope.utils.logger import configure_logging, get_logger
  from evalscope.version import __version__
@@ -22,6 +22,23 @@ PLOTLY_THEME = 'plotly_dark'
  REPORT_TOKEN = '@@'
  MODEL_TOKEN = '::'
  DATASET_TOKEN = ', '
+ LATEX_DELIMITERS = [{
+ 'left': '$$',
+ 'right': '$$',
+ 'display': True
+ }, {
+ 'left': '$',
+ 'right': '$',
+ 'display': False
+ }, {
+ 'left': '\\(',
+ 'right': '\\)',
+ 'display': False
+ }, {
+ 'left': '\\[',
+ 'right': '\\]',
+ 'display': True
+ }]


  def scan_for_report_folders(root_path):
@@ -234,6 +251,18 @@ def convert_html_tags(text):
  return text


+ def convert_markdown_image(text):
+ if not os.path.isfile(text):
+ return text
+ # Convert the image path to a markdown image tag
+ if text.endswith('.png') or text.endswith('.jpg') or text.endswith('.jpeg'):
+ text = os.path.abspath(text)
+ image_tag = f'![image](gradio_api/file={text})'
+ logger.debug(f'Converting image path to markdown: {text} -> {image_tag}')
+ return image_tag
+ return text
+
+
  def process_string(string: str, max_length: int = 2048) -> str:
  string = convert_html_tags(string) # for display labels e.g. `<think>`
  if len(string) > max_length:
@@ -285,7 +314,7 @@ def get_model_prediction(work_dir: str, model_name: str, dataset_name: str, subs
  'Input': raw_input,
  'Generated': raw_pred_answer,
  'Gold': parsed_gold_answer if parsed_gold_answer != raw_input else '*Same as Input*',
- 'Pred': parsed_pred_answer if parsed_pred_answer != raw_pred_answer else '*Same as Generated*',
+ 'Pred': parsed_pred_answer,
  'Score': score,
  'NScore': normalize_score(score)
  }
@@ -295,22 +324,6 @@ def get_model_prediction(work_dir: str, model_name: str, dataset_name: str, subs
  return df_subset


- def get_table_data(data_review_df: pd.DataFrame, page: int = 1, rows_per_page: int = 1) -> pd.DataFrame:
- if data_review_df is None:
- return pd.DataFrame(), None
-
- logger.debug(f'page: {page}, rows_per_page: {rows_per_page}')
- start = (page - 1) * rows_per_page
- end = start + rows_per_page
- df_subset = data_review_df.iloc[start:end].copy()
- df_subset['Input'] = df_subset['Input'].map(process_model_prediction).astype(str)
- df_subset['Generated'] = df_subset['Generated'].map(process_model_prediction).astype(str)
- df_subset['Pred'] = df_subset['Pred'].map(process_model_prediction).astype(str)
- df_subset['Score'] = df_subset['Score'].map(process_model_prediction).astype(str)
- styler = style_df(df_subset, columns=['NScore'])
- return df_subset, styler
-
-
  @dataclass
  class SidebarComponents:
  root_path: gr.Textbox
@@ -457,7 +470,11 @@ def create_single_model_tab(sidebar: SidebarComponents, lang: str):
  'page': {
  'zh': '页码',
  'en': 'Page'
- }
+ },
+ 'score_threshold': {
+ 'zh': '分数阈值',
+ 'en': 'Score Threshold'
+ },
  }

  # Update the UI components with localized labels
@@ -489,37 +506,53 @@ def create_single_model_tab(sidebar: SidebarComponents, lang: str):
  gr.Markdown(f'### {locale_dict["model_prediction"][lang]}')
  subset_select = gr.Dropdown(
  label=locale_dict['select_subset'][lang], choices=[], show_label=True, interactive=True)
+
  with gr.Row():
  answer_mode_radio = gr.Radio(
  label=locale_dict['answer_mode'][lang], choices=['All', 'Pass', 'Fail'], value='All', interactive=True)
- page_number = gr.Number(
- value=1, label=locale_dict['page'][lang], minimum=1, maximum=1, step=1, interactive=True)
- answer_mode_counts = gr.Markdown('', label='Counts')
+ score_threshold = gr.Number(value=0.99, label=locale_dict['score_threshold'][lang], interactive=True)
+
  data_review_df = gr.State(None)
  filtered_review_df = gr.State(None)
- data_review_table = gr.DataFrame(
- value=None,
- datatype=['markdown', 'markdown', 'markdown', 'markdown', 'markdown', 'number'],
- # column_widths=['500px', '500px'],
- wrap=True,
- latex_delimiters=[{
- 'left': '$$',
- 'right': '$$',
- 'display': True
- }, {
- 'left': '$',
- 'right': '$',
- 'display': False
- }, {
- 'left': '\\(',
- 'right': '\\)',
- 'display': False
- }, {
- 'left': '\\[',
- 'right': '\\]',
- 'display': True
- }],
- max_height=600)
+
+ # show statistics
+ with gr.Row(variant='panel'):
+ with gr.Column():
+ gr.Markdown('### *Counts*')
+ answer_mode_counts = gr.Markdown('')
+ with gr.Column():
+ page_number = gr.Number(
+ value=1, label=locale_dict['page'][lang], minimum=1, maximum=1, step=1, interactive=True)
+
+ # show data review table
+ with gr.Row(variant='panel'):
+ with gr.Column():
+ gr.Markdown('### *Score*')
+ score_text = gr.Markdown(
+ '', elem_id='score_text', latex_delimiters=LATEX_DELIMITERS, show_copy_button=True)
+ with gr.Column():
+ gr.Markdown('### *Normalized Score*')
+ nscore = gr.Markdown('', elem_id='score_text', latex_delimiters=LATEX_DELIMITERS)
+
+ with gr.Row(variant='panel'):
+ with gr.Column():
+ gr.Markdown('### *Gold*')
+ gold_text = gr.Markdown(
+ '', elem_id='gold_text', latex_delimiters=LATEX_DELIMITERS, show_copy_button=True)
+ with gr.Column():
+ gr.Markdown('### *Pred*')
+ pred_text = gr.Markdown(
+ '', elem_id='pred_text', latex_delimiters=LATEX_DELIMITERS, show_copy_button=True)
+
+ with gr.Row(variant='panel'):
+ with gr.Column():
+ gr.Markdown('### *Input*')
+ input_text = gr.Markdown(
+ '', elem_id='input_text', latex_delimiters=LATEX_DELIMITERS, show_copy_button=True)
+ with gr.Column():
+ gr.Markdown('### *Generated*')
+ generated_text = gr.Markdown(
+ '', elem_id='generated_text', latex_delimiters=LATEX_DELIMITERS, show_copy_button=True)

  @report_name.change(
  inputs=[sidebar.root_path, report_name],
@@ -561,15 +594,15 @@ def create_single_model_tab(sidebar: SidebarComponents, lang: str):
  return data_review_df, 1

  @gr.on(
- triggers=[data_review_df.change, answer_mode_radio.change],
- inputs=[data_review_df, answer_mode_radio],
+ triggers=[data_review_df.change, answer_mode_radio.change, score_threshold.change],
+ inputs=[data_review_df, answer_mode_radio, score_threshold],
  outputs=[filtered_review_df, page_number, answer_mode_counts])
- def filter_data(data_review_df, answer_mode):
+ def filter_data(data_review_df, answer_mode, score_threshold):
  if data_review_df is None:
  return None, gr.update(value=1, maximum=1), ''

  all_count = len(data_review_df)
- pass_df = data_review_df[data_review_df['NScore'] >= 0.99]
+ pass_df = data_review_df[data_review_df['NScore'] >= score_threshold]
  pass_count = len(pass_df)
  fail_count = all_count - pass_count

@@ -578,7 +611,7 @@ def create_single_model_tab(sidebar: SidebarComponents, lang: str):
  if answer_mode == 'Pass':
  filtered_df = pass_df
  elif answer_mode == 'Fail':
- filtered_df = data_review_df[data_review_df['NScore'] < 0.99]
+ filtered_df = data_review_df[data_review_df['NScore'] < score_threshold]
  else:
  filtered_df = data_review_df

@@ -588,13 +621,33 @@ def create_single_model_tab(sidebar: SidebarComponents, lang: str):

  @gr.on(
  triggers=[filtered_review_df.change, page_number.change],
- inputs=[filtered_review_df, page_number],
- outputs=[data_review_table])
- def update_table(filtered_df, page_number):
- if filtered_df is None:
- return gr.update(value=None)
- subset_df, styler = get_table_data(filtered_df, page_number)
- return styler
+ inputs=[filtered_review_df, page_number, score_threshold],
+ outputs=[input_text, generated_text, gold_text, pred_text, score_text, nscore])
+ def update_table_components(filtered_df, page_number, score_threshold):
+ if filtered_df is None or len(filtered_df) == 0:
+ return '', '', '', '', '', ''
+
+ # Get single row data for the current page
+ start = (page_number - 1)
+ if start >= len(filtered_df):
+ return '', '', '', '', '', ''
+
+ row = filtered_df.iloc[start]
+
+ # Process the data for display
+ input_md = process_model_prediction(row['Input'])
+ generated_md = process_model_prediction(row['Generated'])
+ gold_md = process_model_prediction(row['Gold'])
+ pred_md = convert_markdown_image(process_model_prediction(row['Pred']))
+ score_md = process_model_prediction(row['Score'])
+ nscore_val = float(row['NScore']) if not pd.isna(row['NScore']) else 0.0
+
+ if nscore_val >= score_threshold:
+ nscore_val = f'<div style="background-color:rgb(45,104, 62); padding:10px;">{nscore_val}</div>'
+ else:
+ nscore_val = f'<div style="background-color:rgb(151, 31, 44); padding:10px;">{nscore_val}</div>'
+
+ return input_md, generated_md, gold_md, pred_md, score_md, nscore_val

  return SingleModelComponents(report_name=report_name)

@@ -696,16 +749,13 @@ def create_app(args: argparse.Namespace):
  text = '<' if new_visible else '>'
  return gr.update(visible=new_visible), new_visible, gr.update(value=text)

- demo.launch(share=args.share, server_name=args.server_name, server_port=args.server_port, debug=args.debug)
-
-
- def add_argument(parser: argparse.ArgumentParser):
- parser.add_argument('--share', action='store_true', help='Share the app.')
- parser.add_argument('--server-name', type=str, default='0.0.0.0', help='The server name.')
- parser.add_argument('--server-port', type=int, default=None, help='The server port.')
- parser.add_argument('--debug', action='store_true', help='Debug the app.')
- parser.add_argument('--lang', type=str, default='zh', help='The locale.', choices=['zh', 'en'])
- parser.add_argument('--outputs', type=str, default='./outputs', help='The outputs dir.')
+ demo.launch(
+ share=args.share,
+ server_name=args.server_name,
+ server_port=args.server_port,
+ debug=args.debug,
+ allowed_paths=args.allowed_paths,
+ )


  if __name__ == '__main__':
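
The single-model review tab now exposes the pass/fail cutoff as a `score_threshold` input instead of the hard-coded 0.99. A standalone pandas sketch, with invented data, of the filtering that `filter_data` performs:

```python
# Invented data; mirrors the filter_data logic above with a configurable threshold.
import pandas as pd

data_review_df = pd.DataFrame({'Input': ['q1', 'q2', 'q3'],
                               'NScore': [1.0, 0.6, 0.99]})
score_threshold = 0.99

pass_df = data_review_df[data_review_df['NScore'] >= score_threshold]
fail_df = data_review_df[data_review_df['NScore'] < score_threshold]
print(f'All: {len(data_review_df)} | Pass: {len(pass_df)} | Fail: {len(fail_df)}')
# All: 3 | Pass: 2 | Fail: 1
```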