evalscope 0.17.0__py3-none-any.whl → 0.17.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (66)
  1. evalscope/benchmarks/bfcl/bfcl_adapter.py +1 -1
  2. evalscope/benchmarks/data_adapter.py +9 -4
  3. evalscope/benchmarks/general_mcq/general_mcq_adapter.py +2 -1
  4. evalscope/benchmarks/general_qa/general_qa_adapter.py +2 -1
  5. evalscope/benchmarks/hle/__init__.py +0 -0
  6. evalscope/benchmarks/hle/hle_adapter.py +118 -0
  7. evalscope/benchmarks/humaneval/humaneval_adapter.py +5 -21
  8. evalscope/benchmarks/mmlu/mmlu_adapter.py +1 -1
  9. evalscope/benchmarks/tau_bench/__init__.py +0 -0
  10. evalscope/benchmarks/tau_bench/tau_bench_adapter.py +110 -0
  11. evalscope/benchmarks/tool_bench/tool_bench_adapter.py +7 -1
  12. evalscope/benchmarks/utils.py +1 -0
  13. evalscope/constants.py +5 -21
  14. evalscope/evaluator/__init__.py +1 -1
  15. evalscope/evaluator/evaluator.py +5 -3
  16. evalscope/metrics/__init__.py +3 -1
  17. evalscope/metrics/completion_parsers.py +7 -0
  18. evalscope/metrics/llm_judge.py +6 -5
  19. evalscope/metrics/metrics.py +19 -7
  20. evalscope/models/__init__.py +4 -8
  21. evalscope/models/adapters/__init__.py +4 -9
  22. evalscope/models/adapters/base_adapter.py +4 -0
  23. evalscope/models/adapters/bfcl_adapter.py +2 -0
  24. evalscope/models/adapters/chat_adapter.py +3 -0
  25. evalscope/models/adapters/choice_adapter.py +4 -0
  26. evalscope/models/adapters/custom_adapter.py +7 -3
  27. evalscope/models/adapters/server_adapter.py +2 -0
  28. evalscope/models/adapters/t2i_adapter.py +3 -0
  29. evalscope/models/adapters/tau_bench_adapter.py +189 -0
  30. evalscope/models/register.py +0 -14
  31. evalscope/perf/arguments.py +13 -0
  32. evalscope/perf/benchmark.py +38 -39
  33. evalscope/perf/http_client.py +30 -86
  34. evalscope/perf/main.py +2 -2
  35. evalscope/perf/plugin/__init__.py +3 -2
  36. evalscope/perf/plugin/api/__init__.py +4 -3
  37. evalscope/perf/plugin/api/base.py +22 -4
  38. evalscope/perf/plugin/api/custom_api.py +212 -55
  39. evalscope/perf/plugin/api/dashscope_api.py +4 -10
  40. evalscope/perf/plugin/api/default_api.py +105 -0
  41. evalscope/perf/plugin/api/openai_api.py +17 -19
  42. evalscope/perf/plugin/datasets/__init__.py +10 -7
  43. evalscope/perf/plugin/datasets/base.py +22 -1
  44. evalscope/perf/plugin/datasets/custom.py +2 -1
  45. evalscope/perf/plugin/datasets/flickr8k.py +4 -27
  46. evalscope/perf/plugin/datasets/kontext_bench.py +28 -0
  47. evalscope/perf/plugin/datasets/line_by_line.py +2 -1
  48. evalscope/perf/plugin/datasets/longalpaca.py +2 -1
  49. evalscope/perf/plugin/datasets/openqa.py +2 -1
  50. evalscope/perf/plugin/datasets/random_dataset.py +15 -4
  51. evalscope/perf/plugin/datasets/random_vl_dataset.py +80 -0
  52. evalscope/perf/plugin/registry.py +36 -16
  53. evalscope/perf/utils/benchmark_util.py +14 -20
  54. evalscope/perf/utils/db_util.py +79 -61
  55. evalscope/utils/io_utils.py +10 -0
  56. evalscope/version.py +2 -2
  57. {evalscope-0.17.0.dist-info → evalscope-0.17.1.dist-info}/METADATA +54 -34
  58. {evalscope-0.17.0.dist-info → evalscope-0.17.1.dist-info}/RECORD +65 -58
  59. tests/cli/test_all.py +18 -2
  60. tests/cli/test_run.py +25 -37
  61. tests/perf/test_perf.py +29 -2
  62. evalscope/models/model.py +0 -189
  63. {evalscope-0.17.0.dist-info → evalscope-0.17.1.dist-info}/LICENSE +0 -0
  64. {evalscope-0.17.0.dist-info → evalscope-0.17.1.dist-info}/WHEEL +0 -0
  65. {evalscope-0.17.0.dist-info → evalscope-0.17.1.dist-info}/entry_points.txt +0 -0
  66. {evalscope-0.17.0.dist-info → evalscope-0.17.1.dist-info}/top_level.txt +0 -0
evalscope/perf/http_client.py CHANGED
@@ -1,13 +1,13 @@
 import aiohttp
 import asyncio
-import json
 import time
-from http import HTTPStatus
-from typing import AsyncGenerator, Dict, List, Tuple
+from typing import TYPE_CHECKING, AsyncGenerator, Dict, List, Tuple
 
-from evalscope.perf.arguments import Arguments
-from evalscope.perf.utils.local_server import ServerSentEvent
 from evalscope.utils.logger import get_logger
+from .arguments import Arguments
+
+if TYPE_CHECKING:
+    from .plugin.api.base import ApiPluginBase
 
 logger = get_logger()
 
@@ -17,95 +17,48 @@ class AioHttpClient:
     def __init__(
         self,
         args: Arguments,
+        api_plugin: 'ApiPluginBase',
     ):
         self.url = args.url
         self.headers = {'user-agent': 'modelscope_bench', **(args.headers or {})}
         self.read_timeout = args.read_timeout
         self.connect_timeout = args.connect_timeout
+        self.api_plugin = api_plugin
         self.client = aiohttp.ClientSession(
             timeout=aiohttp.ClientTimeout(connect=self.connect_timeout, sock_read=self.read_timeout),
             trace_configs=[self._create_trace_config()] if args.debug else [])
 
-    def _create_trace_config(self):
-        trace_config = aiohttp.TraceConfig()
-        trace_config.on_request_start.append(self.on_request_start)
-        trace_config.on_request_chunk_sent.append(self.on_request_chunk_sent)
-        trace_config.on_response_chunk_received.append(self.on_response_chunk_received)
-        return trace_config
-
     async def __aenter__(self):
         pass
 
     async def __aexit__(self, exc_type, exc, tb):
         await self.client.close()
 
-    async def _handle_stream(self, response: aiohttp.ClientResponse):
-        is_error = False
-        async for line in response.content:
-            line = line.decode('utf8').rstrip('\n\r')
-            sse_msg = ServerSentEvent.decode(line)
-            if sse_msg:
-                logger.debug(f'Response recevied: {line}')
-                if sse_msg.event == 'error':
-                    is_error = True
-                if sse_msg.data:
-                    if sse_msg.data.startswith('[DONE]'):
-                        break
-                    yield is_error, response.status, sse_msg.data
-
-    async def _handle_response(self, response: aiohttp.ClientResponse) -> AsyncGenerator[Tuple[bool, int, str], None]:
-        response_status = response.status
-        response_content_type = response.content_type
-        content_type_json = 'application/json'
-        content_type_event_stream = 'text/event-stream'
-        is_success = response_status == HTTPStatus.OK
-
-        if is_success:
-            # Handle successful response with 'text/event-stream' content type
-            if content_type_event_stream in response_content_type:
-                async for is_error, response_status, content in self._handle_stream(response):
-                    yield (is_error, response_status, content)
-            # Handle successful response with 'application/json' content type
-            elif content_type_json in response_content_type:
-                content = await response.json()
-                if content.get('object') == 'error':
-                    yield (True, content.get('code'), content.get('message'))  # DashScope
-                else:
-                    yield (False, response_status, json.dumps(content, ensure_ascii=False))
-            # Handle other successful responses
-            else:
-                content = await response.read()
-                yield (False, response_status, content)
-        else:
-            # Handle error response with 'application/json' content type
-            if content_type_json in response_content_type:
-                error = await response.json()
-                yield (True, response_status, json.dumps(error, ensure_ascii=False))
-            # Handle error response with 'text/event-stream' content type
-            elif content_type_event_stream in response_content_type:
-                async for _, _, data in self._handle_stream(response):
-                    error = json.loads(data)
-                    yield (True, response_status, json.dumps(error, ensure_ascii=False))
-            # Handle other error responses
-            else:
-                msg = await response.read()
-                yield (True, response_status, msg.decode('utf-8'))
+    def _create_trace_config(self):
+        """Create trace configuration for debugging."""
+        trace_config = aiohttp.TraceConfig()
+        trace_config.on_request_start.append(self.on_request_start)
+        trace_config.on_request_chunk_sent.append(self.on_request_chunk_sent)
+        trace_config.on_response_chunk_received.append(self.on_response_chunk_received)
+        return trace_config
 
     async def post(self, body):
-        headers = {'Content-Type': 'application/json', **self.headers}
+        """Send POST request and delegate response handling to API plugin.
+        Yields:
+            Tuple[bool, int, str]: (is_error, status_code, response_data)
+        """
         try:
-            data = json.dumps(body, ensure_ascii=False)  # serialize to JSON
-            async with self.client.request('POST', url=self.url, data=data, headers=headers) as response:
-                async for rsp in self._handle_response(response):
-                    yield rsp
-        except asyncio.TimeoutError:
+            # Delegate the request processing to the API plugin
+            async for result in self.api_plugin.process_request(self.client, self.url, self.headers, body):
+                yield result
+        except asyncio.TimeoutError as e:
             logger.error(
-                f'TimeoutError: connect_timeout: {self.connect_timeout}, read_timeout: {self.read_timeout}. Please set longger timeout.'  # noqa: E501
+                f'TimeoutError: connect_timeout: {self.connect_timeout}, read_timeout: {self.read_timeout}. Please set longer timeout.'  # noqa: E501
             )
-            yield (True, None, 'Timeout')
+            yield (True, None, str(e))
         except (aiohttp.ClientConnectorError, Exception) as e:
             logger.error(e)
-            yield (True, None, e)
+            yield (True, None, str(e))
 
     @staticmethod
     async def on_request_start(session, context, params: aiohttp.TraceRequestStartParams):
@@ -136,25 +89,16 @@ class AioHttpClient:
         logger.debug(f'Request received: <{method=}, {url=}, {truncated_chunk=}>')
 
 
-async def test_connection(args: Arguments) -> bool:
+async def test_connection(args: Arguments, api_plugin: 'ApiPluginBase') -> bool:
     is_error = True
     start_time = time.perf_counter()
 
     async def attempt_connection():
-        client = AioHttpClient(args)
+        client = AioHttpClient(args, api_plugin)
         async with client:
-            if args.apply_chat_template:
-                request = {
-                    'messages': [{
-                        'role': 'user',
-                        'content': 'hello'
-                    }],
-                    'model': args.model,
-                    'max_tokens': 10,
-                    'stream': args.stream
-                }
-            else:
-                request = {'prompt': 'hello', 'model': args.model, 'max_tokens': 10}
+            messages = [{'role': 'user', 'content': 'hello'}] if args.apply_chat_template else 'hello'
+            request = api_plugin.build_request(messages)
+
            async for is_error, state_code, response_data in client.post(request):
                return is_error, state_code, response_data
 
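Review note: after this refactor, AioHttpClient no longer parses responses itself; the plugin passed into its constructor owns the whole round trip via process_request (see the base.py and custom_api.py hunks below). A minimal sketch of the new wiring, assuming OpenaiPlugin's constructor follows the same Arguments-based change as the plugins shown in this diff; the model name and URL are placeholders:

    # Sketch only: drives the refactored client, based on the signatures in this diff.
    import asyncio

    from evalscope.perf.arguments import Arguments
    from evalscope.perf.http_client import AioHttpClient
    from evalscope.perf.plugin.api.openai_api import OpenaiPlugin

    async def main():
        # Placeholder endpoint/model; 'api' selects the registered plugin.
        args = Arguments(model='my-model', url='http://localhost:8000/v1/chat/completions', api='openai')
        plugin = OpenaiPlugin(args)           # plugins are now built from Arguments, not a model path
        client = AioHttpClient(args, plugin)  # the client delegates response handling to the plugin
        async with client:
            request = plugin.build_request([{'role': 'user', 'content': 'hello'}])
            async for is_error, status, data in client.post(request):
                print(is_error, status, data)

    asyncio.run(main())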
evalscope/perf/main.py CHANGED
@@ -57,8 +57,8 @@ def run_multi_benchmark(args: Arguments, output_path: str = None):
         results.append(metrics_result)
         # Sleep between runs to avoid overwhelming the server
         if i < len(number_list) - 1:
-            logger.info('Sleeping for 5 seconds before the next run...')
-            time.sleep(5)
+            logger.info(f'Sleeping for {args.sleep_interval} seconds before the next run...')
+            time.sleep(args.sleep_interval)
     # Analyze results
     print_summary(results, args.model_id)
     return results
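The hard-coded 5-second pause is now read from args.sleep_interval, presumably the new field added in evalscope/perf/arguments.py (+13 above). A hedged sketch of a multi-run invocation that would exercise this path; passing number as a list to populate number_list is an assumption, and all field values are illustrative:

    # Sketch only: a multi-run benchmark where sleep_interval applies between runs.
    # The default value of sleep_interval is not visible in this diff; 5 would
    # preserve the old behaviour.
    from evalscope.perf.arguments import Arguments
    from evalscope.perf.main import run_perf_benchmark

    args = Arguments(
        model='my-model',
        url='http://localhost:8000/v1/chat/completions',
        api='openai',
        dataset='openqa',
        number=[10, 50, 100],  # assumed: a list triggers run_multi_benchmark
        sleep_interval=10,     # seconds to wait between consecutive runs
    )
    run_perf_benchmark(args)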
evalscope/perf/plugin/__init__.py CHANGED
@@ -1,2 +1,3 @@
-from evalscope.perf.plugin.api import *
-from evalscope.perf.plugin.datasets import *
+from .api import *
+from .datasets import *
+from .registry import ApiRegistry, DatasetRegistry
evalscope/perf/plugin/api/__init__.py CHANGED
@@ -1,3 +1,4 @@
-from evalscope.perf.plugin.api.custom_api import CustomPlugin
-from evalscope.perf.plugin.api.dashscope_api import DashScopeApiPlugin
-from evalscope.perf.plugin.api.openai_api import OpenaiPlugin
+from .base import ApiPluginBase
+from .custom_api import CustomPlugin
+from .dashscope_api import DashScopeApiPlugin
+from .openai_api import OpenaiPlugin
evalscope/perf/plugin/api/base.py CHANGED
@@ -1,16 +1,18 @@
+import aiohttp
 from abc import abstractmethod
-from typing import Any, Dict, List, Tuple
+from typing import Any, AsyncGenerator, Dict, List, Tuple
 
 from evalscope.perf.arguments import Arguments
 
 
 class ApiPluginBase:
 
-    def __init__(self, model_path: str) -> None:
-        self.model_path = model_path
+    def __init__(self, param: Arguments) -> None:
+        self.param = param
+        self.model_path = param.tokenizer_path
 
     @abstractmethod
-    def build_request(self, messages: List[Dict], param: Arguments) -> Dict:
+    def build_request(self, messages: List[Dict], param: Arguments = None) -> Dict:
         """Build a api request body.
 
         Args:
@@ -39,6 +41,22 @@ class ApiPluginBase:
         """
         raise NotImplementedError
 
+    @abstractmethod
+    async def process_request(self, client_session: aiohttp.ClientSession, url: str, headers: Dict,
+                              body: Dict) -> AsyncGenerator[Tuple[bool, int, str], None]:
+        """Process the HTTP request and handle the response.
+
+        Args:
+            client_session: The aiohttp client session
+            url: The request URL
+            headers: The request headers
+            body: The request body
+
+        Yields:
+            Tuple[bool, int, str]: (is_error, status_code, response_data)
+        """
+        raise NotImplementedError
+
     @staticmethod
     def replace_values(input_json: Any, model: str, prompt: str):
evalscope/perf/plugin/api/custom_api.py CHANGED
@@ -1,5 +1,6 @@
+import aiohttp
 import json
-from typing import Any, Dict, Iterator, List
+from typing import Any, AsyncGenerator, Dict, List, Tuple, Union
 
 from evalscope.perf.arguments import Arguments
 from evalscope.perf.plugin.api.base import ApiPluginBase
@@ -11,82 +12,238 @@ logger = get_logger()
 
 @register_api('custom')
 class CustomPlugin(ApiPluginBase):
-    """Support tensorrt-llm triton server
+    """Support custom API implementations.
+
+    This class serves as a template for users to implement their own API plugins.
+    By extending this class, users can connect to any LLM API with custom request
+    and response formats.
     """
 
-    def __init__(self, mode_path: str):
-        """Init the plugin
+    def __init__(self, param: Arguments):
+        """Initialize the plugin with the provided parameters.
 
         Args:
-            mode_path (str): The model path, we use the tokenizer
-                weight in the model to calculate the number of the
-                input and output tokens.
+            param (Arguments): Configuration parameters for the plugin, including:
+                - tokenizer_path: Path to the tokenizer for token counting
+                - model: Name of the model to use
+                - Other request parameters like max_tokens, temperature, etc.
         """
-        super().__init__(model_path=mode_path)
-        if mode_path is not None:
+        super().__init__(param=param)
+        if param.tokenizer_path is not None:
            from modelscope import AutoTokenizer
-            self.tokenizer = AutoTokenizer.from_pretrained(mode_path)
+            self.tokenizer = AutoTokenizer.from_pretrained(param.tokenizer_path)
        else:
            self.tokenizer = None
 
-    def build_request(self, messages: List[Dict], param: Arguments) -> Dict:
-        """Build the openai format request based on prompt, dataset
+    def build_request(self, messages: Union[List[Dict], str], param: Arguments = None) -> Dict:
+        """Build a custom API request body based on the input messages and parameters.
 
-        Args:
-            message (Dict): The basic message to generator query.
-            param (Arguments): The query parameters.
+        This method formats the input messages into the expected request format
+        for your custom API.
 
-        Raises:
-            Exception: NotImplemented
+        Args:
+            messages (Union[List[Dict], str]): The input messages to include in the request.
+                Could be a list of message dictionaries (for chat models) or a string (for completion models).
+            param (Arguments, optional): Request parameters. Defaults to self.param.
 
         Returns:
-            Dict: The request body. None if prompt format is error.
+            Dict: A properly formatted request body for your custom API.
         """
+        param = param or self.param
         try:
-            query = json.loads(param.query_template)
-            ApiPluginBase.replace_values(query, param.model, messages[0]['content'])
-            return query
+            # Create a default query format if no template is provided
+            if isinstance(messages, str):
+                query = {'input_text': messages}
+            else:
+                query = {'messages': messages}
+
+            # Add model parameters to the request
+            return self._add_parameters_to_request(query, param)
         except Exception as e:
            logger.exception(e)
-            logger.error('Prompt: %s invalidate!' % messages)
            return None
 
-    def parse_responses(self, responses, request: Any = None, **kwargs) -> Dict:
-        """Parser responses and return number of request and response tokens.
-        sample of the output delta:
-        {"id":"4","object":"chat.completion.chunk","created":1714030870,"model":"llama3","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}]}
+    def _add_parameters_to_request(self, payload: Dict, param: Arguments) -> Dict:
+        """Add model parameters to the request payload.
+
+        This helper method adds various parameters like temperature, max_tokens, etc.
+        to the request based on what your custom API supports.
+
+        Args:
+            payload (Dict): The base request payload.
+            param (Arguments): The parameters to add.
+
+        Returns:
+            Dict: The request payload with added parameters.
+        """
+        # Add the model name
+        payload['model'] = param.model
+
+        # Add various parameters if they are provided
+        if param.max_tokens is not None:
+            payload['max_tokens'] = param.max_tokens
+        if param.temperature is not None:
+            payload['temperature'] = param.temperature
+        if param.top_p is not None:
+            payload['top_p'] = param.top_p
+        if param.top_k is not None:
+            payload['top_k'] = param.top_k
+        if param.stream is not None:
+            payload['stream'] = param.stream
+            payload['stream_options'] = {'include_usage': True}
 
+        # Add any extra arguments passed via command line
+        if param.extra_args is not None:
+            payload.update(param.extra_args)
+
+        return payload
+
+    def parse_responses(self, responses: List[str], request: Any = None, **kwargs) -> Tuple[int, int]:
+        """Parse API responses and return token counts.
+
+        This method extracts the number of input and output tokens from the API responses.
+        Different APIs may return this information in different formats, or you may need
+        to calculate it using a tokenizer.
 
         Args:
-            responses (List[bytes]): List of http response body, for stream output,
-                there are multiple responses, for general only one.
-            kwargs: (Any): The command line --parameter content.
+            responses (List[str]): List of API response strings.
+            request (Any, optional): The original request, which might be needed for token calculation.
+            **kwargs: Additional arguments.
+
         Returns:
-            Tuple: Return number of prompt token and number of completion tokens.
+            Tuple[int, int]: (input_tokens, output_tokens) - The number of tokens in the prompt and completion.
+        """
+        try:
+            # Example 1: Try to get token counts from the API response
+            last_response = json.loads(responses[-1])
+
+            # If the API provides token usage information
+            if 'usage' in last_response and last_response['usage']:
+                input_tokens = last_response['usage'].get('prompt_tokens', 0)
+                output_tokens = last_response['usage'].get('completion_tokens', 0)
+                return input_tokens, output_tokens
+
+            # Example 2: Calculate tokens using the tokenizer if no usage info is available
+            if self.tokenizer is not None:
+                input_text = ''
+                output_text = ''
+
+                # Extract input text from the request
+                if request and 'messages' in request:
+                    # For chat API
+                    input_text = ' '.join([msg['content'] for msg in request['messages']])
+                elif request and 'input_text' in request:
+                    # For completion API
+                    input_text = request['input_text']
+
+                # Extract output text from the response
+                for response in responses:
+                    js = json.loads(response)
+                    if 'choices' in js:
+                        for choice in js['choices']:
+                            if 'message' in choice and 'content' in choice['message']:
+                                output_text += choice['message']['content']
+                            elif 'text' in choice:
+                                output_text += choice['text']
+
+                # Count tokens
+                input_tokens = len(self.tokenizer.encode(input_text))
+                output_tokens = len(self.tokenizer.encode(output_text))
+                return input_tokens, output_tokens
+
+            # If no usage information and no tokenizer, raise an error
+            raise ValueError(
+                'Cannot determine token counts: no usage information in response and no tokenizer provided.')
+
+        except Exception as e:
+            logger.error(f'Error parsing responses: {e}')
+            return 0, 0
+
+    async def process_request(self, client_session: aiohttp.ClientSession, url: str, headers: Dict,
+                              body: Dict) -> AsyncGenerator[Tuple[bool, int, str], None]:
+        """Process the HTTP request and handle the response.
+
+        This method handles sending the request to your API and processing the response,
+        including handling streaming responses if supported.
+
+        Args:
+            client_session (aiohttp.ClientSession): The aiohttp client session.
+            url (str): The API endpoint URL.
+            headers (Dict): The request headers.
+            body (Dict): The request body.
+
+        Yields:
+            Tuple[bool, int, str]: (is_error, status_code, response_data)
+                - is_error: Whether the response indicates an error
+                - status_code: HTTP status code
+                - response_data: Response content
         """
-        full_response_content = ''
-        delta_contents = {}
-        input_tokens = None
-        output_tokens = None
-        for response in responses:
-            data = json.loads(response)
-            # {"context_logits":0.0,"cum_log_probs":0.0,"generation_logits":0.0,"model_name":"ensemble",
-            # "model_version":"1","output_log_probs":[0.0,0.0,0.0,0.0,0.0],"sequence_end":false,"sequence_id":0,"sequence_start":false,"text_output":"性"}
-            if 'text_output' in data:
-                if 0 in delta_contents:
-                    delta_contents[0].append(data['text_output'])
+        try:
+            # Set content type header
+            headers = {'Content-Type': 'application/json', **headers}
+
+            # Convert body to JSON
+            data = json.dumps(body, ensure_ascii=False)
+
+            # Send the request
+            async with client_session.request(
+                    'POST', url=url, data=data, headers=headers) as response:  # noqa: E125
+                # Get the status code
+                status_code = response.status
+
+                # Check if it's a streaming response
+                if 'text/event-stream' in response.content_type:
+                    # Handle streaming response
+                    async for line in response.content:
+                        line_str = line.decode('utf-8').strip()
+                        if not line_str:
+                            continue
+
+                        # Check for data prefix in server-sent events
+                        if line_str.startswith('data: '):
+                            data = line_str[6:]  # Remove 'data: ' prefix
+
+                            # Check if it's the end of the stream
+                            if data == '[DONE]':
+                                break
+
+                            try:
+                                # Parse the JSON data
+                                parsed_data = json.loads(data)
+                                yield (False, status_code, json.dumps(parsed_data))
+                            except json.JSONDecodeError:
+                                yield (True, status_code, f'Failed to parse JSON: {data}')
                 else:
-                    delta_contents[0] = [data['text_output']]
-        if input_tokens is None and output_tokens is None and self.tokenizer is not None:
-            input_tokens = 0
-            output_tokens = 0
-            for _, choice_contents in delta_contents.items():
-                full_response_content = ''.join([m for m in choice_contents])
-                input_tokens += len(self.tokenizer.encode(request['text_input']))
-                output_tokens += len(self.tokenizer.encode(full_response_content))
-        elif input_tokens is None and output_tokens is None:  # no usage info get.
-            input_tokens = 0
-            output_tokens = 0
-            logger.warning('No usage info get.')
-
-        return input_tokens, output_tokens
+                    # Handle regular response
+                    if 'application/json' in response.content_type:
+                        # JSON response
+                        content = await response.json()
+                        yield (status_code >= 400, status_code, json.dumps(content))
+                    else:
+                        # Text response
+                        content = await response.text()
+                        yield (status_code >= 400, status_code, content)
+
+        except Exception as e:
+            logger.error(f'Error in process_request: {e}')
+            yield (True, 500, str(e))
+
+
+if __name__ == '__main__':
+    # Example usage of the CustomPlugin
+    from dotenv import dotenv_values
+    env = dotenv_values('.env')
+
+    from evalscope.perf.arguments import Arguments
+    from evalscope.perf.main import run_perf_benchmark
+
+    args = Arguments(
+        model='qwen2.5-7b-instruct',
+        url='https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions',
+        api_key=env.get('DASHSCOPE_API_KEY'),
+        api='custom',  # Use the custom API plugin registered above
+        dataset='openqa',
+        number=1,
+        max_tokens=10)
+
+    run_perf_benchmark(args)
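For reference, the rewritten parse_responses treats each element of responses as a JSON string and prefers an OpenAI-style usage block before falling back to the tokenizer. A small illustration of that fast path; all payload values are made up, and constructing Arguments with only model and url assumes the remaining fields (including tokenizer_path) have defaults:

    # Illustration only: the usage-block fast path of the rewritten parse_responses.
    import json

    from evalscope.perf.arguments import Arguments
    from evalscope.perf.plugin.api.custom_api import CustomPlugin

    # No tokenizer_path is given, so token counts must come from the response itself.
    plugin = CustomPlugin(Arguments(model='my-model', url='http://localhost:9999'))
    response = json.dumps({
        'choices': [{'message': {'role': 'assistant', 'content': 'Hi there'}}],
        'usage': {'prompt_tokens': 5, 'completion_tokens': 2},
    })
    print(plugin.parse_responses([response]))  # -> (5, 2)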
evalscope/perf/plugin/api/dashscope_api.py CHANGED
@@ -13,17 +13,10 @@ logger = get_logger()
 @register_api('dashscope')
 class DashScopeApiPlugin(ApiPluginBase):
 
-    def __init__(self, mode_path: str):
-        """Init the plugin
+    def __init__(self, param: Arguments):
+        super().__init__(param)
 
-        Args:
-            mode_path (str): The model path, we use the tokenizer
-                weight in the model to calculate the number of the
-                input and output tokens.
-        """
-        super().__init__(model_path=mode_path)
-
-    def build_request(self, messages: List[Dict], param: Arguments) -> Dict:
+    def build_request(self, messages: List[Dict], param: Arguments = None) -> Dict:
         """Build the openai format request based on prompt, dataset
 
         Args:
@@ -36,6 +29,7 @@ class DashScopeApiPlugin(ApiPluginBase):
         Returns:
             Dict: The request body. None if prompt format is error.
         """
+        param = param or self.param
         try:
             if param.query_template is not None:
                 if param.query_template.startswith('@'):