guidellm 0.3.0rc20250507__py3-none-any.whl → 0.4.0a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of guidellm might be problematic.
- guidellm/__init__.py +8 -13
- guidellm/__main__.py +290 -69
- guidellm/backend/__init__.py +6 -6
- guidellm/backend/backend.py +25 -4
- guidellm/backend/openai.py +147 -27
- guidellm/backend/response.py +6 -2
- guidellm/benchmark/__init__.py +16 -22
- guidellm/benchmark/aggregator.py +3 -3
- guidellm/benchmark/benchmark.py +11 -12
- guidellm/benchmark/benchmarker.py +2 -2
- guidellm/benchmark/entrypoints.py +34 -10
- guidellm/benchmark/output.py +57 -5
- guidellm/benchmark/profile.py +4 -4
- guidellm/benchmark/progress.py +2 -2
- guidellm/benchmark/scenario.py +104 -0
- guidellm/benchmark/scenarios/__init__.py +0 -0
- guidellm/config.py +28 -7
- guidellm/dataset/__init__.py +4 -4
- guidellm/dataset/creator.py +1 -1
- guidellm/dataset/synthetic.py +36 -11
- guidellm/logger.py +8 -4
- guidellm/objects/__init__.py +2 -2
- guidellm/objects/pydantic.py +30 -1
- guidellm/objects/statistics.py +20 -14
- guidellm/preprocess/__init__.py +3 -0
- guidellm/preprocess/dataset.py +374 -0
- guidellm/presentation/__init__.py +28 -0
- guidellm/presentation/builder.py +27 -0
- guidellm/presentation/data_models.py +232 -0
- guidellm/presentation/injector.py +66 -0
- guidellm/request/__init__.py +6 -3
- guidellm/request/loader.py +5 -5
- guidellm/{scheduler → request}/types.py +4 -1
- guidellm/scheduler/__init__.py +10 -15
- guidellm/scheduler/queues.py +25 -0
- guidellm/scheduler/result.py +21 -3
- guidellm/scheduler/scheduler.py +68 -60
- guidellm/scheduler/strategy.py +26 -24
- guidellm/scheduler/worker.py +64 -103
- guidellm/utils/__init__.py +17 -5
- guidellm/utils/cli.py +62 -0
- guidellm/utils/default_group.py +105 -0
- guidellm/utils/dict.py +23 -0
- guidellm/utils/hf_datasets.py +36 -0
- guidellm/utils/random.py +1 -1
- guidellm/utils/text.py +12 -5
- guidellm/version.py +6 -0
- guidellm-0.4.0a2.dist-info/METADATA +317 -0
- guidellm-0.4.0a2.dist-info/RECORD +62 -0
- {guidellm-0.3.0rc20250507.dist-info → guidellm-0.4.0a2.dist-info}/WHEEL +1 -1
- guidellm-0.3.0rc20250507.dist-info/METADATA +0 -451
- guidellm-0.3.0rc20250507.dist-info/RECORD +0 -48
- {guidellm-0.3.0rc20250507.dist-info → guidellm-0.4.0a2.dist-info}/entry_points.txt +0 -0
- {guidellm-0.3.0rc20250507.dist-info → guidellm-0.4.0a2.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.3.0rc20250507.dist-info → guidellm-0.4.0a2.dist-info}/top_level.txt +0 -0
guidellm/preprocess/dataset.py (new file)
@@ -0,0 +1,374 @@
import json
import os
from collections.abc import Iterator
from enum import Enum
from pathlib import Path
from typing import Any, Callable, Optional, Union

import yaml
from datasets import Dataset
from loguru import logger
from pydantic import BaseModel, Field
from transformers import PreTrainedTokenizerBase

from guidellm.dataset import load_dataset as guidellm_load_dataset
from guidellm.utils import IntegerRangeSampler, check_load_processor
from guidellm.utils.hf_datasets import SUPPORTED_TYPES, save_dataset_to_file


class PromptTooShortError(Exception):
    pass


class ShortPromptStrategy(str, Enum):
    IGNORE = "ignore"
    CONCATENATE = "concatenate"
    PAD = "pad"
    ERROR = "error"


def handle_ignore_strategy(
    current_prompt: str,
    min_prompt_tokens: int,
    tokenizer: PreTrainedTokenizerBase,
    **_kwargs,
) -> Optional[str]:
    """
    Ignores prompts that are shorter than the required minimum token length.

    :param current_prompt: The input prompt string.
    :param min_prompt_tokens: Minimum required token count.
    :param tokenizer: Tokenizer used to count tokens.
    :return: The prompt if it meets the length, otherwise None.
    """

    if len(tokenizer.encode(current_prompt)) < min_prompt_tokens:
        logger.warning("Prompt too short, ignoring")
        return None
    return current_prompt


def handle_concatenate_strategy(
    current_prompt: str,
    min_prompt_tokens: int,
    dataset_iterator: Iterator[dict[str, Any]],
    prompt_column: str,
    tokenizer: PreTrainedTokenizerBase,
    concat_delimiter: str,
    **_kwargs,
) -> Optional[str]:
    """
    Concatenates prompts until the minimum token requirement is met.

    :param current_prompt: The initial prompt.
    :param min_prompt_tokens: Target minimum token length.
    :param dataset_iterator: Iterator to fetch more prompts.
    :param prompt_column: Column key for prompt extraction.
    :param tokenizer: Tokenizer used to count tokens.
    :param concat_delimiter: Delimiter to use between prompts.
    :return: Concatenated prompt or None if not enough data.
    """

    tokens_len = len(tokenizer.encode(current_prompt))
    while tokens_len < min_prompt_tokens:
        try:
            next_row = next(dataset_iterator)
        except StopIteration:
            logger.warning(
                "Could not concatenate enough prompts to reach minimum length, ignoring"
            )
            return None
        current_prompt += concat_delimiter + next_row[prompt_column]
        tokens_len = len(tokenizer.encode(current_prompt))
    return current_prompt


def handle_pad_strategy(
    current_prompt: str,
    min_prompt_tokens: int,
    tokenizer: PreTrainedTokenizerBase,
    pad_char: str,
    pad_multiplier: int = 2,
    **_kwargs,
) -> str:
    """
    Pads the prompt with a character until it reaches the minimum token length.

    :param current_prompt: The input prompt.
    :param min_prompt_tokens: Desired minimum token count.
    :param tokenizer: Tokenizer used to count tokens.
    :param pad_char: Character used for padding.
    :param pad_multiplier: Multiplier for padding character length.
    :return: Padded prompt string.
    """

    tokens = tokenizer.encode(current_prompt)
    pad_count = 1
    prompt = current_prompt
    while len(tokens) < min_prompt_tokens:
        prompt += pad_char * pad_count
        tokens = tokenizer.encode(prompt)
        pad_count *= pad_multiplier
    return prompt


def handle_error_strategy(
    current_prompt: str,
    min_prompt_tokens: int,
    tokenizer: PreTrainedTokenizerBase,
    **_kwargs,
) -> Optional[str]:
    """
    Raises an error if the prompt is too short.

    :param current_prompt: The input prompt.
    :param min_prompt_tokens: Required token count.
    :param tokenizer: Tokenizer used to count tokens.
    :return: The input prompt if valid.
    :raises PromptTooShortError: If the prompt is too short.
    """

    prompt_len = len(tokenizer.encode(current_prompt))
    if prompt_len < min_prompt_tokens:
        raise PromptTooShortError(
            f"Found too short prompt: {current_prompt}, with length: {prompt_len}. "
            f"Minimum length required: {min_prompt_tokens}.",
        )
    return current_prompt


STRATEGY_HANDLERS: dict[ShortPromptStrategy, Callable] = {
    ShortPromptStrategy.IGNORE: handle_ignore_strategy,
    ShortPromptStrategy.CONCATENATE: handle_concatenate_strategy,
    ShortPromptStrategy.PAD: handle_pad_strategy,
    ShortPromptStrategy.ERROR: handle_error_strategy,
}


class TokensConfig(BaseModel):
    average: int = Field(
        description="The average number of tokens.",
        gt=0,
    )
    stdev: Optional[int] = Field(
        description="The standard deviation of the tokens.",
        gt=0,
        default=None,
    )
    min: Optional[int] = Field(
        description="The minimum number of tokens.",
        gt=0,
        default=None,
    )
    max: Optional[int] = Field(
        description="The maximum number of tokens.",
        gt=0,
        default=None,
    )

    @staticmethod
    def parse_str(data: Union[str, Path]) -> "TokensConfig":
        """
        Parses a string or path into a TokensConfig object. Supports:
        - JSON string
        - key=value pairs
        - file path to .yaml/.config

        :param data: String or path containing configuration.
        :return: Parsed TokensConfig instance.
        :raises ValueError: If the format is not recognized.
        """

        if (
            isinstance(data, Path)
            or data.strip().endswith(".config")
            or data.strip().endswith(".yaml")
        ):
            return TokensConfig.parse_config_file(data)

        if data.strip().startswith("{"):
            return TokensConfig.parse_json(data)

        if data.count("=") > 1:
            return TokensConfig.parse_key_value_pairs(data)

        raise ValueError(
            f"Unsupported data format. Expected JSON or key-value pairs, got {data}"
        )

    @staticmethod
    def parse_json(data: str) -> "TokensConfig":
        config_dict = json.loads(data.strip())

        return TokensConfig(**config_dict)

    @staticmethod
    def parse_key_value_pairs(data: str) -> "TokensConfig":
        config_dict = {}
        items = data.strip().split(",")
        for item in items:
            key, value = item.split("=")
            config_dict[key.strip()] = (
                int(value.strip()) if value.strip().isnumeric() else value.strip()
            )

        return TokensConfig(**config_dict)  # type: ignore[arg-type]

    @staticmethod
    def parse_config_file(data: Union[str, Path]) -> "TokensConfig":
        with Path(data).open("r") as file:
            config_dict = yaml.safe_load(file)

        return TokensConfig(**config_dict)


def _validate_output_suffix(output_path: Union[str, Path]) -> None:
    output_path = Path(output_path)
    suffix = output_path.suffix.lower()
    if suffix not in SUPPORTED_TYPES:
        raise ValueError(
            f"Unsupported file suffix '{suffix}' in output_path '{output_path}'. "
            f"Only {SUPPORTED_TYPES} are supported."
        )


def process_dataset(
    data: Union[str, Path],
    output_path: Union[str, Path],
    processor: Union[str, Path, PreTrainedTokenizerBase],
    prompt_tokens: Union[str, Path],
    output_tokens: Union[str, Path],
    processor_args: Optional[dict[str, Any]] = None,
    data_args: Optional[dict[str, Any]] = None,
    short_prompt_strategy: ShortPromptStrategy = ShortPromptStrategy.IGNORE,
    pad_char: Optional[str] = None,
    concat_delimiter: Optional[str] = None,
    push_to_hub: bool = False,
    hub_dataset_id: Optional[str] = None,
    random_seed: int = 42,
) -> None:
    """
    Main method to process and save a dataset with sampled prompt/output token counts.

    :param data: Path or identifier for dataset input.
    :param output_path: File path to save the processed dataset.
    :param processor: Tokenizer object or its config.
    :param prompt_tokens: Prompt token config string or file.
    :param output_tokens: Output token config string or file.
    :param processor_args: Optional processor arguments.
    :param data_args: Optional data loading arguments.
    :param short_prompt_strategy: Strategy for handling short prompts.
    :param pad_char: Character used when padding short prompts.
    :param concat_delimiter: Delimiter for concatenation strategy.
    :param push_to_hub: Whether to push to Hugging Face Hub.
    :param hub_dataset_id: Dataset ID on Hugging Face Hub.
    :param random_seed: Seed for random sampling.
    :raises ValueError: If output path is invalid or pushing conditions unmet.
    """

    _validate_output_suffix(output_path)
    logger.info(
        f"Starting dataset conversion | Input: {data} | Output directory: {output_path}"
    )

    dataset, column_mappings = guidellm_load_dataset(
        data, data_args, processor, processor_args
    )
    tokenizer = check_load_processor(
        processor,
        processor_args,
        "dataset conversion.",
    )
    prompt_column = column_mappings.get("prompt_column")
    output_column = column_mappings.get(
        "output_tokens_count_column", "output_tokens_count"
    )

    prompt_tokens_cfg = TokensConfig.parse_str(prompt_tokens)
    output_tokens_cfg = TokensConfig.parse_str(output_tokens)

    prompt_token_sampler = iter(
        IntegerRangeSampler(
            average=prompt_tokens_cfg.average,
            variance=prompt_tokens_cfg.stdev,
            min_value=prompt_tokens_cfg.min,
            max_value=prompt_tokens_cfg.max,
            random_seed=random_seed,
        )
    )

    output_token_sampler = iter(
        IntegerRangeSampler(
            average=output_tokens_cfg.average,
            variance=output_tokens_cfg.stdev,
            min_value=output_tokens_cfg.min,
            max_value=output_tokens_cfg.max,
            random_seed=random_seed,
        )
    )

    dataset_iterator = iter(dataset)
    processed_prompts = []
    prompt_handler = STRATEGY_HANDLERS[short_prompt_strategy]

    for prompt_row in dataset_iterator:
        prompt_text = prompt_row[prompt_column]
        target_prompt_len = next(prompt_token_sampler)

        prompt_text = prompt_handler(
            current_prompt=prompt_text,
            min_prompt_tokens=target_prompt_len,
            dataset_iterator=dataset_iterator,
            prompt_column=prompt_column,
            tokenizer=tokenizer,
            pad_char=pad_char,
            concat_delimiter=concat_delimiter,
        )
        if prompt_text is None:
            continue

        tokens = tokenizer.encode(prompt_text)
        if len(tokens) > target_prompt_len:
            prompt_text = tokenizer.decode(tokens[:target_prompt_len])

        processed_prompt = prompt_row.copy()
        processed_prompt[prompt_column] = prompt_text
        processed_prompt["prompt_tokens_count"] = target_prompt_len
        processed_prompt[output_column] = next(output_token_sampler)

        processed_prompts.append(processed_prompt)

    if not processed_prompts:
        logger.error("No prompts remained after processing")
        return

    logger.info(f"Generated processed dataset with {len(processed_prompts)} prompts")

    processed_dataset = Dataset.from_list(processed_prompts)
    save_dataset_to_file(processed_dataset, output_path)
    logger.info(f"Conversion completed. Dataset saved to: {output_path}")

    if push_to_hub:
        push_dataset_to_hub(hub_dataset_id, processed_dataset)
        logger.info(f"Pushed dataset to: {hub_dataset_id}")


def push_dataset_to_hub(
    hub_dataset_id: Optional[str],
    processed_dataset: Dataset,
) -> None:
    """
    Pushes the processed dataset to Hugging Face Hub using HF_TOKEN.

    :param hub_dataset_id: Identifier on the Hub to push to.
    :param processed_dataset: HuggingFace Dataset object.
    :raises ValueError: If hub_dataset_id or HF_TOKEN is not available.
    """

    hf_token = os.environ.get("HF_TOKEN")
    if not hub_dataset_id or not hf_token:
        raise ValueError(
            "hub_dataset_id and HF_TOKEN env var must be provided when push_to_hub"
            " is True"
        )
    processed_dataset.push_to_hub(hub_dataset_id, token=hf_token)
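The new preprocess module above drives the whole conversion: it loads a dataset, samples target prompt/output token counts, applies a short-prompt strategy, trims over-long prompts, and saves (optionally pushing to the Hub). A minimal usage sketch, not part of the diff; the dataset path, output suffix, and tokenizer id are illustrative assumptions, while the token-config strings follow the formats accepted by TokensConfig.parse_str (JSON, key=value pairs, or a .yaml/.config path):

# Hedged sketch: calling the new preprocessing API directly from Python.
from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset

process_dataset(
    data="prompts.jsonl",                           # assumed local dataset readable by guidellm's loader
    output_path="processed_prompts.jsonl",          # suffix must be one of SUPPORTED_TYPES (assumed here)
    processor="Qwen/Qwen2.5-0.5B-Instruct",         # illustrative Hugging Face tokenizer id
    prompt_tokens="average=256,min=32,max=1024",    # key=value form parsed into TokensConfig
    output_tokens='{"average": 128, "stdev": 16}',  # JSON form is also accepted
    short_prompt_strategy=ShortPromptStrategy.CONCATENATE,
    concat_delimiter=" ",
)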
guidellm/presentation/__init__.py (new file)
@@ -0,0 +1,28 @@
from .builder import UIDataBuilder
from .data_models import (
    BenchmarkDatum,
    Bucket,
    Dataset,
    Distribution,
    Model,
    RunInfo,
    Server,
    TokenDetails,
    WorkloadDetails,
)
from .injector import create_report, inject_data

__all__ = [
    "BenchmarkDatum",
    "Bucket",
    "Dataset",
    "Distribution",
    "Model",
    "RunInfo",
    "Server",
    "TokenDetails",
    "UIDataBuilder",
    "WorkloadDetails",
    "create_report",
    "inject_data",
]
guidellm/presentation/builder.py (new file)
@@ -0,0 +1,27 @@
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
    from guidellm.benchmark.benchmark import GenerativeBenchmark

from .data_models import BenchmarkDatum, RunInfo, WorkloadDetails


class UIDataBuilder:
    def __init__(self, benchmarks: list["GenerativeBenchmark"]):
        self.benchmarks = benchmarks

    def build_run_info(self):
        return RunInfo.from_benchmarks(self.benchmarks)

    def build_workload_details(self):
        return WorkloadDetails.from_benchmarks(self.benchmarks)

    def build_benchmarks(self):
        return [BenchmarkDatum.from_benchmark(b) for b in self.benchmarks]

    def to_dict(self) -> dict[str, Any]:
        return {
            "run_info": self.build_run_info().model_dump(),
            "workload_details": self.build_workload_details().model_dump(),
            "benchmarks": [b.model_dump() for b in self.build_benchmarks()],
        }
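UIDataBuilder is a thin adapter from completed benchmark objects to plain dicts. A hedged sketch of how it might be used; the benchmarks list is assumed to come from a prior guidellm benchmark run, and the exact consumer of the resulting payload (the injector exported alongside it) is not shown in this diff:

from typing import TYPE_CHECKING, Any

from guidellm.presentation import UIDataBuilder

if TYPE_CHECKING:
    from guidellm.benchmark.benchmark import GenerativeBenchmark


def build_ui_payload(benchmarks: list["GenerativeBenchmark"]) -> dict[str, Any]:
    # Serialize run info, workload details, and per-benchmark metrics to plain
    # dicts via model_dump(), matching the keys produced by UIDataBuilder.to_dict().
    return UIDataBuilder(benchmarks).to_dict()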
guidellm/presentation/data_models.py (new file)
@@ -0,0 +1,232 @@
import random
from collections import defaultdict
from math import ceil
from typing import TYPE_CHECKING, Optional, Union

from pydantic import BaseModel, computed_field

if TYPE_CHECKING:
    from guidellm.benchmark.benchmark import GenerativeBenchmark

from guidellm.objects.statistics import DistributionSummary


class Bucket(BaseModel):
    value: Union[float, int]
    count: int

    @staticmethod
    def from_data(
        data: Union[list[float], list[int]],
        bucket_width: Optional[float] = None,
        n_buckets: Optional[int] = None,
    ) -> tuple[list["Bucket"], float]:
        if not data:
            return [], 1.0

        min_v = min(data)
        max_v = max(data)
        range_v = (1 + max_v) - min_v

        if bucket_width is None:
            if n_buckets is None:
                n_buckets = 10
            bucket_width = range_v / n_buckets
        else:
            n_buckets = ceil(range_v / bucket_width)

        bucket_counts: defaultdict[Union[float, int], int] = defaultdict(int)
        for val in data:
            idx = int((val - min_v) // bucket_width)
            if idx >= n_buckets:
                idx = n_buckets - 1
            bucket_start = min_v + idx * bucket_width
            bucket_counts[bucket_start] += 1

        buckets = [
            Bucket(value=start, count=count)
            for start, count in sorted(bucket_counts.items())
        ]
        return buckets, bucket_width


class Model(BaseModel):
    name: str
    size: int


class Dataset(BaseModel):
    name: str


class RunInfo(BaseModel):
    model: Model
    task: str
    timestamp: float
    dataset: Dataset

    @classmethod
    def from_benchmarks(cls, benchmarks: list["GenerativeBenchmark"]):
        model = benchmarks[0].worker.backend_model or "N/A"
        timestamp = max(
            bm.run_stats.start_time for bm in benchmarks if bm.start_time is not None
        )
        return cls(
            model=Model(name=model, size=0),
            task="N/A",
            timestamp=timestamp,
            dataset=Dataset(name="N/A"),
        )


class Distribution(BaseModel):
    statistics: Optional[DistributionSummary] = None
    buckets: list[Bucket]
    bucket_width: float


class TokenDetails(BaseModel):
    samples: list[str]
    token_distributions: Distribution


class Server(BaseModel):
    target: str


class RequestOverTime(BaseModel):
    num_benchmarks: int
    requests_over_time: Distribution


class WorkloadDetails(BaseModel):
    prompts: TokenDetails
    generations: TokenDetails
    requests_over_time: RequestOverTime
    rate_type: str
    server: Server

    @classmethod
    def from_benchmarks(cls, benchmarks: list["GenerativeBenchmark"]):
        target = benchmarks[0].worker.backend_target
        rate_type = benchmarks[0].args.profile.type_
        successful_requests = [
            req for bm in benchmarks for req in bm.requests.successful
        ]
        sample_indices = random.sample(
            range(len(successful_requests)), min(5, len(successful_requests))
        )
        sample_prompts = [
            successful_requests[i].prompt.replace("\n", " ").replace('"', "'")
            for i in sample_indices
        ]
        sample_outputs = [
            successful_requests[i].output.replace("\n", " ").replace('"', "'")
            for i in sample_indices
        ]

        prompt_tokens = [
            float(req.prompt_tokens)
            for bm in benchmarks
            for req in bm.requests.successful
        ]
        output_tokens = [
            float(req.output_tokens)
            for bm in benchmarks
            for req in bm.requests.successful
        ]

        prompt_token_buckets, _prompt_token_bucket_width = Bucket.from_data(
            prompt_tokens, 1
        )
        output_token_buckets, _output_token_bucket_width = Bucket.from_data(
            output_tokens, 1
        )

        prompt_token_stats = DistributionSummary.from_values(prompt_tokens)
        output_token_stats = DistributionSummary.from_values(output_tokens)
        prompt_token_distributions = Distribution(
            statistics=prompt_token_stats, buckets=prompt_token_buckets, bucket_width=1
        )
        output_token_distributions = Distribution(
            statistics=output_token_stats, buckets=output_token_buckets, bucket_width=1
        )

        min_start_time = benchmarks[0].run_stats.start_time

        all_req_times = [
            req.start_time - min_start_time
            for bm in benchmarks
            for req in bm.requests.successful
            if req.start_time is not None
        ]
        number_of_buckets = len(benchmarks)
        request_over_time_buckets, bucket_width = Bucket.from_data(
            all_req_times, None, number_of_buckets
        )
        request_over_time_distribution = Distribution(
            buckets=request_over_time_buckets, bucket_width=bucket_width
        )
        return cls(
            prompts=TokenDetails(
                samples=sample_prompts, token_distributions=prompt_token_distributions
            ),
            generations=TokenDetails(
                samples=sample_outputs, token_distributions=output_token_distributions
            ),
            requests_over_time=RequestOverTime(
                requests_over_time=request_over_time_distribution,
                num_benchmarks=number_of_buckets,
            ),
            rate_type=rate_type,
            server=Server(target=target),
        )


class TabularDistributionSummary(DistributionSummary):
    """
    Same fields as `DistributionSummary`, but adds a ready-to-serialize/iterate
    `percentile_rows` helper.
    """

    @computed_field
    def percentile_rows(self) -> list[dict[str, float]]:
        rows = [
            {"percentile": name, "value": value}
            for name, value in self.percentiles.model_dump().items()
        ]
        return list(
            filter(lambda row: row["percentile"] in ["p50", "p90", "p95", "p99"], rows)
        )

    @classmethod
    def from_distribution_summary(
        cls, distribution: DistributionSummary
    ) -> "TabularDistributionSummary":
        return cls(**distribution.model_dump())


class BenchmarkDatum(BaseModel):
    requests_per_second: float
    tpot: TabularDistributionSummary
    ttft: TabularDistributionSummary
    throughput: TabularDistributionSummary
    time_per_request: TabularDistributionSummary

    @classmethod
    def from_benchmark(cls, bm: "GenerativeBenchmark"):
        return cls(
            requests_per_second=bm.metrics.requests_per_second.successful.mean,
            tpot=TabularDistributionSummary.from_distribution_summary(
                bm.metrics.inter_token_latency_ms.successful
            ),
            ttft=TabularDistributionSummary.from_distribution_summary(
                bm.metrics.time_to_first_token_ms.successful
            ),
            throughput=TabularDistributionSummary.from_distribution_summary(
                bm.metrics.output_tokens_per_second.successful
            ),
            time_per_request=TabularDistributionSummary.from_distribution_summary(
                bm.metrics.request_latency.successful
            ),
        )
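Bucket.from_data above is a small standalone histogram helper: pass either a fixed bucket_width or a target n_buckets and it returns the occupied buckets plus the width it used. A runnable sketch with illustrative values, not part of the diff:

# Hedged sketch: histogramming a small sample with Bucket.from_data.
from guidellm.presentation.data_models import Bucket

latencies = [0.8, 1.1, 1.3, 2.4, 2.5, 2.6, 4.9]

# Derive the width from a requested bucket count...
buckets, width = Bucket.from_data(latencies, n_buckets=5)
for b in buckets:
    print(f"bucket start={b.value:.2f} count={b.count}")

# ...or fix the width, as the prompt/output token distributions above do (bucket_width=1).
token_buckets, _ = Bucket.from_data([12.0, 12.0, 15.0, 20.0], bucket_width=1)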