guidellm 0.3.0rc20250507__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (55)
  1. guidellm/__init__.py +8 -13
  2. guidellm/__main__.py +290 -69
  3. guidellm/backend/__init__.py +6 -6
  4. guidellm/backend/backend.py +25 -4
  5. guidellm/backend/openai.py +153 -30
  6. guidellm/backend/response.py +6 -2
  7. guidellm/benchmark/__init__.py +16 -22
  8. guidellm/benchmark/aggregator.py +3 -3
  9. guidellm/benchmark/benchmark.py +11 -12
  10. guidellm/benchmark/benchmarker.py +2 -2
  11. guidellm/benchmark/entrypoints.py +34 -10
  12. guidellm/benchmark/output.py +59 -8
  13. guidellm/benchmark/profile.py +4 -4
  14. guidellm/benchmark/progress.py +2 -2
  15. guidellm/benchmark/scenario.py +104 -0
  16. guidellm/benchmark/scenarios/__init__.py +0 -0
  17. guidellm/config.py +32 -7
  18. guidellm/dataset/__init__.py +4 -4
  19. guidellm/dataset/creator.py +1 -1
  20. guidellm/dataset/synthetic.py +36 -11
  21. guidellm/logger.py +8 -4
  22. guidellm/objects/__init__.py +2 -2
  23. guidellm/objects/pydantic.py +30 -1
  24. guidellm/objects/statistics.py +20 -14
  25. guidellm/preprocess/__init__.py +3 -0
  26. guidellm/preprocess/dataset.py +374 -0
  27. guidellm/presentation/__init__.py +28 -0
  28. guidellm/presentation/builder.py +27 -0
  29. guidellm/presentation/data_models.py +232 -0
  30. guidellm/presentation/injector.py +66 -0
  31. guidellm/request/__init__.py +6 -3
  32. guidellm/request/loader.py +5 -5
  33. guidellm/{scheduler → request}/types.py +4 -1
  34. guidellm/scheduler/__init__.py +10 -15
  35. guidellm/scheduler/queues.py +25 -0
  36. guidellm/scheduler/result.py +21 -3
  37. guidellm/scheduler/scheduler.py +68 -60
  38. guidellm/scheduler/strategy.py +26 -24
  39. guidellm/scheduler/worker.py +64 -103
  40. guidellm/utils/__init__.py +17 -5
  41. guidellm/utils/cli.py +62 -0
  42. guidellm/utils/default_group.py +105 -0
  43. guidellm/utils/dict.py +23 -0
  44. guidellm/utils/hf_datasets.py +36 -0
  45. guidellm/utils/random.py +1 -1
  46. guidellm/utils/text.py +12 -5
  47. guidellm/version.py +6 -0
  48. guidellm-0.3.1.dist-info/METADATA +329 -0
  49. guidellm-0.3.1.dist-info/RECORD +62 -0
  50. {guidellm-0.3.0rc20250507.dist-info → guidellm-0.3.1.dist-info}/WHEEL +1 -1
  51. guidellm-0.3.0rc20250507.dist-info/METADATA +0 -451
  52. guidellm-0.3.0rc20250507.dist-info/RECORD +0 -48
  53. {guidellm-0.3.0rc20250507.dist-info → guidellm-0.3.1.dist-info}/entry_points.txt +0 -0
  54. {guidellm-0.3.0rc20250507.dist-info → guidellm-0.3.1.dist-info}/licenses/LICENSE +0 -0
  55. {guidellm-0.3.0rc20250507.dist-info → guidellm-0.3.1.dist-info}/top_level.txt +0 -0
guidellm/preprocess/dataset.py (new file)
@@ -0,0 +1,374 @@
+import json
+import os
+from collections.abc import Iterator
+from enum import Enum
+from pathlib import Path
+from typing import Any, Callable, Optional, Union
+
+import yaml
+from datasets import Dataset
+from loguru import logger
+from pydantic import BaseModel, Field
+from transformers import PreTrainedTokenizerBase
+
+from guidellm.dataset import load_dataset as guidellm_load_dataset
+from guidellm.utils import IntegerRangeSampler, check_load_processor
+from guidellm.utils.hf_datasets import SUPPORTED_TYPES, save_dataset_to_file
+
+
+class PromptTooShortError(Exception):
+    pass
+
+
+class ShortPromptStrategy(str, Enum):
+    IGNORE = "ignore"
+    CONCATENATE = "concatenate"
+    PAD = "pad"
+    ERROR = "error"
+
+
+def handle_ignore_strategy(
+    current_prompt: str,
+    min_prompt_tokens: int,
+    tokenizer: PreTrainedTokenizerBase,
+    **_kwargs,
+) -> Optional[str]:
+    """
+    Ignores prompts that are shorter than the required minimum token length.
+
+    :param current_prompt: The input prompt string.
+    :param min_prompt_tokens: Minimum required token count.
+    :param tokenizer: Tokenizer used to count tokens.
+    :return: The prompt if it meets the length, otherwise None.
+    """
+
+    if len(tokenizer.encode(current_prompt)) < min_prompt_tokens:
+        logger.warning("Prompt too short, ignoring")
+        return None
+    return current_prompt
+
+
+def handle_concatenate_strategy(
+    current_prompt: str,
+    min_prompt_tokens: int,
+    dataset_iterator: Iterator[dict[str, Any]],
+    prompt_column: str,
+    tokenizer: PreTrainedTokenizerBase,
+    concat_delimiter: str,
+    **_kwargs,
+) -> Optional[str]:
+    """
+    Concatenates prompts until the minimum token requirement is met.
+
+    :param current_prompt: The initial prompt.
+    :param min_prompt_tokens: Target minimum token length.
+    :param dataset_iterator: Iterator to fetch more prompts.
+    :param prompt_column: Column key for prompt extraction.
+    :param tokenizer: Tokenizer used to count tokens.
+    :param concat_delimiter: Delimiter to use between prompts.
+    :return: Concatenated prompt or None if not enough data.
+    """
+
+    tokens_len = len(tokenizer.encode(current_prompt))
+    while tokens_len < min_prompt_tokens:
+        try:
+            next_row = next(dataset_iterator)
+        except StopIteration:
+            logger.warning(
+                "Could not concatenate enough prompts to reach minimum length, ignoring"
+            )
+            return None
+        current_prompt += concat_delimiter + next_row[prompt_column]
+        tokens_len = len(tokenizer.encode(current_prompt))
+    return current_prompt
+
+
+def handle_pad_strategy(
+    current_prompt: str,
+    min_prompt_tokens: int,
+    tokenizer: PreTrainedTokenizerBase,
+    pad_char: str,
+    pad_multiplier: int = 2,
+    **_kwargs,
+) -> str:
+    """
+    Pads the prompt with a character until it reaches the minimum token length.
+
+    :param current_prompt: The input prompt.
+    :param min_prompt_tokens: Desired minimum token count.
+    :param tokenizer: Tokenizer used to count tokens.
+    :param pad_char: Character used for padding.
+    :param pad_multiplier: Multiplier for padding character length.
+    :return: Padded prompt string.
+    """
+
+    tokens = tokenizer.encode(current_prompt)
+    pad_count = 1
+    prompt = current_prompt
+    while len(tokens) < min_prompt_tokens:
+        prompt += pad_char * pad_count
+        tokens = tokenizer.encode(prompt)
+        pad_count *= pad_multiplier
+    return prompt
+
+
+def handle_error_strategy(
+    current_prompt: str,
+    min_prompt_tokens: int,
+    tokenizer: PreTrainedTokenizerBase,
+    **_kwargs,
+) -> Optional[str]:
+    """
+    Raises an error if the prompt is too short.
+
+    :param current_prompt: The input prompt.
+    :param min_prompt_tokens: Required token count.
+    :param tokenizer: Tokenizer used to count tokens.
+    :return: The input prompt if valid.
+    :raises PromptTooShortError: If the prompt is too short.
+    """

+    prompt_len = len(tokenizer.encode(current_prompt))
+    if prompt_len < min_prompt_tokens:
+        raise PromptTooShortError(
+            f"Found too short prompt: {current_prompt}, with length: {prompt_len}. "
+            f"Minimum length required: {min_prompt_tokens}.",
+        )
+    return current_prompt
+
+
+STRATEGY_HANDLERS: dict[ShortPromptStrategy, Callable] = {
+    ShortPromptStrategy.IGNORE: handle_ignore_strategy,
+    ShortPromptStrategy.CONCATENATE: handle_concatenate_strategy,
+    ShortPromptStrategy.PAD: handle_pad_strategy,
+    ShortPromptStrategy.ERROR: handle_error_strategy,
+}
+
+
+class TokensConfig(BaseModel):
+    average: int = Field(
+        description="The average number of tokens.",
+        gt=0,
+    )
+    stdev: Optional[int] = Field(
+        description="The standard deviation of the tokens.",
+        gt=0,
+        default=None,
+    )
+    min: Optional[int] = Field(
+        description="The minimum number of tokens.",
+        gt=0,
+        default=None,
+    )
+    max: Optional[int] = Field(
+        description="The maximum number of tokens.",
+        gt=0,
+        default=None,
+    )
+
+    @staticmethod
+    def parse_str(data: Union[str, Path]) -> "TokensConfig":
+        """
+        Parses a string or path into a TokensConfig object. Supports:
+        - JSON string
+        - key=value pairs
+        - file path to .yaml/.config
+
+        :param data: String or path containing configuration.
+        :return: Parsed TokensConfig instance.
+        :raises ValueError: If the format is not recognized.
+        """
+
+        if (
+            isinstance(data, Path)
+            or data.strip().endswith(".config")
+            or data.strip().endswith(".yaml")
+        ):
+            return TokensConfig.parse_config_file(data)
+
+        if data.strip().startswith("{"):
+            return TokensConfig.parse_json(data)
+
+        if data.count("=") > 1:
+            return TokensConfig.parse_key_value_pairs(data)
+
+        raise ValueError(
+            f"Unsupported data format. Expected JSON or key-value pairs, got {data}"
+        )
+
+    @staticmethod
+    def parse_json(data: str) -> "TokensConfig":
+        config_dict = json.loads(data.strip())
+
+        return TokensConfig(**config_dict)
+
+    @staticmethod
+    def parse_key_value_pairs(data: str) -> "TokensConfig":
+        config_dict = {}
+        items = data.strip().split(",")
+        for item in items:
+            key, value = item.split("=")
+            config_dict[key.strip()] = (
+                int(value.strip()) if value.strip().isnumeric() else value.strip()
+            )
+
+        return TokensConfig(**config_dict)  # type: ignore[arg-type]
+
+    @staticmethod
+    def parse_config_file(data: Union[str, Path]) -> "TokensConfig":
+        with Path(data).open("r") as file:
+            config_dict = yaml.safe_load(file)
+
+        return TokensConfig(**config_dict)
+
+
+def _validate_output_suffix(output_path: Union[str, Path]) -> None:
+    output_path = Path(output_path)
+    suffix = output_path.suffix.lower()
+    if suffix not in SUPPORTED_TYPES:
+        raise ValueError(
+            f"Unsupported file suffix '{suffix}' in output_path '{output_path}'. "
+            f"Only {SUPPORTED_TYPES} are supported."
+        )
+
+
+def process_dataset(
+    data: Union[str, Path],
+    output_path: Union[str, Path],
+    processor: Union[str, Path, PreTrainedTokenizerBase],
+    prompt_tokens: Union[str, Path],
+    output_tokens: Union[str, Path],
+    processor_args: Optional[dict[str, Any]] = None,
+    data_args: Optional[dict[str, Any]] = None,
+    short_prompt_strategy: ShortPromptStrategy = ShortPromptStrategy.IGNORE,
+    pad_char: Optional[str] = None,
+    concat_delimiter: Optional[str] = None,
+    push_to_hub: bool = False,
+    hub_dataset_id: Optional[str] = None,
+    random_seed: int = 42,
+) -> None:
+    """
+    Main method to process and save a dataset with sampled prompt/output token counts.
+
+    :param data: Path or identifier for dataset input.
+    :param output_path: File path to save the processed dataset.
+    :param processor: Tokenizer object or its config.
+    :param prompt_tokens: Prompt token config string or file.
+    :param output_tokens: Output token config string or file.
+    :param processor_args: Optional processor arguments.
+    :param data_args: Optional data loading arguments.
+    :param short_prompt_strategy: Strategy for handling short prompts.
+    :param pad_char: Character used when padding short prompts.
+    :param concat_delimiter: Delimiter for concatenation strategy.
+    :param push_to_hub: Whether to push to Hugging Face Hub.
+    :param hub_dataset_id: Dataset ID on Hugging Face Hub.
+    :param random_seed: Seed for random sampling.
+    :raises ValueError: If output path is invalid or pushing conditions unmet.
+    """
+
+    _validate_output_suffix(output_path)
+    logger.info(
+        f"Starting dataset conversion | Input: {data} | Output directory: {output_path}"
+    )
+
+    dataset, column_mappings = guidellm_load_dataset(
+        data, data_args, processor, processor_args
+    )
+    tokenizer = check_load_processor(
+        processor,
+        processor_args,
+        "dataset conversion.",
+    )
+    prompt_column = column_mappings.get("prompt_column")
+    output_column = column_mappings.get(
+        "output_tokens_count_column", "output_tokens_count"
+    )
+
+    prompt_tokens_cfg = TokensConfig.parse_str(prompt_tokens)
+    output_tokens_cfg = TokensConfig.parse_str(output_tokens)
+
+    prompt_token_sampler = iter(
+        IntegerRangeSampler(
+            average=prompt_tokens_cfg.average,
+            variance=prompt_tokens_cfg.stdev,
+            min_value=prompt_tokens_cfg.min,
+            max_value=prompt_tokens_cfg.max,
+            random_seed=random_seed,
+        )
+    )
+
+    output_token_sampler = iter(
+        IntegerRangeSampler(
+            average=output_tokens_cfg.average,
+            variance=output_tokens_cfg.stdev,
+            min_value=output_tokens_cfg.min,
+            max_value=output_tokens_cfg.max,
+            random_seed=random_seed,
+        )
+    )
+
+    dataset_iterator = iter(dataset)
+    processed_prompts = []
+    prompt_handler = STRATEGY_HANDLERS[short_prompt_strategy]
+
+    for prompt_row in dataset_iterator:
+        prompt_text = prompt_row[prompt_column]
+        target_prompt_len = next(prompt_token_sampler)
+
+        prompt_text = prompt_handler(
+            current_prompt=prompt_text,
+            min_prompt_tokens=target_prompt_len,
+            dataset_iterator=dataset_iterator,
+            prompt_column=prompt_column,
+            tokenizer=tokenizer,
+            pad_char=pad_char,
+            concat_delimiter=concat_delimiter,
+        )
+        if prompt_text is None:
+            continue
+
+        tokens = tokenizer.encode(prompt_text)
+        if len(tokens) > target_prompt_len:
+            prompt_text = tokenizer.decode(tokens[:target_prompt_len])
+
+        processed_prompt = prompt_row.copy()
+        processed_prompt[prompt_column] = prompt_text
+        processed_prompt["prompt_tokens_count"] = target_prompt_len
+        processed_prompt[output_column] = next(output_token_sampler)
+
+        processed_prompts.append(processed_prompt)
+
+    if not processed_prompts:
+        logger.error("No prompts remained after processing")
+        return
+
+    logger.info(f"Generated processed dataset with {len(processed_prompts)} prompts")
+
+    processed_dataset = Dataset.from_list(processed_prompts)
+    save_dataset_to_file(processed_dataset, output_path)
+    logger.info(f"Conversion completed. Dataset saved to: {output_path}")
+
+    if push_to_hub:
+        push_dataset_to_hub(hub_dataset_id, processed_dataset)
+        logger.info(f"Pushed dataset to: {hub_dataset_id}")
+
+
+def push_dataset_to_hub(
+    hub_dataset_id: Optional[str],
+    processed_dataset: Dataset,
+) -> None:
+    """
+    Pushes the processed dataset to Hugging Face Hub using HF_TOKEN.
+
+    :param hub_dataset_id: Identifier on the Hub to push to.
+    :param processed_dataset: HuggingFace Dataset object.
+    :raises ValueError: If hub_dataset_id or HF_TOKEN is not available.
+    """
+
+    hf_token = os.environ.get("HF_TOKEN")
+    if not hub_dataset_id or not hf_token:
+        raise ValueError(
+            "hub_dataset_id and HF_TOKEN env var must be provided when push_to_hub"
+            " is True"
+        )
+    processed_dataset.push_to_hub(hub_dataset_id, token=hf_token)
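
The new guidellm.preprocess.dataset module above is driven by process_dataset. A minimal usage sketch, assuming a .jsonl suffix is among the SUPPORTED_TYPES accepted by guidellm.utils.hf_datasets and using an illustrative dataset id and tokenizer name (neither is taken from this diff):

    from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset

    # Resample prompts to roughly 256 tokens each and attach a sampled output
    # token count (around 128) per row; short prompts are padded with spaces.
    # The dataset id, tokenizer name, and output file are placeholders.
    process_dataset(
        data="databricks/databricks-dolly-15k",
        output_path="processed_dataset.jsonl",
        processor="gpt2",
        prompt_tokens="average=256,min=32,max=1024",
        output_tokens="average=128,max=512",
        short_prompt_strategy=ShortPromptStrategy.PAD,
        pad_char=" ",
    )

The token specs are parsed by TokensConfig.parse_str, so the same values could also be given as a JSON string or as a path to a .yaml/.config file.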
guidellm/presentation/__init__.py (new file)
@@ -0,0 +1,28 @@
+from .builder import UIDataBuilder
+from .data_models import (
+    BenchmarkDatum,
+    Bucket,
+    Dataset,
+    Distribution,
+    Model,
+    RunInfo,
+    Server,
+    TokenDetails,
+    WorkloadDetails,
+)
+from .injector import create_report, inject_data
+
+__all__ = [
+    "BenchmarkDatum",
+    "Bucket",
+    "Dataset",
+    "Distribution",
+    "Model",
+    "RunInfo",
+    "Server",
+    "TokenDetails",
+    "UIDataBuilder",
+    "WorkloadDetails",
+    "create_report",
+    "inject_data",
+]
guidellm/presentation/builder.py (new file)
@@ -0,0 +1,27 @@
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from guidellm.benchmark.benchmark import GenerativeBenchmark
+
+from .data_models import BenchmarkDatum, RunInfo, WorkloadDetails
+
+
+class UIDataBuilder:
+    def __init__(self, benchmarks: list["GenerativeBenchmark"]):
+        self.benchmarks = benchmarks
+
+    def build_run_info(self):
+        return RunInfo.from_benchmarks(self.benchmarks)
+
+    def build_workload_details(self):
+        return WorkloadDetails.from_benchmarks(self.benchmarks)
+
+    def build_benchmarks(self):
+        return [BenchmarkDatum.from_benchmark(b) for b in self.benchmarks]
+
+    def to_dict(self) -> dict[str, Any]:
+        return {
+            "run_info": self.build_run_info().model_dump(),
+            "workload_details": self.build_workload_details().model_dump(),
+            "benchmarks": [b.model_dump() for b in self.build_benchmarks()],
+        }
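
A sketch of how UIDataBuilder appears intended to be consumed, assuming completed_benchmarks is a list of GenerativeBenchmark results from a finished run (it is not constructed here):

    from guidellm.presentation import UIDataBuilder

    # completed_benchmarks: list[GenerativeBenchmark] from a prior benchmark run.
    ui_data = UIDataBuilder(completed_benchmarks).to_dict()
    # Plain dicts ready for serialization into a report:
    run_info = ui_data["run_info"]
    workload = ui_data["workload_details"]
    per_benchmark = ui_data["benchmarks"]

create_report and inject_data, also exported from guidellm.presentation, presumably consume this dictionary; their implementations live in guidellm/presentation/injector.py, which is not expanded in this diff.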
guidellm/presentation/data_models.py (new file)
@@ -0,0 +1,232 @@
+import random
+from collections import defaultdict
+from math import ceil
+from typing import TYPE_CHECKING, Optional, Union
+
+from pydantic import BaseModel, computed_field
+
+if TYPE_CHECKING:
+    from guidellm.benchmark.benchmark import GenerativeBenchmark
+
+from guidellm.objects.statistics import DistributionSummary
+
+
+class Bucket(BaseModel):
+    value: Union[float, int]
+    count: int
+
+    @staticmethod
+    def from_data(
+        data: Union[list[float], list[int]],
+        bucket_width: Optional[float] = None,
+        n_buckets: Optional[int] = None,
+    ) -> tuple[list["Bucket"], float]:
+        if not data:
+            return [], 1.0
+
+        min_v = min(data)
+        max_v = max(data)
+        range_v = (1 + max_v) - min_v
+
+        if bucket_width is None:
+            if n_buckets is None:
+                n_buckets = 10
+            bucket_width = range_v / n_buckets
+        else:
+            n_buckets = ceil(range_v / bucket_width)
+
+        bucket_counts: defaultdict[Union[float, int], int] = defaultdict(int)
+        for val in data:
+            idx = int((val - min_v) // bucket_width)
+            if idx >= n_buckets:
+                idx = n_buckets - 1
+            bucket_start = min_v + idx * bucket_width
+            bucket_counts[bucket_start] += 1
+
+        buckets = [
+            Bucket(value=start, count=count)
+            for start, count in sorted(bucket_counts.items())
+        ]
+        return buckets, bucket_width
+
+
+class Model(BaseModel):
+    name: str
+    size: int
+
+
+class Dataset(BaseModel):
+    name: str
+
+
+class RunInfo(BaseModel):
+    model: Model
+    task: str
+    timestamp: float
+    dataset: Dataset
+
+    @classmethod
+    def from_benchmarks(cls, benchmarks: list["GenerativeBenchmark"]):
+        model = benchmarks[0].worker.backend_model or "N/A"
+        timestamp = max(
+            bm.run_stats.start_time for bm in benchmarks if bm.start_time is not None
+        )
+        return cls(
+            model=Model(name=model, size=0),
+            task="N/A",
+            timestamp=timestamp,
+            dataset=Dataset(name="N/A"),
+        )
+
+
+class Distribution(BaseModel):
+    statistics: Optional[DistributionSummary] = None
+    buckets: list[Bucket]
+    bucket_width: float
+
+
+class TokenDetails(BaseModel):
+    samples: list[str]
+    token_distributions: Distribution
+
+
+class Server(BaseModel):
+    target: str
+
+
+class RequestOverTime(BaseModel):
+    num_benchmarks: int
+    requests_over_time: Distribution
+
+
+class WorkloadDetails(BaseModel):
+    prompts: TokenDetails
+    generations: TokenDetails
+    requests_over_time: RequestOverTime
+    rate_type: str
+    server: Server
+
+    @classmethod
+    def from_benchmarks(cls, benchmarks: list["GenerativeBenchmark"]):
+        target = benchmarks[0].worker.backend_target
+        rate_type = benchmarks[0].args.profile.type_
+        successful_requests = [
+            req for bm in benchmarks for req in bm.requests.successful
+        ]
+        sample_indices = random.sample(
+            range(len(successful_requests)), min(5, len(successful_requests))
+        )
+        sample_prompts = [
+            successful_requests[i].prompt.replace("\n", " ").replace('"', "'")
+            for i in sample_indices
+        ]
+        sample_outputs = [
+            successful_requests[i].output.replace("\n", " ").replace('"', "'")
+            for i in sample_indices
+        ]
+
+        prompt_tokens = [
+            float(req.prompt_tokens)
+            for bm in benchmarks
+            for req in bm.requests.successful
+        ]
+        output_tokens = [
+            float(req.output_tokens)
+            for bm in benchmarks
+            for req in bm.requests.successful
+        ]
+
+        prompt_token_buckets, _prompt_token_bucket_width = Bucket.from_data(
+            prompt_tokens, 1
+        )
+        output_token_buckets, _output_token_bucket_width = Bucket.from_data(
+            output_tokens, 1
+        )
+
+        prompt_token_stats = DistributionSummary.from_values(prompt_tokens)
+        output_token_stats = DistributionSummary.from_values(output_tokens)
+        prompt_token_distributions = Distribution(
+            statistics=prompt_token_stats, buckets=prompt_token_buckets, bucket_width=1
+        )
+        output_token_distributions = Distribution(
+            statistics=output_token_stats, buckets=output_token_buckets, bucket_width=1
+        )
+
+        min_start_time = benchmarks[0].run_stats.start_time
+
+        all_req_times = [
+            req.start_time - min_start_time
+            for bm in benchmarks
+            for req in bm.requests.successful
+            if req.start_time is not None
+        ]
+        number_of_buckets = len(benchmarks)
+        request_over_time_buckets, bucket_width = Bucket.from_data(
+            all_req_times, None, number_of_buckets
+        )
+        request_over_time_distribution = Distribution(
+            buckets=request_over_time_buckets, bucket_width=bucket_width
+        )
+        return cls(
+            prompts=TokenDetails(
+                samples=sample_prompts, token_distributions=prompt_token_distributions
+            ),
+            generations=TokenDetails(
+                samples=sample_outputs, token_distributions=output_token_distributions
+            ),
+            requests_over_time=RequestOverTime(
+                requests_over_time=request_over_time_distribution,
+                num_benchmarks=number_of_buckets,
+            ),
+            rate_type=rate_type,
+            server=Server(target=target),
+        )
+
+
+class TabularDistributionSummary(DistributionSummary):
+    """
+    Same fields as `DistributionSummary`, but adds a ready-to-serialize/iterate
+    `percentile_rows` helper.
+    """
+
+    @computed_field
+    def percentile_rows(self) -> list[dict[str, Union[str, float]]]:
+        rows = [
+            {"percentile": name, "value": value}
+            for name, value in self.percentiles.model_dump().items()
+        ]
+        return list(
+            filter(lambda row: row["percentile"] in ["p50", "p90", "p95", "p99"], rows)
+        )
+
+    @classmethod
+    def from_distribution_summary(
+        cls, distribution: DistributionSummary
+    ) -> "TabularDistributionSummary":
+        return cls(**distribution.model_dump())
+
+
+class BenchmarkDatum(BaseModel):
+    requests_per_second: float
+    itl: TabularDistributionSummary
+    ttft: TabularDistributionSummary
+    throughput: TabularDistributionSummary
+    time_per_request: TabularDistributionSummary
+
+    @classmethod
+    def from_benchmark(cls, bm: "GenerativeBenchmark"):
+        return cls(
+            requests_per_second=bm.metrics.requests_per_second.successful.mean,
+            itl=TabularDistributionSummary.from_distribution_summary(
+                bm.metrics.inter_token_latency_ms.successful
+            ),
+            ttft=TabularDistributionSummary.from_distribution_summary(
+                bm.metrics.time_to_first_token_ms.successful
+            ),
+            throughput=TabularDistributionSummary.from_distribution_summary(
+                bm.metrics.output_tokens_per_second.successful
+            ),
+            time_per_request=TabularDistributionSummary.from_distribution_summary(
+                bm.metrics.request_latency.successful
+            ),
+        )
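
The bucketing logic in Bucket.from_data above is easy to sanity-check by hand; a small worked example with width-1 buckets over integer token counts:

    from guidellm.presentation.data_models import Bucket

    buckets, width = Bucket.from_data([3, 3, 4, 7, 7, 7], bucket_width=1)
    # range_v = (1 + 7) - 3 = 5, so n_buckets = ceil(5 / 1) = 5
    # width == 1 and buckets are sorted by their start value:
    #   Bucket(value=3, count=2), Bucket(value=4, count=1), Bucket(value=7, count=3)

This is the same call WorkloadDetails.from_benchmarks makes for the prompt and output token histograms (bucket_width=1), while the requests-over-time distribution instead passes n_buckets=len(benchmarks) and lets the width be derived.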