guidellm-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of guidellm might be problematic.
- guidellm/__init__.py +19 -0
- guidellm/backend/__init__.py +10 -0
- guidellm/backend/base.py +320 -0
- guidellm/backend/openai.py +168 -0
- guidellm/config.py +234 -0
- guidellm/core/__init__.py +24 -0
- guidellm/core/distribution.py +190 -0
- guidellm/core/report.py +321 -0
- guidellm/core/request.py +44 -0
- guidellm/core/result.py +545 -0
- guidellm/core/serializable.py +169 -0
- guidellm/executor/__init__.py +10 -0
- guidellm/executor/base.py +213 -0
- guidellm/executor/profile_generator.py +343 -0
- guidellm/logger.py +83 -0
- guidellm/main.py +336 -0
- guidellm/request/__init__.py +13 -0
- guidellm/request/base.py +194 -0
- guidellm/request/emulated.py +391 -0
- guidellm/request/file.py +76 -0
- guidellm/request/transformers.py +100 -0
- guidellm/scheduler/__init__.py +4 -0
- guidellm/scheduler/base.py +374 -0
- guidellm/scheduler/load_generator.py +196 -0
- guidellm/utils/__init__.py +40 -0
- guidellm/utils/injector.py +70 -0
- guidellm/utils/progress.py +196 -0
- guidellm/utils/text.py +455 -0
- guidellm/utils/transformers.py +151 -0
- guidellm-0.1.0.dist-info/LICENSE +201 -0
- guidellm-0.1.0.dist-info/METADATA +434 -0
- guidellm-0.1.0.dist-info/RECORD +35 -0
- guidellm-0.1.0.dist-info/WHEEL +5 -0
- guidellm-0.1.0.dist-info/entry_points.txt +3 -0
- guidellm-0.1.0.dist-info/top_level.txt +1 -0
guidellm/core/report.py
ADDED
@@ -0,0 +1,321 @@

import time
from datetime import datetime
from typing import List, Optional

from loguru import logger
from pydantic import Field
from rich.console import Console, Group
from rich.live import Live
from rich.panel import Panel
from rich.table import Table

from guidellm.core.result import TextGenerationBenchmark, TextGenerationBenchmarkReport
from guidellm.core.serializable import Serializable

__all__ = ["GuidanceReport"]


def _create_benchmark_report_details(report: TextGenerationBenchmarkReport) -> str:
    """
    Create a detailed string representation of a benchmark report.

    :param report: The benchmark report to generate details for.
    :type report: TextGenerationBenchmarkReport
    :return: A string containing the backend, data, rate, and limits of
        the benchmark report.
    :rtype: str
    """
    backend = (
        f"Backend(type={report.args.get('backend_type', 'N/A')}, "
        f"target={report.args.get('target', 'N/A')}, "
        f"model={report.args.get('model', 'N/A')})"
    )
    data = (
        f"Data(type={report.args.get('data_type', 'N/A')}, "
        f"source={report.args.get('data', 'N/A')}, "
        f"tokenizer={report.args.get('tokenizer', 'N/A')})"
    )
    rate = (
        f"Rate(type={report.args.get('mode', 'N/A')}, "
        f"rate={report.args.get('rate', 'N/A')})"
    )
    limits = (
        f"Limits(max_number={report.args.get('max_number', 'N/A')} requests, "
        f"max_duration={report.args.get('max_duration', 'N/A')} sec)"
    )

    logger.debug(
        "Created benchmark report details for backend={}, data={}, rate={}, limits={}",
        backend,
        data,
        rate,
        limits,
    )

    return backend + "\n" + data + "\n" + rate + "\n" + limits + "\n"


def _benchmark_rate_id(benchmark: TextGenerationBenchmark) -> str:
    """
    Generate a string identifier for a benchmark rate.

    :param benchmark: The benchmark for which to generate the rate ID.
    :type benchmark: TextGenerationBenchmark
    :return: A string representing the benchmark rate ID.
    :rtype: str
    """
    rate_id = (
        f"{benchmark.mode}@{benchmark.rate:.2f} req/sec"
        if benchmark.rate
        else f"{benchmark.mode}"
    )
    logger.debug("Generated benchmark rate ID: {}", rate_id)
    return rate_id


def _create_benchmark_report_requests_summary(
    report: TextGenerationBenchmarkReport,
) -> Table:
    """
    Create a table summarizing the requests of a benchmark report.

    :param report: The benchmark report to summarize.
    :type report: TextGenerationBenchmarkReport
    :return: A rich Table object summarizing the requests.
    :rtype: Table
    """
    table = Table(
        "Benchmark",
        "Requests Completed",
        "Requests Failed",
        "Duration",
        "Start Time",
        "End Time",
        title="[magenta]Requests Data by Benchmark[/magenta]",
        title_style="bold",
        title_justify="left",
        show_header=True,
    )

    for benchmark in report.benchmarks_sorted:
        start_time_str = (
            datetime.fromtimestamp(benchmark.start_time).strftime("%H:%M:%S")
            if benchmark.start_time
            else "N/A"
        )
        end_time_str = (
            datetime.fromtimestamp(benchmark.end_time).strftime("%H:%M:%S")
            if benchmark.end_time
            else "N/A"
        )

        table.add_row(
            _benchmark_rate_id(benchmark),
            f"{benchmark.request_count}/{benchmark.total_count}",
            f"{benchmark.error_count}/{benchmark.total_count}",
            f"{benchmark.duration:.2f} sec",
            start_time_str,
            end_time_str,
        )
    logger.debug("Created requests summary table for the report.")
    return table


def _create_benchmark_report_data_tokens_summary(
    report: TextGenerationBenchmarkReport,
) -> Table:
    """
    Create a table summarizing data tokens of a benchmark report.

    :param report: The benchmark report to summarize.
    :type report: TextGenerationBenchmarkReport
    :return: A rich Table object summarizing the data tokens.
    :rtype: Table
    """
    table = Table(
        "Benchmark",
        "Prompt",
        "Prompt (1%, 5%, 50%, 95%, 99%)",
        "Output",
        "Output (1%, 5%, 50%, 95%, 99%)",
        title="[magenta]Tokens Data by Benchmark[/magenta]",
        title_style="bold",
        title_justify="left",
        show_header=True,
    )

    for benchmark in report.benchmarks_sorted:
        table.add_row(
            _benchmark_rate_id(benchmark),
            f"{benchmark.prompt_token_distribution.mean:.2f}",
            ", ".join(
                f"{percentile:.1f}"
                for percentile in benchmark.prompt_token_distribution.percentiles(
                    [1, 5, 50, 95, 99]
                )
            ),
            f"{benchmark.output_token_distribution.mean:.2f}",
            ", ".join(
                f"{percentile:.1f}"
                for percentile in benchmark.output_token_distribution.percentiles(
                    [1, 5, 50, 95, 99]
                )
            ),
        )
    logger.debug("Created data tokens summary table for the report.")
    return table


def _create_benchmark_report_dist_perf_summary(
    report: TextGenerationBenchmarkReport,
) -> Table:
    """
    Create a table summarizing distribution performance of a benchmark report.

    :param report: The benchmark report to summarize.
    :type report: TextGenerationBenchmarkReport
    :return: A rich Table object summarizing the performance statistics.
    :rtype: Table
    """
    table = Table(
        "Benchmark",
        "Request Latency [1%, 5%, 10%, 50%, 90%, 95%, 99%] (sec)",
        "Time to First Token [1%, 5%, 10%, 50%, 90%, 95%, 99%] (ms)",
        "Inter Token Latency [1%, 5%, 10%, 50%, 90%, 95%, 99%] (ms)",
        title="[magenta]Performance Stats by Benchmark[/magenta]",
        title_style="bold",
        title_justify="left",
        show_header=True,
    )

    for benchmark in report.benchmarks_sorted:
        table.add_row(
            _benchmark_rate_id(benchmark),
            ", ".join(
                f"{percentile:.2f}"
                for percentile in benchmark.request_latency_distribution.percentiles(
                    [1, 5, 10, 50, 90, 95, 99]
                )
            ),
            ", ".join(
                f"{percentile * 1000:.1f}"
                for percentile in benchmark.ttft_distribution.percentiles(
                    [1, 5, 10, 50, 90, 95, 99]
                )
            ),
            ", ".join(
                f"{percentile * 1000:.1f}"
                for percentile in benchmark.itl_distribution.percentiles(
                    [1, 5, 10, 50, 90, 95, 99]
                )
            ),
        )
    logger.debug("Created distribution performance summary table for the report.")
    return table


def _create_benchmark_report_summary(report: TextGenerationBenchmarkReport) -> Table:
    """
    Create a summary table for a benchmark report.

    :param report: The benchmark report to summarize.
    :type report: TextGenerationBenchmarkReport
    :return: A rich Table object summarizing overall performance.
    :rtype: Table
    """
    table = Table(
        "Benchmark",
        "Requests per Second",
        "Request Latency",
        "Time to First Token",
        "Inter Token Latency",
        "Output Token Throughput",
        title="[magenta]Performance Summary by Benchmark[/magenta]",
        title_style="bold",
        title_justify="left",
        show_header=True,
    )

    for benchmark in report.benchmarks_sorted:
        table.add_row(
            _benchmark_rate_id(benchmark),
            f"{benchmark.completed_request_rate:.2f} req/sec",
            f"{benchmark.request_latency:.2f} sec",
            f"{benchmark.time_to_first_token:.2f} ms",
            f"{benchmark.inter_token_latency:.2f} ms",
            f"{benchmark.output_token_throughput:.2f} tokens/sec",
        )
    logger.debug("Created overall performance summary table for the report.")
    return table


class GuidanceReport(Serializable):
    """
    A class to manage the guidance reports that include the benchmarking details,
    potentially across multiple runs, for saving and loading from disk.

    :param benchmarks: The list of benchmarking reports.
    :type benchmarks: List[TextGenerationBenchmarkReport]
    """

    benchmarks: List[TextGenerationBenchmarkReport] = Field(
        default_factory=list, description="The list of benchmark reports."
    )

    def print(
        self, save_path: Optional[str] = None, continual_refresh: bool = False
    ) -> None:
        """
        Print the guidance report to the console.

        :param save_path: Optional path to save the report to disk.
        :type save_path: Optional[str]
        :param continual_refresh: Whether to continually refresh the report.
        :type continual_refresh: bool
        :return: None
        """
        logger.info("Printing guidance report to console with save_path={}", save_path)
        report_viz = Panel(
            Group(
                *[
                    Panel(
                        Group(
                            _create_benchmark_report_details(benchmark),
                            "",
                            _create_benchmark_report_requests_summary(benchmark),
                            "",
                            _create_benchmark_report_data_tokens_summary(benchmark),
                            "",
                            _create_benchmark_report_dist_perf_summary(benchmark),
                            "",
                            _create_benchmark_report_summary(benchmark),
                        ),
                        title=(
                            f"[bold magenta]Benchmark Report "
                            f"{index + 1}[/bold magenta]"
                        ),
                        expand=True,
                        title_align="left",
                    )
                    for index, benchmark in enumerate(self.benchmarks)
                ],
            ),
            title=(
                "[bold cyan]GuideLLM Benchmarks Report[/bold cyan] [italic]"
                f"({save_path})[/italic]"
            ),
            expand=True,
            title_align="left",
        )
        console = Console()

        if continual_refresh:
            logger.info("Starting live report with continual refresh.")
            with Live(report_viz, refresh_per_second=1, console=console) as live:
                while True:
                    live.update(report_viz)
                    time.sleep(1)
        else:
            console.print(report_viz)

        logger.info("Guidance report printing completed.")
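For orientation, a minimal usage sketch of GuidanceReport follows. It assumes TextGenerationBenchmarkReport can be constructed with no arguments (true only if all of its pydantic fields have defaults; real reports are produced by a benchmark run, not built by hand). Note also that in this version print() only interpolates save_path into the panel title; it does not itself write the report to disk.

from guidellm.core.report import GuidanceReport
from guidellm.core.result import TextGenerationBenchmarkReport

# Hypothetical empty report purely for illustration; in practice these
# objects come out of an executed benchmark run.
benchmark_report = TextGenerationBenchmarkReport()

guidance = GuidanceReport(benchmarks=[benchmark_report])

# Render the nested rich panels and tables once to the console. Passing
# continual_refresh=True would instead redraw the view every second forever.
guidance.print(save_path="benchmarks.json", continual_refresh=False)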
guidellm/core/request.py
ADDED
@@ -0,0 +1,44 @@

import uuid
from typing import Any, Dict, Optional

from pydantic import Field

from guidellm.core.serializable import Serializable


class TextGenerationRequest(Serializable):
    """
    A class to represent a text generation request for generative AI workloads.
    """

    id: str = Field(
        default_factory=lambda: str(uuid.uuid4()),
        description="The unique identifier for the request.",
    )
    prompt: str = Field(description="The input prompt for the text generation.")
    prompt_token_count: Optional[int] = Field(
        default=None,
        description="The number of tokens in the input prompt.",
    )
    output_token_count: Optional[int] = Field(
        default=None,
        description="The number of tokens to generate.",
    )
    params: Dict[str, Any] = Field(
        default_factory=dict,
        description="The parameters for the text generation request.",
    )

    def __str__(self) -> str:
        prompt_short = (
            self.prompt[:32] + "..."
            if self.prompt and len(self.prompt) > 32  # noqa: PLR2004
            else self.prompt
        )

        return (
            f"TextGenerationRequest(id={self.id}, "
            f"prompt={prompt_short}, prompt_token_count={self.prompt_token_count}, "
            f"output_token_count={self.output_token_count}, "
            f"params={self.params})"
        )
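A quick construction sketch for TextGenerationRequest; all values below are illustrative, not taken from the package.

from guidellm.core.request import TextGenerationRequest

# id is generated automatically via uuid4; params carries backend-specific
# generation options through to the request.
request = TextGenerationRequest(
    prompt="Write a haiku about GPUs.",
    prompt_token_count=8,         # illustrative count
    output_token_count=32,        # illustrative count
    params={"temperature": 0.7},  # illustrative backend option
)

print(request)  # __str__ truncates prompts longer than 32 characters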