guidellm 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

@@ -0,0 +1,321 @@
+ import time
+ from datetime import datetime
+ from typing import List, Optional
+
+ from loguru import logger
+ from pydantic import Field
+ from rich.console import Console, Group
+ from rich.live import Live
+ from rich.panel import Panel
+ from rich.table import Table
+
+ from guidellm.core.result import TextGenerationBenchmark, TextGenerationBenchmarkReport
+ from guidellm.core.serializable import Serializable
+
+ __all__ = ["GuidanceReport"]
+
+
+ def _create_benchmark_report_details(report: TextGenerationBenchmarkReport) -> str:
+     """
+     Create a detailed string representation of a benchmark report.
+
+     :param report: The benchmark report to generate details for.
+     :type report: TextGenerationBenchmarkReport
+     :return: A string containing the backend, data, rate, and limits of
+         the benchmark report.
+     :rtype: str
+     """
+     backend = (
+         f"Backend(type={report.args.get('backend_type', 'N/A')}, "
+         f"target={report.args.get('target', 'N/A')}, "
+         f"model={report.args.get('model', 'N/A')})"
+     )
+     data = (
+         f"Data(type={report.args.get('data_type', 'N/A')}, "
+         f"source={report.args.get('data', 'N/A')}, "
+         f"tokenizer={report.args.get('tokenizer', 'N/A')})"
+     )
+     rate = (
+         f"Rate(type={report.args.get('mode', 'N/A')}, "
+         f"rate={report.args.get('rate', 'N/A')})"
+     )
+     limits = (
+         f"Limits(max_number={report.args.get('max_number', 'N/A')} requests, "
+         f"max_duration={report.args.get('max_duration', 'N/A')} sec)"
+     )
+
+     logger.debug(
+         "Created benchmark report details for backend={}, data={}, rate={}, limits={}",
+         backend,
+         data,
+         rate,
+         limits,
+     )
+
+     return backend + "\n" + data + "\n" + rate + "\n" + limits + "\n"
+
+
+ def _benchmark_rate_id(benchmark: TextGenerationBenchmark) -> str:
+     """
+     Generate a string identifier for a benchmark rate.
+
+     :param benchmark: The benchmark for which to generate the rate ID.
+     :type benchmark: TextGenerationBenchmark
+     :return: A string representing the benchmark rate ID.
+     :rtype: str
+     """
+     rate_id = (
+         f"{benchmark.mode}@{benchmark.rate:.2f} req/sec"
+         if benchmark.rate
+         else f"{benchmark.mode}"
+     )
+     logger.debug("Generated benchmark rate ID: {}", rate_id)
+     return rate_id
+
+
+ def _create_benchmark_report_requests_summary(
+     report: TextGenerationBenchmarkReport,
+ ) -> Table:
+     """
+     Create a table summarizing the requests of a benchmark report.
+
+     :param report: The benchmark report to summarize.
+     :type report: TextGenerationBenchmarkReport
+     :return: A rich Table object summarizing the requests.
+     :rtype: Table
+     """
+     table = Table(
+         "Benchmark",
+         "Requests Completed",
+         "Requests Failed",
+         "Duration",
+         "Start Time",
+         "End Time",
+         title="[magenta]Requests Data by Benchmark[/magenta]",
+         title_style="bold",
+         title_justify="left",
+         show_header=True,
+     )
+
+     for benchmark in report.benchmarks_sorted:
+         start_time_str = (
+             datetime.fromtimestamp(benchmark.start_time).strftime("%H:%M:%S")
+             if benchmark.start_time
+             else "N/A"
+         )
+         end_time_str = (
+             datetime.fromtimestamp(benchmark.end_time).strftime("%H:%M:%S")
+             if benchmark.end_time
+             else "N/A"
+         )
+
+         table.add_row(
+             _benchmark_rate_id(benchmark),
+             f"{benchmark.request_count}/{benchmark.total_count}",
+             f"{benchmark.error_count}/{benchmark.total_count}",
+             f"{benchmark.duration:.2f} sec",
+             f"{start_time_str}",
+             f"{end_time_str}",
+         )
+     logger.debug("Created requests summary table for the report.")
+     return table
+
+
+ def _create_benchmark_report_data_tokens_summary(
+     report: TextGenerationBenchmarkReport,
+ ) -> Table:
+     """
+     Create a table summarizing data tokens of a benchmark report.
+
+     :param report: The benchmark report to summarize.
+     :type report: TextGenerationBenchmarkReport
+     :return: A rich Table object summarizing the data tokens.
+     :rtype: Table
+     """
+     table = Table(
+         "Benchmark",
+         "Prompt",
+         "Prompt (1%, 5%, 50%, 95%, 99%)",
+         "Output",
+         "Output (1%, 5%, 50%, 95%, 99%)",
+         title="[magenta]Tokens Data by Benchmark[/magenta]",
+         title_style="bold",
+         title_justify="left",
+         show_header=True,
+     )
+
+     for benchmark in report.benchmarks_sorted:
+         table.add_row(
+             _benchmark_rate_id(benchmark),
+             f"{benchmark.prompt_token_distribution.mean:.2f}",
+             ", ".join(
+                 f"{percentile:.1f}"
+                 for percentile in benchmark.prompt_token_distribution.percentiles(
+                     [1, 5, 50, 95, 99]
+                 )
+             ),
+             f"{benchmark.output_token_distribution.mean:.2f}",
+             ", ".join(
+                 f"{percentile:.1f}"
+                 for percentile in benchmark.output_token_distribution.percentiles(
+                     [1, 5, 50, 95, 99]
+                 )
+             ),
+         )
+     logger.debug("Created data tokens summary table for the report.")
+     return table
+
+
+ def _create_benchmark_report_dist_perf_summary(
+     report: TextGenerationBenchmarkReport,
+ ) -> Table:
+     """
+     Create a table summarizing distribution performance of a benchmark report.
+
+     :param report: The benchmark report to summarize.
+     :type report: TextGenerationBenchmarkReport
+     :return: A rich Table object summarizing the performance statistics.
+     :rtype: Table
+     """
+     table = Table(
+         "Benchmark",
+         "Request Latency [1%, 5%, 10%, 50%, 90%, 95%, 99%] (sec)",
+         "Time to First Token [1%, 5%, 10%, 50%, 90%, 95%, 99%] (ms)",
+         "Inter Token Latency [1%, 5%, 10%, 50%, 90%, 95%, 99%] (ms)",
+         title="[magenta]Performance Stats by Benchmark[/magenta]",
+         title_style="bold",
+         title_justify="left",
+         show_header=True,
+     )
+
+     for benchmark in report.benchmarks_sorted:
+         table.add_row(
+             _benchmark_rate_id(benchmark),
+             ", ".join(
+                 f"{percentile:.2f}"
+                 for percentile in benchmark.request_latency_distribution.percentiles(
+                     [1, 5, 10, 50, 90, 95, 99]
+                 )
+             ),
+             ", ".join(
+                 f"{percentile * 1000:.1f}"
+                 for percentile in benchmark.ttft_distribution.percentiles(
+                     [1, 5, 10, 50, 90, 95, 99]
+                 )
+             ),
+             ", ".join(
+                 f"{percentile * 1000:.1f}"
+                 for percentile in benchmark.itl_distribution.percentiles(
+                     [1, 5, 10, 50, 90, 95, 99]
+                 )
+             ),
+         )
+     logger.debug("Created distribution performance summary table for the report.")
+     return table
+
+
+ def _create_benchmark_report_summary(report: TextGenerationBenchmarkReport) -> Table:
+     """
+     Create a summary table for a benchmark report.
+
+     :param report: The benchmark report to summarize.
+     :type report: TextGenerationBenchmarkReport
+     :return: A rich Table object summarizing overall performance.
+     :rtype: Table
+     """
+     table = Table(
+         "Benchmark",
+         "Requests per Second",
+         "Request Latency",
+         "Time to First Token",
+         "Inter Token Latency",
+         "Output Token Throughput",
+         title="[magenta]Performance Summary by Benchmark[/magenta]",
+         title_style="bold",
+         title_justify="left",
+         show_header=True,
+     )
+
+     for benchmark in report.benchmarks_sorted:
+         table.add_row(
+             _benchmark_rate_id(benchmark),
+             f"{benchmark.completed_request_rate:.2f} req/sec",
+             f"{benchmark.request_latency:.2f} sec",
+             f"{benchmark.time_to_first_token:.2f} ms",
+             f"{benchmark.inter_token_latency:.2f} ms",
+             f"{benchmark.output_token_throughput:.2f} tokens/sec",
+         )
+     logger.debug("Created overall performance summary table for the report.")
+     return table
+
+
+ class GuidanceReport(Serializable):
+     """
+     A class to manage the guidance reports that include the benchmarking details,
+     potentially across multiple runs, for saving and loading from disk.
+
+     :param benchmarks: The list of benchmarking reports.
+     :type benchmarks: List[TextGenerationBenchmarkReport]
+     """
+
+     benchmarks: List[TextGenerationBenchmarkReport] = Field(
+         default_factory=list, description="The list of benchmark reports."
+     )
+
+     def print(
+         self, save_path: Optional[str] = None, continual_refresh: bool = False
+     ) -> None:
+         """
+         Print the guidance report to the console.
+
+         :param save_path: Optional path to save the report to disk.
+         :type save_path: Optional[str]
+         :param continual_refresh: Whether to continually refresh the report.
+         :type continual_refresh: bool
+         :return: None
+         """
+         logger.info("Printing guidance report to console with save_path={}", save_path)
+         report_viz = Panel(
+             Group(
+                 *[
+                     Panel(
+                         Group(
+                             _create_benchmark_report_details(benchmark),
+                             "",
+                             _create_benchmark_report_requests_summary(benchmark),
+                             "",
+                             _create_benchmark_report_data_tokens_summary(benchmark),
+                             "",
+                             _create_benchmark_report_dist_perf_summary(benchmark),
+                             "",
+                             _create_benchmark_report_summary(benchmark),
+                         ),
+                         title=(
+                             f"[bold magenta]Benchmark Report "
+                             f"{index + 1}[/bold magenta]"
+                         ),
+                         expand=True,
+                         title_align="left",
+                     )
+                     for index, benchmark in enumerate(self.benchmarks)
+                 ],
+             ),
+             title=(
+                 "[bold cyan]GuideLLM Benchmarks Report[/bold cyan] [italic]"
+                 f"({save_path})[/italic]"
+             ),
+             expand=True,
+             title_align="left",
+         )
+         console = Console()
+
+         if continual_refresh:
+             logger.info("Starting live report with continual refresh.")
+             with Live(report_viz, refresh_per_second=1, console=console) as live:
+                 while True:
+                     live.update(report_viz)
+                     time.sleep(1)
+         else:
+             console.print(report_viz)
+
+         logger.info("Guidance report printing completed.")
@@ -0,0 +1,44 @@
+ import uuid
+ from typing import Any, Dict, Optional
+
+ from pydantic import Field
+
+ from guidellm.core.serializable import Serializable
+
+
+ class TextGenerationRequest(Serializable):
+     """
+     A class to represent a text generation request for generative AI workloads.
+     """
+
+     id: str = Field(
+         default_factory=lambda: str(uuid.uuid4()),
+         description="The unique identifier for the request.",
+     )
+     prompt: str = Field(description="The input prompt for the text generation.")
+     prompt_token_count: Optional[int] = Field(
+         default=None,
+         description="The number of tokens in the input prompt.",
+     )
+     output_token_count: Optional[int] = Field(
+         default=None,
+         description="The number of tokens to generate.",
+     )
+     params: Dict[str, Any] = Field(
+         default_factory=dict,
+         description="The parameters for the text generation request.",
+     )
+
+     def __str__(self) -> str:
+         prompt_short = (
+             self.prompt[:32] + "..."
+             if self.prompt and len(self.prompt) > 32  # noqa: PLR2004
+             else self.prompt
+         )
+
+         return (
+             f"TextGenerationRequest(id={self.id}, "
+             f"prompt={prompt_short}, prompt_token_count={self.prompt_token_count}, "
+             f"output_token_count={self.output_token_count}, "
+             f"params={self.params})"
+         )
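
As a quick illustration of the request model defined above, this hedged sketch constructs a TextGenerationRequest using only the fields declared in this diff; the module path, prompt text, and parameter values are made up for the example.

# Hypothetical usage sketch; the import path is assumed from the package layout.
from guidellm.core.request import TextGenerationRequest

request = TextGenerationRequest(
    prompt="Summarize the following paragraph in two sentences: ...",
    prompt_token_count=128,
    output_token_count=256,
    params={"temperature": 0.7},
)

# The id field is auto-generated via uuid4, and __str__ truncates prompts
# longer than 32 characters when the request is printed.
print(request)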