guidellm 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of guidellm might be problematic. Click here for more details.

@@ -0,0 +1,10 @@
1
+ from .base import Executor, ExecutorResult
2
+ from .profile_generator import Profile, ProfileGenerationMode, ProfileGenerator
3
+
4
+ __all__ = [
5
+ "Executor",
6
+ "ExecutorResult",
7
+ "Profile",
8
+ "ProfileGenerationMode",
9
+ "ProfileGenerator",
10
+ ]
@@ -0,0 +1,213 @@
1
+ from dataclasses import dataclass
2
+ from typing import AsyncGenerator, Optional, Sequence, Union
3
+
4
+ from loguru import logger
5
+
6
+ from guidellm.backend import Backend
7
+ from guidellm.core import TextGenerationBenchmarkReport
8
+ from guidellm.executor.profile_generator import (
9
+ Profile,
10
+ ProfileGenerationMode,
11
+ ProfileGenerator,
12
+ )
13
+ from guidellm.request import RequestGenerator
14
+ from guidellm.scheduler import Scheduler, SchedulerResult
15
+
16
+ __all__ = ["Executor", "ExecutorResult"]
17
+
18
+
19
@dataclass
class ExecutorResult:
    """
    Data class representing the result of executing tasks in the Executor.

    :param completed: Indicates whether all tasks have completed.
    :type completed: bool
    :param count_total: Total number of profiles.
    :type count_total: int
    :param count_completed: Number of completed profiles.
    :type count_completed: int
    :param generation_modes: The generation mode of each profile in the run.
    :type generation_modes: Sequence[ProfileGenerationMode]
    :param report: The benchmark report for text generation.
    :type report: TextGenerationBenchmarkReport
    :param scheduler_result: Optional scheduler result for the last task.
    :type scheduler_result: Optional[SchedulerResult]
    :param current_index: Index of the profile currently being run, or None.
    :type current_index: Optional[int]
    :param current_profile: The profile currently being run, or None.
    :type current_profile: Optional[Profile]
    """

    completed: bool
    count_total: int
    count_completed: int
    generation_modes: Sequence[ProfileGenerationMode]
    report: TextGenerationBenchmarkReport
    scheduler_result: Optional[SchedulerResult] = None
    current_index: Optional[int] = None
    current_profile: Optional[Profile] = None
44
+
45
+
46
class Executor:
    """
    The Executor class manages the execution of tasks based on a given profile
    generation mode and rate. It orchestrates the interaction between the backend,
    request generator, and profile generator, and runs benchmarks accordingly.

    :param backend: The backend to run tasks against.
    :type backend: Backend
    :param request_generator: The generator that creates requests for execution.
    :type request_generator: RequestGenerator
    :param mode: The mode for profile generation (e.g., sweep, synchronous).
    :type mode: ProfileGenerationMode
    :param rate: A single rate or a sequence of rates for load generation, or None.
    :type rate: Optional[Union[float, Sequence[float]]]
    :param max_number: Maximum number of requests to generate for the scheduler
        (a single benchmark run), or None.
    :type max_number: Optional[int]
    :param max_duration: Maximum duration for generating requests for the scheduler
        (a single benchmark run), or None.
    :type max_duration: Optional[float]
    """

    def __init__(
        self,
        backend: Backend,
        request_generator: RequestGenerator,
        mode: ProfileGenerationMode = "sweep",
        rate: Optional[Union[float, Sequence[float]]] = None,
        max_number: Optional[int] = None,
        max_duration: Optional[float] = None,
    ):
        self._backend = backend
        self._generator = request_generator
        self._max_number = max_number
        self._max_duration = max_duration
        # ProfileGenerator validates the mode/rate combination and raises on misuse
        self._profile_generator = ProfileGenerator(mode=mode, rate=rate)
        logger.info("Executor initialized with mode: {}, rate: {}", mode, rate)

    @property
    def backend(self) -> Backend:
        """
        Returns the backend being used by the Executor.

        :return: The backend instance.
        :rtype: Backend
        """
        return self._backend

    @property
    def request_generator(self) -> RequestGenerator:
        """
        Returns the request generator used by the Executor.

        :return: The request generator instance.
        :rtype: RequestGenerator
        """
        return self._generator

    @property
    def profile_generator(self) -> ProfileGenerator:
        """
        Returns the profile generator for generating profiles during execution.

        :return: The profile generator instance.
        :rtype: ProfileGenerator
        """
        return self._profile_generator

    @property
    def max_number(self) -> Optional[int]:
        """
        Returns the maximum number of requests to generate.

        :return: Maximum number of requests or None.
        :rtype: Optional[int]
        """
        return self._max_number

    @property
    def max_duration(self) -> Optional[float]:
        """
        Returns the maximum duration for generating requests.

        :return: Maximum duration in seconds or None.
        :rtype: Optional[float]
        """
        return self._max_duration

    async def run(self) -> AsyncGenerator[ExecutorResult, None]:
        """
        Runs the Executor, generating and scheduling tasks based on the profile
        generation mode. Yields results incrementally: one result before any
        profile runs, one result per scheduler update while profiles run, and a
        final result with ``completed=True`` once all profiles are exhausted.

        :return: An async generator of incremental executor results.
        :rtype: AsyncGenerator[ExecutorResult, None]
        """
        report = TextGenerationBenchmarkReport()
        # record the full run configuration on the report for later inspection
        report.args = {
            # backend args
            "backend_type": self.backend.type_,
            "target": self.backend.target,
            "model": self.backend.model,
            # data args
            "data_type": self.request_generator.type_,
            "data": self.request_generator.source,
            "tokenizer": self.request_generator.tokenizer.name_or_path,
            # rate args
            "mode": self.profile_generator.mode,
            "rate": self.profile_generator.rates,
            # limits args
            "max_number": self.max_number,
            "max_duration": self.max_duration,
        }
        profile_index = -1
        logger.info("Starting Executor run")

        # initial result so consumers can render progress before any work starts
        yield ExecutorResult(
            completed=False,
            count_total=len(self.profile_generator),
            count_completed=0,
            generation_modes=self.profile_generator.profile_generation_modes,
            report=report,
        )

        # the profile generator returns None once all profiles are exhausted,
        # which terminates this loop
        while profile := self.profile_generator.next(report):
            logger.debug("Generated profile: {}", profile)
            scheduler = Scheduler(
                generator=self.request_generator,
                worker=self.backend,
                mode=profile.load_gen_mode,
                rate=profile.load_gen_rate,
                max_number=self.max_number or profile.args.get("max_number", None),
                max_duration=self.max_duration,
            )
            profile_index += 1

            logger.info(
                "Scheduling tasks with mode: {}, rate: {}",
                profile.load_gen_mode,
                profile.load_gen_rate,
            )

            async for scheduler_result in scheduler.run():
                if scheduler_result.completed:
                    # a completed scheduler result carries the finished benchmark
                    report.add_benchmark(scheduler_result.benchmark)
                    logger.debug(
                        "Benchmark added for scheduler result: {}",
                        scheduler_result.benchmark,
                    )

                yield ExecutorResult(
                    completed=False,
                    count_total=len(self.profile_generator),
                    count_completed=len(report.benchmarks),
                    generation_modes=self.profile_generator.profile_generation_modes,
                    report=report,
                    scheduler_result=scheduler_result,
                    current_index=profile_index,
                    current_profile=profile,
                )

        logger.info("Executor run completed")
        yield ExecutorResult(
            completed=True,
            count_total=len(self.profile_generator),
            count_completed=len(report.benchmarks),
            generation_modes=self.profile_generator.profile_generation_modes,
            report=report,
        )
@@ -0,0 +1,343 @@
1
+ from typing import Any, Dict, Literal, Optional, Sequence, Union, get_args
2
+
3
+ import numpy as np
4
+ from loguru import logger
5
+ from pydantic import Field
6
+
7
+ from guidellm.config import settings
8
+ from guidellm.core import TextGenerationBenchmark, TextGenerationBenchmarkReport
9
+ from guidellm.core.serializable import Serializable
10
+ from guidellm.scheduler import LoadGenerationMode
11
+
12
+ __all__ = [
13
+ "Profile",
14
+ "ProfileGenerationMode",
15
+ "ProfileGenerator",
16
+ ]
17
+
18
# Closed set of supported profile generation modes.
ProfileGenerationMode = Literal[
    "sweep", "synchronous", "throughput", "constant", "poisson"
]


class Profile(Serializable):
    """
    A serializable data class representing a profile for load generation.

    :param load_gen_mode: The mode of load generation (e.g., constant, poisson).
    :type load_gen_mode: LoadGenerationMode
    :param load_gen_rate: The rate of load generation, if applicable.
    :type load_gen_rate: Optional[float]
    :param args: Additional arguments for the profile (defaults to an empty dict).
    :type args: Dict[str, Any]
    """

    load_gen_mode: LoadGenerationMode
    load_gen_rate: Optional[float] = None
    args: Dict[str, Any] = Field(default_factory=dict)
38
+
39
+
40
class ProfileGenerator:
    """
    Generates profiles based on different load generation modes.

    :param mode: The mode for profile generation (e.g., sweep, synchronous).
    :type mode: ProfileGenerationMode
    :param rate: The rate(s) for load generation; could be a float or list of floats.
    :type rate: Optional[Union[float, Sequence[float]]]
    :raises ValueError: If the mode is unknown or the rate(s) do not match the mode.
    """

    def __init__(
        self,
        mode: ProfileGenerationMode,
        rate: Optional[Union[float, Sequence[float]]] = None,
    ):
        if mode not in get_args(ProfileGenerationMode):
            err = ValueError(
                f"{mode} is not a valid Profile Generation Mode. "
                f"Valid options are {get_args(ProfileGenerationMode)}"
            )
            logger.error(err)
            raise err

        self._mode = mode

        if self._mode in ("sweep", "throughput", "synchronous"):
            # these modes determine their own load; user-supplied rates are invalid
            if rate is not None:
                err = ValueError(f"Rates are not applicable for {self._mode} mode")
                logger.error(err)
                raise err
            self._rates = None
        else:
            # "constant" and "poisson" require at least one positive rate
            if not rate:
                err = ValueError(f"Rates are required for {self._mode} mode")
                logger.error(err)
                raise err
            self._rates = rate if isinstance(rate, Sequence) else [rate]

            for rt in self._rates:
                if rt <= 0:
                    err = ValueError(
                        f"Rate must be > 0 for mode: {self._mode}. Given: {rt}"
                    )
                    logger.error(err)
                    raise err

        self._generated_count = 0

    def __len__(self) -> int:
        """
        Returns the number of profiles to generate based on the mode and rates.

        :return: The number of profiles.
        :rtype: int
        """
        if self._mode == "sweep":
            # synchronous + throughput baselines plus the configured sweep points
            return settings.num_sweep_profiles + 2

        if self._mode in ("throughput", "synchronous"):
            return 1

        if not self._rates:
            raise ValueError(f"Rates are required for {self._mode} mode")

        return len(self._rates)

    @property
    def mode(self) -> ProfileGenerationMode:
        """
        Returns the current mode of profile generation.

        :return: The profile generation mode.
        :rtype: ProfileGenerationMode
        """
        return self._mode

    @property
    def rates(self) -> Optional[Sequence[float]]:
        """
        Returns the list of rates for load generation, if any.

        :return: Sequence of rates or None if not applicable.
        :rtype: Optional[Sequence[float]]
        """
        return self._rates

    @property
    def generated_count(self) -> int:
        """
        Returns the current count of generated profiles.

        :return: The current count of generated profiles.
        :rtype: int
        """
        return self._generated_count

    @property
    def profile_generation_modes(self) -> Sequence[ProfileGenerationMode]:
        """
        Return the list of profile modes to be run in the report.

        :return: Sequence of profile modes to be run in the report.
        :rtype: Sequence[ProfileGenerationMode]
        :raises ValueError: If rates are missing for a fixed-rate mode or the
            mode is invalid.
        """
        if self._mode == "sweep":
            return ["synchronous", "throughput"] + ["constant"] * (  # type: ignore # noqa: PGH003
                settings.num_sweep_profiles
            )

        if self._mode in ["throughput", "synchronous"]:
            return [self._mode]

        if self._rates is None:
            raise ValueError(f"Rates are required for {self._mode} mode")

        if self._mode in ["constant", "poisson"]:
            return [self._mode] * len(self._rates)

        raise ValueError(f"Invalid mode: {self._mode}")

    def next(self, current_report: TextGenerationBenchmarkReport) -> Optional[Profile]:
        """
        Generates the next profile based on the current mode and report.

        :param current_report: The benchmark report accumulated so far; sweep mode
            reads its completed benchmarks to derive intermediate rates.
        :type current_report: TextGenerationBenchmarkReport
        :return: The generated profile or None if no more profiles.
        :rtype: Optional[Profile]
        """
        logger.debug(
            "Generating the next profile with mode: {}, current report: {}",
            self.mode,
            current_report,
        )

        if self.mode in ["constant", "poisson"]:
            if not self.rates:
                err = ValueError(f"Rates are required for {self.mode} mode")
                logger.error(err)
                raise err

            profile = self.create_fixed_rate_profile(
                self.generated_count,
                self.mode,
                self.rates,
            )
        elif self.mode == "synchronous":
            profile = self.create_synchronous_profile(self.generated_count)
        elif self.mode == "throughput":
            profile = self.create_throughput_profile(self.generated_count)
        elif self.mode == "sweep":
            # benchmarks[0] is the synchronous baseline and benchmarks[1] the
            # throughput baseline, matching the order of profile_generation_modes
            profile = self.create_sweep_profile(
                self.generated_count,
                sync_benchmark=current_report.benchmarks[0]
                if current_report.benchmarks
                else None,
                throughput_benchmark=current_report.benchmarks[1]
                if len(current_report.benchmarks) > 1
                else None,
            )
        else:
            err = ValueError(f"Invalid mode: {self.mode}")
            logger.error(err)
            raise err

        # count is advanced even when the final call yields None (exhausted)
        self._generated_count += 1
        logger.info(
            "Generated profile: {}, total generated count: {}",
            profile,
            self._generated_count,
        )
        return profile

    @staticmethod
    def create_fixed_rate_profile(
        index: int, mode: ProfileGenerationMode, rates: Sequence[float]
    ) -> Optional[Profile]:
        """
        Creates a profile with a fixed rate.

        :param index: The index of the rate in the list.
        :type index: int
        :param mode: The mode for profile generation (constant or poisson).
        :type mode: ProfileGenerationMode
        :param rates: The list of rates for load generation.
        :type rates: Sequence[float]
        :return: The generated profile or None if index is out of range.
        :rtype: Optional[Profile]
        :raises ValueError: If the mode is not a fixed-rate mode.
        """
        # maps the profile generation mode onto the scheduler's load mode
        modes_map: Dict[str, LoadGenerationMode] = {
            "constant": "constant",
            "poisson": "poisson",
        }

        if mode not in modes_map:
            err = ValueError(f"Invalid mode: {mode}")
            logger.error(err)
            raise err

        profile = (
            Profile(
                load_gen_mode=modes_map[mode],
                load_gen_rate=rates[index],
            )
            if index < len(rates)
            else None
        )
        logger.debug("Created fixed rate profile: {}", profile)
        return profile

    @staticmethod
    def create_synchronous_profile(index: int) -> Optional[Profile]:
        """
        Creates a profile with synchronous mode.

        :param index: The index of the profile to create.
        :type index: int
        :return: The generated profile or None if index is out of range.
        :rtype: Optional[Profile]
        """
        profile = (
            Profile(
                load_gen_mode="synchronous",
                load_gen_rate=None,
            )
            if index < 1
            else None
        )
        logger.debug("Created synchronous profile: {}", profile)
        return profile

    @staticmethod
    def create_throughput_profile(index: int) -> Optional[Profile]:
        """
        Creates a profile with throughput mode.

        :param index: The index of the profile to create.
        :type index: int
        :return: The generated profile or None if index is out of range.
        :rtype: Optional[Profile]
        """
        profile = (
            Profile(
                load_gen_mode="throughput",
                load_gen_rate=None,
            )
            if index < 1
            else None
        )
        logger.debug("Created throughput profile: {}", profile)
        return profile

    @staticmethod
    def create_sweep_profile(
        index: int,
        sync_benchmark: Optional[TextGenerationBenchmark],
        throughput_benchmark: Optional[TextGenerationBenchmark],
    ) -> Optional[Profile]:
        """
        Creates a profile with sweep mode, generating profiles between
        synchronous and throughput benchmarks.

        :param index: The index of the profile to create.
        :type index: int
        :param sync_benchmark: The completed synchronous benchmark, if any.
        :type sync_benchmark: Optional[TextGenerationBenchmark]
        :param throughput_benchmark: The completed throughput benchmark, if any.
        :type throughput_benchmark: Optional[TextGenerationBenchmark]
        :return: The generated profile or None if index is out of range.
        :rtype: Optional[Profile]
        :raises ValueError: If a required earlier benchmark is missing.
        """
        if index < 0 or index >= settings.num_sweep_profiles + 2:
            return None

        if index == 0:
            return ProfileGenerator.create_synchronous_profile(0)

        if not sync_benchmark:
            err = ValueError("Synchronous report is required for sweep mode")
            logger.error(err)
            raise err

        if index == 1:
            throughput_profile: Profile = ProfileGenerator.create_throughput_profile(0)  # type: ignore # noqa: PGH003
            # set the max number of requests to 5 times the number of requests
            # in case it is not set for the sweep to limit the number of requests
            throughput_profile.args = {"max_number": sync_benchmark.request_count * 5}
            return throughput_profile

        if not throughput_benchmark:
            err = ValueError("Throughput report is required for sweep mode")
            logger.error(err)
            raise err

        # interpolate evenly between the measured synchronous and throughput
        # request rates; drop the first point (equal to the synchronous rate)
        min_rate = sync_benchmark.completed_request_rate
        max_rate = throughput_benchmark.completed_request_rate
        intermediate_rates = list(
            np.linspace(min_rate, max_rate, settings.num_sweep_profiles + 1)
        )[1:]

        return Profile(
            load_gen_mode="constant",
            load_gen_rate=intermediate_rates[index - 2],
        )
guidellm/logger.py ADDED
@@ -0,0 +1,83 @@
1
+ """
2
+ Logger configuration for GuideLLM.
3
+
4
+ This module provides a flexible logging configuration using the loguru library.
5
+ It supports console and file logging with options to configure via environment
6
+ variables or direct function calls.
7
+
8
+ Environment Variables:
9
+ - GUIDELLM__LOGGING__DISABLED: Disable logging (default: false).
10
+ - GUIDELLM__LOGGING__CLEAR_LOGGERS: Clear existing loggers
11
+ from loguru (default: true).
12
+ - GUIDELLM__LOGGING__LOG_LEVEL: Log level for console logging
13
+ (default: none, options: DEBUG, INFO, WARNING, ERROR, CRITICAL).
14
+ - GUIDELLM__LOGGING__FILE: Path to the log file for file logging
15
+ (default: guidellm.log if log file level set else none)
16
+ - GUIDELLM__LOGGING__FILE_LEVEL: Log level for file logging
17
+ (default: INFO if log file set else none).
18
+
19
+ Usage:
20
+ from guidellm import logger, configure_logger, LoggingSettings
21
+
22
+ # Configure the logger with explicit settings
23
+ configure_logger(
24
+ config=LoggingSettings(
25
+ disabled=False,
26
+ clear_loggers=True,
27
+ console_log_level="DEBUG",
28
+ log_file=None,
29
+ log_file_level=None,
30
+ )
31
+ )
32
+
33
+ logger.debug("This is a debug message")
34
+ logger.info("This is an info message")
35
+ """
36
+
37
+ import sys
38
+
39
+ from loguru import logger
40
+
41
+ from guidellm.config import LoggingSettings, settings
42
+
43
+ __all__ = ["configure_logger", "logger"]
44
+
45
+
46
def configure_logger(config: LoggingSettings = settings.logging):
    """
    Configure the logger for GuideLLM.
    This function sets up the console and file logging
    as per the specified or default parameters.

    Note: Environment variables take precedence over the function parameters.

    :param config: The configuration for the logger to use.
    :type config: LoggingSettings
    """

    if config.disabled:
        logger.disable("guidellm")
        return

    logger.enable("guidellm")

    if config.clear_loggers:
        # drop loguru's default sink (and any previously added sinks)
        logger.remove()

    # log as a human readable string with the time, function, level, and message
    logger.add(
        sys.stdout,
        level=config.console_log_level.upper(),
        format="{time} | {function} | {level} - {message}",
    )

    if config.log_file or config.log_file_level:
        # either setting implies file logging; fill in the other one's default
        log_file = config.log_file or "guidellm.log"
        log_file_level = config.log_file_level or "INFO"
        # log as json to the file for easier parsing
        logger.add(log_file, level=log_file_level.upper(), serialize=True)


# invoke logger setup on import with default values
# enabling console logging with INFO and disabling file logging
configure_logger()