guidellm-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of guidellm might be problematic.

@@ -0,0 +1,374 @@
+ import asyncio
+ import math
+ import time
+ from dataclasses import dataclass
+ from typing import AsyncGenerator, Literal, Optional, Union, get_args
+
+ from loguru import logger
+
+ from guidellm.backend import Backend
+ from guidellm.config import settings
+ from guidellm.core import (
+     TextGenerationBenchmark,
+     TextGenerationError,
+     TextGenerationRequest,
+     TextGenerationResult,
+ )
+ from guidellm.request import RequestGenerator
+ from guidellm.scheduler.load_generator import LoadGenerationMode, LoadGenerator
+
+ __all__ = ["Scheduler", "SchedulerResult"]
+
+
+ @dataclass
+ class SchedulerResult:
+     """
+     Represents the result of a single task execution within the Scheduler.
+
+     :param completed: Indicates if the task is completed.
+     :type completed: bool
+     :param count_total: Total number of tasks to be executed.
+     :type count_total: int
+     :param count_completed: Number of tasks that have been completed so far.
+     :type count_completed: int
+     :param benchmark: Benchmark data for the task execution.
+     :type benchmark: TextGenerationBenchmark
+     :param current_result: The result of the current request, if any.
+     :type current_result: Optional[Union[TextGenerationResult, TextGenerationError]]
+     """
+
+     completed: bool
+     count_total: int
+     count_completed: int
+     benchmark: TextGenerationBenchmark
+     current_result: Optional[Union[TextGenerationResult, TextGenerationError]] = None
+
+
+ class Scheduler:
+     """
+     Schedules and manages the execution of tasks for text generation requests.
+
+     :param generator: The request generator that produces text generation requests.
+     :type generator: RequestGenerator
+     :param worker: The backend worker that processes the requests.
+     :type worker: Backend
+     :param mode: The mode of load generation (e.g., synchronous, constant, poisson).
+     :type mode: LoadGenerationMode
+     :param rate: The rate at which requests are generated, if applicable.
+     :type rate: Optional[float]
+     :param max_number: Maximum number of requests to be processed.
+     :type max_number: Optional[int]
+     :param max_duration: Maximum duration in seconds for which requests
+         should be processed.
+     :type max_duration: Optional[float]
+
+     :raises ValueError: If neither max_number nor max_duration is specified or
+         if they are not positive.
+     """
+
+     def __init__(
+         self,
+         generator: RequestGenerator,
+         worker: Backend,
+         mode: LoadGenerationMode = "synchronous",
+         rate: Optional[float] = None,
+         max_number: Optional[int] = None,
+         max_duration: Optional[float] = None,
+     ):
+         logger.info(
+             "Scheduler initialized with params: generator={}, worker={}, mode={}, "
+             "rate={}, max_number={}, max_duration={}",
+             generator,
+             worker,
+             mode,
+             rate,
+             max_number,
+             max_duration,
+         )
+
+         if mode not in get_args(LoadGenerationMode):
+             err = ValueError(
+                 f"{mode} is not a valid Load Generation Mode. "
+                 f"Valid options are {get_args(LoadGenerationMode)}"
+             )
+             logger.error(err)
+             raise err
+
+         if not max_number and not max_duration:
+             err = ValueError("Either max_number or max_duration must be specified")
+             logger.error(err)
+             raise err
+
+         if max_number and max_number <= 0:
+             err = ValueError(f"max_number must be > 0, given: {max_number}")
+             logger.error(err)
+             raise err
+
+         if max_duration and max_duration <= 0:
+             err = ValueError(f"max_duration must be > 0, given: {max_duration}")
+             logger.error(err)
+             raise err
+
+         if mode in ["constant", "poisson"] and (rate is None or rate <= 0):
+             err = ValueError(f"Rate must be > 0 for mode: {mode}. Given: {rate}")
+             logger.error(err)
+             raise err
+
+         self._generator = generator
+         self._worker = worker
+         self._mode = mode
+         self._rate = rate
+         self._max_number = max_number
+         self._max_duration = max_duration
+
+         self._load_generator = LoadGenerator(mode, rate)
+
+     @property
+     def generator(self) -> RequestGenerator:
+         """
+         The request generator that produces text generation requests.
+
+         :return: The request generator instance.
+         :rtype: RequestGenerator
+         """
+         return self._generator
+
+     @property
+     def worker(self) -> Backend:
+         """
+         The backend worker that processes the requests.
+
+         :return: The backend worker instance.
+         :rtype: Backend
+         """
+         return self._worker
+
+     @property
+     def mode(self) -> LoadGenerationMode:
+         """
+         The mode of load generation (e.g., synchronous, constant, poisson).
+
+         :return: The load generation mode.
+         :rtype: LoadGenerationMode
+         """
+         return self._mode
+
+     @property
+     def rate(self) -> Optional[float]:
+         """
+         The rate at which requests are generated, if applicable.
+
+         :return: The rate of request generation.
+         :rtype: Optional[float]
+         """
+         return self._rate
+
+     @property
+     def max_number(self) -> Optional[int]:
+         """
+         Maximum number of requests to be processed.
+
+         :return: The maximum number of requests.
+         :rtype: Optional[int]
+         """
+         return self._max_number
+
+     @property
+     def max_duration(self) -> Optional[float]:
+         """
+         Maximum duration in seconds for which requests should be processed.
+
+         :return: The maximum duration in seconds.
+         :rtype: Optional[float]
+         """
+         return self._max_duration
+
+     @property
+     def load_generator(self) -> LoadGenerator:
+         """
+         The load generator responsible for generating load based on mode and rate.
+
+         :return: The load generator instance.
+         :rtype: LoadGenerator
+         """
+         return self._load_generator
+
+     @property
+     def benchmark_mode(self) -> Literal["asynchronous", "synchronous", "throughput"]:
+         """
+         The report mode for the scheduler.
+
+         :return: The report mode.
+         :rtype: Literal["asynchronous", "synchronous", "throughput"]
+         """
+         if self._mode == "synchronous":
+             return "synchronous"
+
+         if self._mode == "throughput":
+             return "throughput"
+
+         return "asynchronous"
+
+     async def run(self) -> AsyncGenerator[SchedulerResult, None]:
+         """
+         Run the scheduler to process requests based on the configured mode, rate,
+         maximum number, and maximum duration.
+
+         :yield: The result of each task executed by the scheduler.
+         :rtype: AsyncGenerator[SchedulerResult, None]
+         """
+         logger.info("Starting Scheduler run")
+
+         benchmark = TextGenerationBenchmark(mode=self.benchmark_mode, rate=self.rate)
+         start_time = time.time()
+         end_time = start_time + self.max_duration if self.max_duration else math.inf
+         max_number = float(self.max_number) if self.max_number else math.inf
+         runner = self._run_sync if self._mode == "synchronous" else self._run_async
+         count_total = (
+             self.max_number
+             if self.max_number
+             else round(self.max_duration)
+             if self.max_duration
+             else 0
+         )
+
+         # yield initial result for progress tracking
+         yield SchedulerResult(
+             completed=False,
+             count_total=count_total,
+             count_completed=0,
+             benchmark=benchmark,
+         )
+
+         run_count = 0
+         async for res in runner(benchmark, end_time, max_number):
+             run_count += 1
+             count_completed = (
+                 min(run_count, self.max_number)
+                 if self.max_number
+                 else round(time.time() - start_time)
+                 if self.max_duration
+                 else 0
+             )
+
+             yield SchedulerResult(
+                 completed=False,
+                 count_total=count_total,
+                 count_completed=count_completed,
+                 benchmark=benchmark,
+                 current_result=res,
+             )
+
+         logger.info("Scheduler run completed")
+
+         yield SchedulerResult(
+             completed=True,
+             count_total=count_total,
+             count_completed=(
+                 benchmark.request_count + benchmark.error_count
+                 if self.max_number
+                 else round(time.time() - start_time)
+                 if self.max_duration
+                 else 0
+             ),
+             benchmark=benchmark,
+         )
+
+     async def _run_sync(
+         self, benchmark: TextGenerationBenchmark, end_time: float, max_number: float
+     ) -> AsyncGenerator[Union[TextGenerationResult, TextGenerationError], None]:
+         for index, (request, submit_at) in enumerate(
+             zip(self.generator, self.load_generator.times())
+         ):
+             if index >= max_number or time.time() >= end_time:
+                 break
+
+             logger.debug(
+                 "Running synchronous request={} at submit_at={}",
+                 request,
+                 submit_at,
+             )
+             benchmark.request_started()
+             result = await self._submit_task_coroutine(request, submit_at, end_time)
+             if result is not None:
+                 benchmark.request_completed(result)
+                 logger.debug("Request completed with output: {}", result)
+                 yield result
+
+     async def _run_async(
+         self, benchmark: TextGenerationBenchmark, end_time: float, max_number: float
+     ) -> AsyncGenerator[Union[TextGenerationResult, TextGenerationError], None]:
+         tasks = []
+         completed = 0
+
+         for index, (request, submit_at) in enumerate(
+             zip(self.generator, self.load_generator.times())
+         ):
+             while (index + 1 - completed) >= settings.max_concurrency:
+                 await asyncio.sleep(0.1)
+
+             if index >= max_number or time.time() >= end_time or submit_at >= end_time:
+                 break
+
+             logger.debug(
+                 "Running asynchronous request={} at submit_at={}",
+                 request,
+                 submit_at,
+             )
+
+             def _completed(_task: asyncio.Task) -> None:
+                 nonlocal completed
+                 completed += 1
+                 _res = _task.result()
+
+                 if _res:
+                     benchmark.request_completed(_res)
+                     logger.debug("Request completed: {}", _res)
+
+             benchmark.request_started()
+             task = asyncio.create_task(
+                 self._submit_task_coroutine(request, submit_at, end_time)
+             )
+             task.add_done_callback(_completed)
+             tasks.append(task)
+
+             # release control to the event loop for other tasks
+             await asyncio.sleep(0.001)
+
+         for compl_task in asyncio.as_completed(tasks):
+             task_res = await compl_task
+             if task_res is not None:
+                 yield task_res
+
+     async def _submit_task_coroutine(
+         self, request: TextGenerationRequest, submit_at: float, end_time: float
+     ) -> Optional[Union[TextGenerationResult, TextGenerationError]]:
+         try:
+             if submit_at > end_time:
+                 logger.info(
+                     "Request {} submission time {} is greater than end time {}",
+                     request,
+                     submit_at,
+                     end_time,
+                 )
+                 raise asyncio.TimeoutError(
+                     f"Request submission time {submit_at} "
+                     f"is greater than end time {end_time}"
+                 )
+
+             if submit_at > time.time():
+                 await asyncio.sleep(submit_at - time.time())
+
+             timeout = (
+                 end_time - time.time() if end_time and end_time < math.inf else None
+             )
+
+             return await asyncio.wait_for(self._worker.submit(request), timeout=timeout)
+         except asyncio.TimeoutError as exc:
+             logger.info("Request {} timed out: {}", request, exc)
+
+             return None
+         except Exception as exc:  # noqa: BLE001
+             logger.warning("Request {} failed: {}", request, exc)
+
+             return TextGenerationError(request=request, message=str(exc))
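
The Scheduler above drives a RequestGenerator through a Backend according to the configured load mode and stop conditions. A minimal usage sketch, assuming a generator and worker have already been constructed elsewhere and that the package re-exports Scheduler from guidellm.scheduler (neither assumption is shown in this diff):

import asyncio

from guidellm.scheduler import Scheduler

async def main(generator, worker):
    # generator: assumed RequestGenerator, worker: assumed Backend (hypothetical wiring)
    scheduler = Scheduler(
        generator=generator,
        worker=worker,
        mode="constant",    # "constant"/"poisson" require a positive rate
        rate=2.0,           # two requests per second
        max_duration=30.0,  # stop after 30 seconds
    )
    async for result in scheduler.run():
        if result.completed:
            print("run finished:", result.benchmark)
        elif result.current_result is not None:
            print(f"{result.count_completed}/{result.count_total} completed")

# asyncio.run(main(generator, worker))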
@@ -0,0 +1,196 @@
+ import time
+ from typing import Generator, Literal, Optional, get_args
+
+ import numpy as np
+ from loguru import logger
+
+ __all__ = ["LoadGenerationMode", "LoadGenerator"]
+
+ LoadGenerationMode = Literal["synchronous", "constant", "poisson", "throughput"]
+
+
+ class LoadGenerator:
+     """
+     Load Generator class that generates timestamps for load generation.
+
+     This class supports multiple load generation modes: "constant", "poisson",
+     "throughput", and "synchronous". Each mode has its own method for generating
+     timestamps based on the rate provided during initialization.
+
+     :param mode: The mode of load generation. Valid options are "constant",
+         "poisson", "throughput", and "synchronous".
+     :type mode: LoadGenerationMode
+     :param rate: The rate at which to generate timestamps. This value is
+         interpreted differently depending on the mode.
+     :type rate: Optional[float]
+
+     :raises ValueError: If an invalid mode is provided, or if the rate is not
+         positive for the "constant" and "poisson" modes.
+     """
+
+     def __init__(self, mode: LoadGenerationMode, rate: Optional[float] = None):
+         """
+         Initialize the Load Generator with the mode and rate.
+
+         :param mode: The mode of load generation ("constant", "poisson", "throughput",
+             or "synchronous").
+         :type mode: LoadGenerationMode
+         :param rate: The rate at which to generate timestamps. In the "constant"
+             mode, this represents the frequency of events. In the "poisson" mode,
+             it represents the average frequency.
+         :type rate: Optional[float]
+         """
+         if mode not in get_args(LoadGenerationMode):
+             error = ValueError(
+                 f"{mode} is not a valid Load Generation Mode. "
+                 f"Valid options are {get_args(LoadGenerationMode)}"
+             )
+             logger.error(error)
+             raise error
+
+         if mode not in ["synchronous", "throughput"] and (rate is None or rate <= 0):
+             error = ValueError(f"Rate must be > 0 for mode: {mode}. Given: {rate}")
+             logger.error(error)
+             raise error
+
+         self._mode = mode
+         self._rate = rate
+         logger.debug(
+             "Initialized LoadGenerator with mode: {mode}, rate: {rate}",
+             mode=mode,
+             rate=rate,
+         )
+
+     @property
+     def mode(self) -> LoadGenerationMode:
+         """
+         Get the mode of load generation.
+
+         :return: The mode of load generation.
+         :rtype: LoadGenerationMode
+         """
+         return self._mode
+
+     @property
+     def rate(self) -> Optional[float]:
+         """
+         Get the rate of load generation.
+
+         :return: The rate of load generation.
+         :rtype: Optional[float]
+         """
+         return self._rate
+
+     def times(self) -> Generator[float, None, None]:
+         """
+         Generate timestamps for load generation based on the selected mode.
+
+         :return: A generator that yields timestamps at which each load
+             should be initiated.
+         :rtype: Generator[float, None, None]
+
+         :raises ValueError: If the mode is invalid.
+         """
+         logger.debug(f"Generating timestamps using mode: {self._mode}")
+
+         if self._mode == "throughput":
+             yield from self.throughput_times()
+         elif self._mode == "constant":
+             yield from self.constant_times()
+         elif self._mode == "poisson":
+             yield from self.poisson_times()
+         elif self._mode == "synchronous":
+             yield from self.synchronous_times()
+         else:
+             logger.error(f"Invalid mode encountered: {self._mode}")
+             raise ValueError(f"Invalid mode: {self._mode}")
+
+     def synchronous_times(self) -> Generator[float, None, None]:
+         """
+         Generate invalid timestamps for the "synchronous" mode.
+
+         :return: A generator that yields a constant invalid timestamp (-1.0).
+         :rtype: Generator[float, None, None]
+         """
+         logger.debug("Generating invalid timestamps for synchronous mode")
+         while True:
+             yield -1.0
+
+     def throughput_times(self) -> Generator[float, None, None]:
+         """
+         Generate timestamps at the maximum rate possible, returning the current time.
+
+         :return: A generator that yields the current time in seconds.
+         :rtype: Generator[float, None, None]
+         """
+         logger.debug("Generating timestamps at throughput rate")
+         while True:
+             yield time.time()
+
+     def constant_times(self) -> Generator[float, None, None]:
+         """
+         Generate timestamps at a constant rate based on the specified rate.
+
+         :return: A generator that yields timestamps incremented by 1/rate seconds.
+         :rtype: Generator[float, None, None]
+         """
+         logger.debug("Generating constant rate timestamps with rate: {}", self._rate)
+
+         if self._rate is None or self._rate <= 0:
+             raise ValueError(
+                 f"Rate must be > 0 for constant mode, given: {self._rate}"
+             )
+
+         start_time = time.time()
+         time_increment = 1.0 / self._rate
+         counter = 0
+
+         while True:
+             yield_time = start_time + time_increment * counter
+             logger.debug(f"Yielding timestamp: {yield_time}")
+             yield yield_time
+             counter += 1
+
+     def poisson_times(self) -> Generator[float, None, None]:
+         """
+         Generate timestamps based on a Poisson process, where the number
+         of requests to be sent per second is drawn from a Poisson distribution.
+         The inter-arrival time between requests is exponentially distributed.
+
+         :return: A generator that yields timestamps based on a Poisson distribution.
+         :rtype: Generator[float, None, None]
+         """
+         logger.debug("Generating Poisson rate timestamps with rate: {}", self._rate)
+
+         if self._rate is None or self._rate <= 0:
+             raise ValueError(
+                 f"Rate must be > 0 for poisson mode, given: {self._rate}"
+             )
+
+         time_tracker = time.time()
+         rng = np.random.default_rng()
+         time_increment = 1.0
+
+         while True:
+             num_requests = rng.poisson(self._rate)
+
+             if num_requests == 0:
+                 yield time_tracker + time_increment
+             else:
+                 inter_arrival_times = rng.exponential(1.0 / self._rate, num_requests)
+                 logger.debug(
+                     "Calculated new inter-arrival times for poisson process: {}",
+                     inter_arrival_times,
+                 )
+                 arrival_time_tracker = time_tracker
+
+                 for arrival_time in inter_arrival_times:
+                     arrival_time_tracker += arrival_time
+
+                     if arrival_time_tracker > time_tracker + time_increment:
+                         logger.debug(
+                             "Arrival time tracker: {} is greater than current time",
+                             arrival_time_tracker,
+                         )
+                         break
+
+                     yield arrival_time_tracker
+
+             time_tracker += time_increment  # Move on to the next time period
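
For reference, a small sketch of how these timestamp generators might be consumed directly; the import path assumes the module lives at guidellm.scheduler.load_generator, as referenced by the scheduler module above:

import itertools
import time

from guidellm.scheduler.load_generator import LoadGenerator

# Constant mode: timestamps spaced 1/rate seconds apart, starting from "now".
constant = LoadGenerator(mode="constant", rate=4.0)
for ts in itertools.islice(constant.times(), 5):
    print(round(ts - time.time(), 3))  # roughly 0.0, 0.25, 0.5, 0.75, 1.0

# Synchronous mode yields the sentinel -1.0; throughput mode yields time.time().
print(next(LoadGenerator(mode="synchronous").times()))  # -1.0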
@@ -0,0 +1,40 @@
+ from .injector import create_report, inject_data
+ from .progress import BenchmarkReportProgress
+ from .text import (
+     clean_text,
+     filter_text,
+     is_path,
+     is_path_like,
+     is_url,
+     load_text,
+     load_text_lines,
+     parse_text_objects,
+     split_lines_by_punctuation,
+     split_text,
+ )
+ from .transformers import (
+     load_transformers_dataset,
+     resolve_transformers_dataset,
+     resolve_transformers_dataset_column,
+     resolve_transformers_dataset_split,
+ )
+
+ __all__ = [
+     "BenchmarkReportProgress",
+     "clean_text",
+     "create_report",
+     "filter_text",
+     "inject_data",
+     "is_path",
+     "is_path_like",
+     "is_url",
+     "load_text",
+     "load_text_lines",
+     "load_transformers_dataset",
+     "parse_text_objects",
+     "resolve_transformers_dataset",
+     "resolve_transformers_dataset_column",
+     "resolve_transformers_dataset_split",
+     "split_lines_by_punctuation",
+     "split_text",
+ ]
@@ -0,0 +1,70 @@
+ from pathlib import Path
+ from typing import Union
+
+ from pydantic import BaseModel
+
+ from guidellm.config import settings
+ from guidellm.utils.text import load_text
+
+ __all__ = ["create_report", "inject_data"]
+
+
+ def create_report(model: BaseModel, output_path: Union[str, Path]) -> Path:
+     """
+     Creates a report from the model and saves it to the output path.
+
+     :param model: the model to serialize and inject
+     :type model: BaseModel
+     :param output_path: the path, either a file or a directory,
+         to save the report to. If a directory, the report will be saved
+         as "report.html" inside of the directory.
+     :type output_path: Union[str, Path]
+     :return: the path to the saved report
+     :rtype: Path
+     """
+     if not isinstance(output_path, Path):
+         output_path = Path(output_path)
+
+     html_content = load_text(settings.report_generation.source)
+     report_content = inject_data(
+         model,
+         html_content,
+         settings.report_generation.report_html_match,
+         settings.report_generation.report_html_placeholder,
+     )
+
+     if not output_path.suffix:
+         # assume directory, save as report.html
+         output_path = output_path / "report.html"
+
+     output_path.parent.mkdir(parents=True, exist_ok=True)
+     output_path.write_text(report_content)
+
+     return output_path
+
+
+ def inject_data(
+     model: BaseModel,
+     html: str,
+     match: str,
+     placeholder: str,
+ ) -> str:
+     """
+     Injects the data from the model into the HTML while replacing the placeholder.
+
+     :param model: the model to serialize and inject
+     :type model: BaseModel
+     :param html: the html to inject the data into
+     :type html: str
+     :param match: the string to match in the html that contains the placeholder
+     :type match: str
+     :param placeholder: the placeholder within the matched string to replace
+         with the serialized model data
+     :type placeholder: str
+     :return: the html with the model data injected
+     :rtype: str
+     """
+     model_str = model.json()
+     inject_str = match.replace(placeholder, model_str)
+
+     return html.replace(match, inject_str)
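
To make the string replacement concrete, here is a toy example of inject_data; the DummyReport model and the HTML snippet are hypothetical, and the import path follows the guidellm.utils re-exports shown earlier in this diff:

from pydantic import BaseModel

from guidellm.utils import inject_data

class DummyReport(BaseModel):  # hypothetical stand-in for a real report model
    name: str
    requests: int

html = "<script>window.report = { placeholder };</script>"
result = inject_data(
    DummyReport(name="demo", requests=3),
    html,
    match="window.report = { placeholder };",
    placeholder="{ placeholder }",
)
# The matched block is kept and the placeholder becomes the model's JSON, roughly:
# <script>window.report = {"name": "demo", "requests": 3};</script>
print(result)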