guidellm 0.1.0__py3-none-any.whl → 0.2.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (69)
  1. guidellm/__init__.py +38 -6
  2. guidellm/__main__.py +294 -0
  3. guidellm/backend/__init__.py +19 -6
  4. guidellm/backend/backend.py +238 -0
  5. guidellm/backend/openai.py +532 -122
  6. guidellm/backend/response.py +132 -0
  7. guidellm/benchmark/__init__.py +73 -0
  8. guidellm/benchmark/aggregator.py +760 -0
  9. guidellm/benchmark/benchmark.py +838 -0
  10. guidellm/benchmark/benchmarker.py +334 -0
  11. guidellm/benchmark/entrypoints.py +141 -0
  12. guidellm/benchmark/output.py +946 -0
  13. guidellm/benchmark/profile.py +409 -0
  14. guidellm/benchmark/progress.py +720 -0
  15. guidellm/config.py +34 -56
  16. guidellm/data/__init__.py +4 -0
  17. guidellm/data/prideandprejudice.txt.gz +0 -0
  18. guidellm/dataset/__init__.py +22 -0
  19. guidellm/dataset/creator.py +213 -0
  20. guidellm/dataset/entrypoints.py +42 -0
  21. guidellm/dataset/file.py +90 -0
  22. guidellm/dataset/hf_datasets.py +62 -0
  23. guidellm/dataset/in_memory.py +132 -0
  24. guidellm/dataset/synthetic.py +262 -0
  25. guidellm/objects/__init__.py +18 -0
  26. guidellm/objects/pydantic.py +60 -0
  27. guidellm/objects/statistics.py +947 -0
  28. guidellm/request/__init__.py +12 -10
  29. guidellm/request/loader.py +281 -0
  30. guidellm/request/request.py +79 -0
  31. guidellm/scheduler/__init__.py +51 -3
  32. guidellm/scheduler/result.py +137 -0
  33. guidellm/scheduler/scheduler.py +382 -0
  34. guidellm/scheduler/strategy.py +493 -0
  35. guidellm/scheduler/types.py +7 -0
  36. guidellm/scheduler/worker.py +511 -0
  37. guidellm/utils/__init__.py +16 -29
  38. guidellm/utils/colors.py +8 -0
  39. guidellm/utils/hf_transformers.py +35 -0
  40. guidellm/utils/random.py +43 -0
  41. guidellm/utils/text.py +118 -357
  42. {guidellm-0.1.0.dist-info → guidellm-0.2.0.dev0.dist-info}/METADATA +96 -79
  43. guidellm-0.2.0.dev0.dist-info/RECORD +48 -0
  44. {guidellm-0.1.0.dist-info → guidellm-0.2.0.dev0.dist-info}/WHEEL +1 -1
  45. guidellm-0.2.0.dev0.dist-info/entry_points.txt +2 -0
  46. guidellm/backend/base.py +0 -320
  47. guidellm/core/__init__.py +0 -24
  48. guidellm/core/distribution.py +0 -190
  49. guidellm/core/report.py +0 -321
  50. guidellm/core/request.py +0 -44
  51. guidellm/core/result.py +0 -545
  52. guidellm/core/serializable.py +0 -169
  53. guidellm/executor/__init__.py +0 -10
  54. guidellm/executor/base.py +0 -213
  55. guidellm/executor/profile_generator.py +0 -343
  56. guidellm/main.py +0 -336
  57. guidellm/request/base.py +0 -194
  58. guidellm/request/emulated.py +0 -391
  59. guidellm/request/file.py +0 -76
  60. guidellm/request/transformers.py +0 -100
  61. guidellm/scheduler/base.py +0 -374
  62. guidellm/scheduler/load_generator.py +0 -196
  63. guidellm/utils/injector.py +0 -70
  64. guidellm/utils/progress.py +0 -196
  65. guidellm/utils/transformers.py +0 -151
  66. guidellm-0.1.0.dist-info/RECORD +0 -35
  67. guidellm-0.1.0.dist-info/entry_points.txt +0 -3
  68. {guidellm-0.1.0.dist-info → guidellm-0.2.0.dev0.dist-info/licenses}/LICENSE +0 -0
  69. {guidellm-0.1.0.dist-info → guidellm-0.2.0.dev0.dist-info}/top_level.txt +0 -0
guidellm/benchmark/benchmarker.py (new file)
@@ -0,0 +1,334 @@
+ import time
+ import uuid
+ from abc import ABC, abstractmethod
+ from collections.abc import AsyncGenerator, Iterable
+ from pathlib import Path
+ from typing import (
+     Any,
+     Generic,
+     Literal,
+     Optional,
+     Union,
+ )
+
+ from pydantic import Field
+ from transformers import PreTrainedTokenizerBase  # type: ignore  # noqa: PGH003
+
+ from guidellm.backend import Backend, ResponseSummary
+ from guidellm.benchmark.aggregator import (
+     AggregatorT,
+     BenchmarkT,
+     GenerativeBenchmarkAggregator,
+ )
+ from guidellm.benchmark.benchmark import BenchmarkArgs, GenerativeBenchmark
+ from guidellm.benchmark.profile import Profile
+ from guidellm.objects import StandardBaseModel
+ from guidellm.request import (
+     GenerationRequest,
+     GenerativeRequestLoaderDescription,
+     RequestLoaderDescription,
+ )
+ from guidellm.scheduler import (
+     GenerativeRequestsWorker,
+     RequestsWorker,
+     RequestT,
+     ResponseT,
+     Scheduler,
+     SchedulerRequestResult,
+     SchedulingStrategy,
+ )
+
+ __all__ = ["Benchmarker", "BenchmarkerResult", "GenerativeBenchmarker"]
+
+
+ class BenchmarkerResult(
+     StandardBaseModel, Generic[AggregatorT, BenchmarkT, RequestT, ResponseT]
+ ):
+     type_: Literal[
+         "run_start",
+         "run_complete",
+         "scheduler_start",
+         "scheduler_update",
+         "scheduler_complete",
+         "benchmark_compiled",
+     ]
+     start_time: float
+     end_number: int
+     profile: Profile
+     current_index: int
+     current_strategy: Optional[SchedulingStrategy] = None
+     current_aggregator: Optional[AggregatorT] = None
+     current_benchmark: Optional[BenchmarkT] = None
+     current_result: Optional[SchedulerRequestResult[RequestT, ResponseT]] = None
+
+
+ class BenchmarkerStrategyLimits(StandardBaseModel):
+     requests_loader_size: Optional[int] = Field(
+         description="Size of the request loader.",
+     )
+     max_number_per_strategy: Optional[int] = Field(
+         description="Maximum number of requests to process per strategy.",
+         ge=0,
+     )
+     max_duration_per_strategy: Optional[float] = Field(
+         description="Maximum duration (in seconds) to process requests per strategy.",
+         ge=0,
+     )
+     warmup_percent_per_strategy: Optional[float] = Field(
+         description="Percentage of requests to use for warmup.",
+         ge=0,
+         le=1,
+     )
+     cooldown_percent_per_strategy: Optional[float] = Field(
+         description="Percentage of requests to use for cooldown.",
+         ge=0,
+         le=1,
+     )
+
+     @property
+     def max_number(self) -> Optional[int]:
+         if self.max_number_per_strategy is not None:
+             return self.max_number_per_strategy
+
+         if self.requests_loader_size is not None:
+             return self.requests_loader_size
+
+         return None
+
+     @property
+     def max_duration(self) -> Optional[float]:
+         return self.max_duration_per_strategy
+
+     @property
+     def warmup_number(self) -> Optional[int]:
+         if self.warmup_percent_per_strategy is None or self.max_number is None:
+             return None
+
+         return int(self.warmup_percent_per_strategy * self.max_number)
+
+     @property
+     def warmup_duration(self) -> Optional[float]:
+         if self.warmup_percent_per_strategy is None or self.max_duration is None:
+             return None
+
+         return self.warmup_percent_per_strategy * self.max_duration
+
+     @property
+     def cooldown_number(self) -> Optional[int]:
+         if self.cooldown_percent_per_strategy is None or self.max_number is None:
+             return None
+
+         return int(self.cooldown_percent_per_strategy * self.max_number)
+
+     @property
+     def cooldown_duration(self) -> Optional[float]:
+         if self.cooldown_percent_per_strategy is None or self.max_duration is None:
+             return None
+
+         return self.cooldown_percent_per_strategy * self.max_duration
+
+
+ class Benchmarker(Generic[AggregatorT, BenchmarkT, RequestT, ResponseT], ABC):
+     def __init__(
+         self,
+         worker: RequestsWorker[RequestT, ResponseT],
+         request_loader: Iterable[RequestT],
+         requests_loader_description: RequestLoaderDescription,
+         benchmark_save_extras: Optional[dict[str, Any]] = None,
+     ):
+         self.worker = worker
+         self.scheduler: Scheduler[RequestT, ResponseT] = Scheduler(
+             worker=worker, request_loader=request_loader
+         )
+         self.requests_loader_description = requests_loader_description
+         self.benchmark_save_extras = benchmark_save_extras
+
+     async def run(
+         self,
+         profile: Profile,
+         max_number_per_strategy: Optional[int],
+         max_duration_per_strategy: Optional[float],
+         warmup_percent_per_strategy: Optional[float],
+         cooldown_percent_per_strategy: Optional[float],
+     ) -> AsyncGenerator[
+         BenchmarkerResult[AggregatorT, BenchmarkT, RequestT, ResponseT], None
+     ]:
+         try:
+             requests_loader_size = len(self.scheduler.request_loader)  # type: ignore[arg-type]
+         except Exception:  # noqa: BLE001
+             requests_loader_size = None
+
+         strategy_limits = BenchmarkerStrategyLimits(
+             requests_loader_size=requests_loader_size,
+             max_number_per_strategy=max_number_per_strategy,
+             max_duration_per_strategy=max_duration_per_strategy,
+             warmup_percent_per_strategy=warmup_percent_per_strategy,
+             cooldown_percent_per_strategy=cooldown_percent_per_strategy,
+         )
+         start_time = time.time()
+         end_number = len(profile.strategy_types)
+         current_index = -1
+         run_id = str(uuid.uuid4())
+
+         yield BenchmarkerResult(
+             type_="run_start",
+             start_time=start_time,
+             end_number=end_number,
+             profile=profile,
+             current_index=current_index,
+             current_strategy=None,
+             current_aggregator=None,
+             current_benchmark=None,
+             current_result=None,
+         )
+
+         while scheduling_strategy := profile.next_strategy():
+             current_index += 1
+             aggregator = self.create_benchmark_aggregator(
+                 run_id=run_id,
+                 profile=profile,
+                 strategy_index=current_index,
+                 strategy=scheduling_strategy,
+                 limits=strategy_limits,
+             )
+
+             async for result in self.scheduler.run(
+                 scheduling_strategy=scheduling_strategy,
+                 max_number=max_number_per_strategy,
+                 max_duration=max_duration_per_strategy,
+             ):
+                 if result.type_ == "run_start":
+                     yield BenchmarkerResult(
+                         type_="scheduler_start",
+                         start_time=start_time,
+                         end_number=end_number,
+                         profile=profile,
+                         current_index=current_index,
+                         current_strategy=scheduling_strategy,
+                         current_aggregator=aggregator,
+                         current_benchmark=None,
+                         current_result=None,
+                     )
+                 elif result.type_ == "run_complete":
+                     yield BenchmarkerResult(
+                         type_="scheduler_complete",
+                         start_time=start_time,
+                         end_number=end_number,
+                         profile=profile,
+                         current_index=current_index,
+                         current_strategy=scheduling_strategy,
+                         current_aggregator=aggregator,
+                         current_benchmark=None,
+                         current_result=None,
+                     )
+                 elif isinstance(result, SchedulerRequestResult):
+                     aggregator.add_result(result)
+
+                     yield BenchmarkerResult(
+                         type_="scheduler_update",
+                         start_time=start_time,
+                         end_number=end_number,
+                         profile=profile,
+                         current_index=current_index,
+                         current_strategy=scheduling_strategy,
+                         current_aggregator=aggregator,
+                         current_benchmark=None,
+                         current_result=result,
+                     )
+                 else:
+                     raise ValueError(f"Unexpected result type: {type(result)}")
+
+             benchmark: BenchmarkT = aggregator.compile()
+             profile.completed_strategy(
+                 average_rate=benchmark.metrics.requests_per_second.successful.mean,
+                 average_concurrency=benchmark.metrics.request_concurrency.successful.mean,
+             )
+
+             yield BenchmarkerResult(
+                 type_="benchmark_compiled",
+                 start_time=start_time,
+                 end_number=end_number,
+                 profile=profile,
+                 current_index=current_index,
+                 current_strategy=scheduling_strategy,
+                 current_aggregator=None,
+                 current_benchmark=benchmark,
+                 current_result=None,
+             )
+
+         yield BenchmarkerResult(
+             type_="run_complete",
+             start_time=start_time,
+             end_number=end_number,
+             profile=profile,
+             current_index=current_index,
+             current_strategy=None,
+             current_aggregator=None,
+             current_benchmark=None,
+             current_result=None,
+         )
+
+     @abstractmethod
+     def create_benchmark_aggregator(
+         self,
+         run_id: str,
+         profile: Profile,
+         strategy_index: int,
+         strategy: SchedulingStrategy,
+         limits: BenchmarkerStrategyLimits,
+     ) -> AggregatorT: ...
+
+
+ class GenerativeBenchmarker(
+     Benchmarker[
+         GenerativeBenchmarkAggregator,
+         GenerativeBenchmark,
+         GenerationRequest,
+         ResponseSummary,
+     ],
+ ):
+     def __init__(
+         self,
+         backend: Backend,
+         request_loader: Iterable[GenerationRequest],
+         request_loader_description: GenerativeRequestLoaderDescription,
+         benchmark_save_extras: Optional[dict[str, Any]] = None,
+         processor: Optional[Union[str, Path, PreTrainedTokenizerBase]] = None,
+         processor_args: Optional[dict[str, Any]] = None,
+     ):
+         super().__init__(
+             worker=GenerativeRequestsWorker(backend),
+             request_loader=request_loader,
+             requests_loader_description=request_loader_description,
+             benchmark_save_extras=benchmark_save_extras,
+         )
+         self.processor = processor
+         self.processor_args = processor_args
+
+     def create_benchmark_aggregator(
+         self,
+         run_id: str,
+         profile: Profile,
+         strategy_index: int,
+         strategy: SchedulingStrategy,
+         limits: BenchmarkerStrategyLimits,
+     ) -> GenerativeBenchmarkAggregator:
+         return GenerativeBenchmarkAggregator(
+             run_id=run_id,
+             args=BenchmarkArgs(
+                 profile=profile,
+                 strategy_index=strategy_index,
+                 strategy=strategy,
+                 max_number=limits.max_number,
+                 max_duration=limits.max_duration,
+                 warmup_number=limits.warmup_number,
+                 warmup_duration=limits.warmup_duration,
+                 cooldown_number=limits.cooldown_number,
+                 cooldown_duration=limits.cooldown_duration,
+             ),
+             worker_description=self.worker.description,  # type: ignore[arg-type]
+             request_loader_description=self.requests_loader_description,  # type: ignore[arg-type]
+             extras=self.benchmark_save_extras or {},
+             processor=self.processor,
+             processor_args=self.processor_args,
+         )
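
For orientation, Benchmarker.run() above is an async generator: rather than returning a single value, it yields BenchmarkerResult events ("run_start", "scheduler_start"/"scheduler_update"/"scheduler_complete", "benchmark_compiled", "run_complete") as each scheduling strategy from the Profile executes. Below is a minimal consumer sketch, not part of the package: it assumes an already constructed GenerativeBenchmarker and Profile, and the limit values are illustrative rather than defaults from this release.

import asyncio


async def collect_benchmarks(benchmarker, profile):
    """Collect compiled benchmarks from the Benchmarker.run() event stream."""
    benchmarks = []
    async for result in benchmarker.run(
        profile=profile,
        max_number_per_strategy=1000,       # illustrative: cap each strategy at 1000 requests
        max_duration_per_strategy=120.0,    # illustrative: or 120 seconds, whichever comes first
        warmup_percent_per_strategy=0.1,    # first 10% of requests treated as warmup
        cooldown_percent_per_strategy=0.1,  # last 10% treated as cooldown
    ):
        # Only "benchmark_compiled" events carry a finished benchmark object.
        if result.type_ == "benchmark_compiled" and result.current_benchmark is not None:
            benchmarks.append(result.current_benchmark)
    return benchmarks


# Usage (assuming `benchmarker` and `profile` already exist):
# benchmarks = asyncio.run(collect_benchmarks(benchmarker, profile))
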
guidellm/benchmark/entrypoints.py (new file)
@@ -0,0 +1,141 @@
+ from collections.abc import Iterable
+ from pathlib import Path
+ from typing import Any, Literal, Optional, Union
+
+ from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
+ from transformers import (  # type: ignore[import]
+     PreTrainedTokenizerBase,
+ )
+
+ from guidellm.backend import Backend, BackendType
+ from guidellm.benchmark.benchmarker import GenerativeBenchmarker
+ from guidellm.benchmark.output import (
+     GenerativeBenchmarksConsole,
+     GenerativeBenchmarksReport,
+ )
+ from guidellm.benchmark.profile import ProfileType, create_profile
+ from guidellm.benchmark.progress import GenerativeTextBenchmarkerProgressDisplay
+ from guidellm.request import GenerativeRequestLoader
+ from guidellm.scheduler import StrategyType
+
+
+ async def benchmark_generative_text(
+     target: str,
+     backend_type: BackendType,
+     backend_args: Optional[dict[str, Any]],
+     model: Optional[str],
+     processor: Optional[Optional[Union[str, Path, PreTrainedTokenizerBase]]],
+     processor_args: Optional[dict[str, Any]],
+     data: Union[
+         str,
+         Path,
+         Iterable[Union[str, dict[str, Any]]],
+         Dataset,
+         DatasetDict,
+         IterableDataset,
+         IterableDatasetDict,
+     ],
+     data_args: Optional[dict[str, Any]],
+     data_sampler: Optional[Literal["random"]],
+     rate_type: Union[StrategyType, ProfileType],
+     rate: Optional[Union[int, float, list[Union[int, float]]]],
+     max_seconds: Optional[float],
+     max_requests: Optional[int],
+     warmup_percent: Optional[float],
+     cooldown_percent: Optional[float],
+     show_progress: bool,
+     show_progress_scheduler_stats: bool,
+     output_console: bool,
+     output_path: Optional[Union[str, Path]],
+     output_extras: Optional[dict[str, Any]],
+     output_sampling: Optional[int],
+     random_seed: int,
+ ) -> tuple[GenerativeBenchmarksReport, Optional[Path]]:
+     console = GenerativeBenchmarksConsole(enabled=show_progress)
+     console.print_line("Creating backend...")
+     backend = Backend.create(
+         backend_type, target=target, model=model, **(backend_args or {})
+     )
+     await backend.validate()
+     console.print_line(
+         f"Backend {backend_type} connected to {target} for model {backend.model}."
+     )
+
+     if processor is None:
+         processor = backend.model
+
+     console.print_line("Creating request loader...")
+     request_loader = GenerativeRequestLoader(
+         data=data,
+         data_args=data_args,
+         processor=processor,
+         processor_args=processor_args,
+         shuffle=data_sampler == "random",
+         iter_type=(
+             "finite"  # assume a finite dataset is our limit
+             if max_requests is None and max_seconds is None
+             else "infinite"  # default to infinite so we don't run out of data
+         ),
+         random_seed=random_seed,
+     )
+     unique_requests = request_loader.num_unique_items(raise_err=False)
+     console.print_line(
+         f"Created loader with {unique_requests} unique requests from {data}.\n\n"
+         if unique_requests > 0
+         else f"Created loader with unknown number unique requests from {data}.\n\n"
+     )
+
+     profile = create_profile(rate_type=rate_type, rate=rate)
+     benchmarker = GenerativeBenchmarker(
+         backend=backend,
+         request_loader=request_loader,
+         request_loader_description=request_loader.description,
+         benchmark_save_extras=output_extras,
+         processor=processor,
+         processor_args=processor_args,
+     )
+     progress = (
+         GenerativeTextBenchmarkerProgressDisplay(
+             display_scheduler_stats=show_progress_scheduler_stats
+         )
+         if show_progress
+         else None
+     )
+     report = GenerativeBenchmarksReport()
+
+     async for result in benchmarker.run(
+         profile=profile,
+         max_number_per_strategy=max_requests,
+         max_duration_per_strategy=max_seconds,
+         warmup_percent_per_strategy=warmup_percent,
+         cooldown_percent_per_strategy=cooldown_percent,
+     ):
+         if progress:
+             progress.update(result)
+
+         if result.type_ == "benchmark_compiled":
+             if result.current_benchmark is None:
+                 raise ValueError("Current benchmark is None")
+             report.benchmarks.append(
+                 result.current_benchmark.set_sample_size(output_sampling)
+             )
+
+     if output_console:
+         orig_enabled = console.enabled
+         console.enabled = True
+         console.benchmarks = report.benchmarks
+         console.print_benchmarks_metadata()
+         console.print_benchmarks_info()
+         console.print_benchmarks_stats()
+         console.enabled = orig_enabled
+
+     if output_path:
+         console.print_line("\nSaving benchmarks report...")
+         saved_path = report.save_file(output_path)
+         console.print_line(f"Benchmarks report saved to {saved_path}")
+     else:
+         saved_path = None
+
+     console.print_line("\nBenchmarking complete.")
+
+     return report, saved_path
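
As a usage note, benchmark_generative_text is a coroutine, so calling it directly from Python means driving it through asyncio. Below is a minimal sketch under stated assumptions: every literal (server URL, model name, backend and rate identifiers, data spec, output path) is an illustrative placeholder rather than a value defined by this diff, and every parameter must be passed explicitly because the signature declares no defaults.

import asyncio

from guidellm.benchmark.entrypoints import benchmark_generative_text

report, saved_path = asyncio.run(
    benchmark_generative_text(
        target="http://localhost:8000",  # placeholder inference server URL
        backend_type="openai_http",      # assumed BackendType value, not confirmed by this diff
        backend_args=None,
        model="my-model",                # placeholder model id
        processor=None,                  # falls back to backend.model when None
        processor_args=None,
        data="prompt_tokens=256,output_tokens=128",  # placeholder data spec
        data_args=None,
        data_sampler=None,
        rate_type="synchronous",         # assumed StrategyType value, not confirmed by this diff
        rate=None,
        max_seconds=60.0,
        max_requests=None,
        warmup_percent=None,
        cooldown_percent=None,
        show_progress=True,
        show_progress_scheduler_stats=False,
        output_console=True,
        output_path="benchmarks.json",   # placeholder report path
        output_extras=None,
        output_sampling=None,
        random_seed=42,
    )
)
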