guidellm 0.4.0a21__py3-none-any.whl → 0.4.0a169__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (115)
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +452 -252
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +110 -0
  5. guidellm/backends/openai.py +355 -0
  6. guidellm/backends/response_handlers.py +455 -0
  7. guidellm/benchmark/__init__.py +53 -39
  8. guidellm/benchmark/benchmarker.py +150 -317
  9. guidellm/benchmark/entrypoints.py +467 -128
  10. guidellm/benchmark/output.py +519 -771
  11. guidellm/benchmark/profile.py +580 -280
  12. guidellm/benchmark/progress.py +568 -549
  13. guidellm/benchmark/scenarios/__init__.py +40 -0
  14. guidellm/benchmark/scenarios/chat.json +6 -0
  15. guidellm/benchmark/scenarios/rag.json +6 -0
  16. guidellm/benchmark/schemas.py +2086 -0
  17. guidellm/data/__init__.py +28 -4
  18. guidellm/data/collators.py +16 -0
  19. guidellm/data/deserializers/__init__.py +53 -0
  20. guidellm/data/deserializers/deserializer.py +144 -0
  21. guidellm/data/deserializers/file.py +222 -0
  22. guidellm/data/deserializers/huggingface.py +94 -0
  23. guidellm/data/deserializers/memory.py +194 -0
  24. guidellm/data/deserializers/synthetic.py +348 -0
  25. guidellm/data/loaders.py +149 -0
  26. guidellm/data/preprocessors/__init__.py +25 -0
  27. guidellm/data/preprocessors/formatters.py +404 -0
  28. guidellm/data/preprocessors/mappers.py +198 -0
  29. guidellm/data/preprocessors/preprocessor.py +31 -0
  30. guidellm/data/processor.py +31 -0
  31. guidellm/data/schemas.py +13 -0
  32. guidellm/data/utils/__init__.py +6 -0
  33. guidellm/data/utils/dataset.py +94 -0
  34. guidellm/extras/__init__.py +4 -0
  35. guidellm/extras/audio.py +215 -0
  36. guidellm/extras/vision.py +242 -0
  37. guidellm/logger.py +2 -2
  38. guidellm/mock_server/__init__.py +8 -0
  39. guidellm/mock_server/config.py +84 -0
  40. guidellm/mock_server/handlers/__init__.py +17 -0
  41. guidellm/mock_server/handlers/chat_completions.py +280 -0
  42. guidellm/mock_server/handlers/completions.py +280 -0
  43. guidellm/mock_server/handlers/tokenizer.py +142 -0
  44. guidellm/mock_server/models.py +510 -0
  45. guidellm/mock_server/server.py +168 -0
  46. guidellm/mock_server/utils.py +302 -0
  47. guidellm/preprocess/dataset.py +23 -26
  48. guidellm/presentation/builder.py +2 -2
  49. guidellm/presentation/data_models.py +25 -21
  50. guidellm/presentation/injector.py +2 -3
  51. guidellm/scheduler/__init__.py +65 -26
  52. guidellm/scheduler/constraints.py +1035 -0
  53. guidellm/scheduler/environments.py +252 -0
  54. guidellm/scheduler/scheduler.py +140 -368
  55. guidellm/scheduler/schemas.py +272 -0
  56. guidellm/scheduler/strategies.py +519 -0
  57. guidellm/scheduler/worker.py +391 -420
  58. guidellm/scheduler/worker_group.py +707 -0
  59. guidellm/schemas/__init__.py +31 -0
  60. guidellm/schemas/info.py +159 -0
  61. guidellm/schemas/request.py +226 -0
  62. guidellm/schemas/response.py +119 -0
  63. guidellm/schemas/stats.py +228 -0
  64. guidellm/{config.py → settings.py} +32 -21
  65. guidellm/utils/__init__.py +95 -8
  66. guidellm/utils/auto_importer.py +98 -0
  67. guidellm/utils/cli.py +71 -2
  68. guidellm/utils/console.py +183 -0
  69. guidellm/utils/encoding.py +778 -0
  70. guidellm/utils/functions.py +134 -0
  71. guidellm/utils/hf_datasets.py +1 -2
  72. guidellm/utils/hf_transformers.py +4 -4
  73. guidellm/utils/imports.py +9 -0
  74. guidellm/utils/messaging.py +1118 -0
  75. guidellm/utils/mixins.py +115 -0
  76. guidellm/utils/pydantic_utils.py +411 -0
  77. guidellm/utils/random.py +3 -4
  78. guidellm/utils/registry.py +220 -0
  79. guidellm/utils/singleton.py +133 -0
  80. guidellm/{objects → utils}/statistics.py +341 -247
  81. guidellm/utils/synchronous.py +159 -0
  82. guidellm/utils/text.py +163 -50
  83. guidellm/utils/typing.py +41 -0
  84. guidellm/version.py +1 -1
  85. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/METADATA +33 -10
  86. guidellm-0.4.0a169.dist-info/RECORD +95 -0
  87. guidellm/backend/__init__.py +0 -23
  88. guidellm/backend/backend.py +0 -259
  89. guidellm/backend/openai.py +0 -705
  90. guidellm/backend/response.py +0 -136
  91. guidellm/benchmark/aggregator.py +0 -760
  92. guidellm/benchmark/benchmark.py +0 -837
  93. guidellm/benchmark/scenario.py +0 -104
  94. guidellm/data/prideandprejudice.txt.gz +0 -0
  95. guidellm/dataset/__init__.py +0 -22
  96. guidellm/dataset/creator.py +0 -213
  97. guidellm/dataset/entrypoints.py +0 -42
  98. guidellm/dataset/file.py +0 -92
  99. guidellm/dataset/hf_datasets.py +0 -62
  100. guidellm/dataset/in_memory.py +0 -132
  101. guidellm/dataset/synthetic.py +0 -287
  102. guidellm/objects/__init__.py +0 -18
  103. guidellm/objects/pydantic.py +0 -89
  104. guidellm/request/__init__.py +0 -18
  105. guidellm/request/loader.py +0 -284
  106. guidellm/request/request.py +0 -79
  107. guidellm/request/types.py +0 -10
  108. guidellm/scheduler/queues.py +0 -25
  109. guidellm/scheduler/result.py +0 -155
  110. guidellm/scheduler/strategy.py +0 -495
  111. guidellm-0.4.0a21.dist-info/RECORD +0 -62
  112. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/WHEEL +0 -0
  113. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/entry_points.txt +0 -0
  114. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/licenses/LICENSE +0 -0
  115. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/top_level.txt +0 -0
guidellm/benchmark/benchmarker.py
@@ -1,334 +1,167 @@
-import time
-import uuid
-from abc import ABC, abstractmethod
-from collections.abc import AsyncGenerator, Iterable
-from pathlib import Path
-from typing import (
-    Any,
-    Generic,
-    Literal,
-    Optional,
-    Union,
-)
+"""
+Benchmark execution orchestration and lifecycle management.
+
+Provides the core benchmarking engine that coordinates request scheduling,
+data aggregation, and result compilation across different execution strategies
+and environments. The Benchmarker acts as the primary workflow coordinator,
+managing the complete benchmark lifecycle from request submission through
+result compilation while supporting thread-safe singleton operations.
+"""
 
-from pydantic import Field
-from transformers import PreTrainedTokenizerBase  # type: ignore  # noqa: PGH003
+from __future__ import annotations
 
-from guidellm.backend import Backend, ResponseSummary
-from guidellm.benchmark.aggregator import (
-    AggregatorT,
+import uuid
+from abc import ABC
+from collections.abc import AsyncIterator, Iterable
+from typing import Generic
+
+from guidellm.benchmark.profile import Profile
+from guidellm.benchmark.progress import BenchmarkerProgress
+from guidellm.benchmark.schemas import (
+    BenchmarkerArgs,
     BenchmarkT,
-    GenerativeBenchmarkAggregator,
+    EstimatedBenchmarkState,
 )
-from guidellm.benchmark.benchmark import BenchmarkArgs, GenerativeBenchmark
-from guidellm.benchmark.profile import Profile
-from guidellm.objects import StandardBaseModel
-from guidellm.request import (
-    GenerationRequest,
-    GenerativeRequestLoaderDescription,
-    RequestLoaderDescription,
+from guidellm.logger import logger
+from guidellm.scheduler import (
+    BackendInterface,
+    Environment,
     RequestT,
     ResponseT,
-)
-from guidellm.scheduler import (
-    GenerativeRequestsWorker,
-    RequestsWorker,
     Scheduler,
-    SchedulerRequestResult,
-    SchedulingStrategy,
 )
+from guidellm.utils import ThreadSafeSingletonMixin
 
-__all__ = ["Benchmarker", "BenchmarkerResult", "GenerativeBenchmarker"]
+__all__ = ["Benchmarker"]
 
 
-class BenchmarkerResult(
-    StandardBaseModel, Generic[AggregatorT, BenchmarkT, RequestT, ResponseT]
+class Benchmarker(
+    Generic[BenchmarkT, RequestT, ResponseT],
+    ABC,
+    ThreadSafeSingletonMixin,
 ):
-    type_: Literal[
-        "run_start",
-        "run_complete",
-        "scheduler_start",
-        "scheduler_update",
-        "scheduler_complete",
-        "benchmark_compiled",
-    ]
-    start_time: float
-    end_number: int
-    profile: Profile
-    current_index: int
-    current_strategy: Optional[SchedulingStrategy] = None
-    current_aggregator: Optional[AggregatorT] = None
-    current_benchmark: Optional[BenchmarkT] = None
-    current_result: Optional[SchedulerRequestResult[RequestT, ResponseT]] = None
-
-
-class BenchmarkerStrategyLimits(StandardBaseModel):
-    requests_loader_size: Optional[int] = Field(
-        description="Size of the request loader.",
-    )
-    max_number_per_strategy: Optional[int] = Field(
-        description="Maximum number of requests to process per strategy.",
-        ge=0,
-    )
-    max_duration_per_strategy: Optional[float] = Field(
-        description="Maximum duration (in seconds) to process requests per strategy.",
-        ge=0,
-    )
-    warmup_percent_per_strategy: Optional[float] = Field(
-        description="Percentage of requests to use for warmup.",
-        ge=0,
-        le=1,
-    )
-    cooldown_percent_per_strategy: Optional[float] = Field(
-        description="Percentage of requests to use for cooldown.",
-        ge=0,
-        le=1,
-    )
-
-    @property
-    def max_number(self) -> Optional[int]:
-        if self.max_number_per_strategy is not None:
-            return self.max_number_per_strategy
-
-        if self.requests_loader_size is not None:
-            return self.requests_loader_size
-
-        return None
-
-    @property
-    def max_duration(self) -> Optional[float]:
-        return self.max_duration_per_strategy
-
-    @property
-    def warmup_number(self) -> Optional[int]:
-        if self.warmup_percent_per_strategy is None or self.max_number is None:
-            return None
-
-        return int(self.warmup_percent_per_strategy * self.max_number)
-
-    @property
-    def warmup_duration(self) -> Optional[float]:
-        if self.warmup_percent_per_strategy is None or self.max_duration is None:
-            return None
-
-        return self.warmup_percent_per_strategy * self.max_duration
-
-    @property
-    def cooldown_number(self) -> Optional[int]:
-        if self.cooldown_percent_per_strategy is None or self.max_number is None:
-            return None
-
-        return int(self.cooldown_percent_per_strategy * self.max_number)
+    """
+    Abstract benchmark orchestrator for request processing workflows.
 
-    @property
-    def cooldown_duration(self) -> Optional[float]:
-        if self.cooldown_percent_per_strategy is None or self.max_duration is None:
-            return None
-
-        return self.cooldown_percent_per_strategy * self.max_duration
-
-
-class Benchmarker(Generic[AggregatorT, BenchmarkT, RequestT, ResponseT], ABC):
-    def __init__(
-        self,
-        worker: RequestsWorker[RequestT, ResponseT],
-        request_loader: Iterable[RequestT],
-        requests_loader_description: RequestLoaderDescription,
-        benchmark_save_extras: Optional[dict[str, Any]] = None,
-    ):
-        self.worker = worker
-        self.scheduler: Scheduler[RequestT, ResponseT] = Scheduler(
-            worker=worker, request_loader=request_loader
-        )
-        self.requests_loader_description = requests_loader_description
-        self.benchmark_save_extras = benchmark_save_extras
+    Coordinates execution of benchmarking runs across different scheduling
+    strategies, aggregating metrics and compiling results. Manages the complete
+    benchmark lifecycle from request submission through result compilation while
+    implementing thread-safe singleton pattern to ensure consistent state across
+    concurrent operations.
+    """
 
     async def run(
         self,
+        benchmark_class: type[BenchmarkT],
+        requests: Iterable[RequestT | Iterable[RequestT | tuple[RequestT, float]]],
+        backend: BackendInterface[RequestT, ResponseT],
         profile: Profile,
-        max_number_per_strategy: Optional[int],
-        max_duration_per_strategy: Optional[float],
-        warmup_percent_per_strategy: Optional[float],
-        cooldown_percent_per_strategy: Optional[float],
-    ) -> AsyncGenerator[
-        BenchmarkerResult[AggregatorT, BenchmarkT, RequestT, ResponseT], None
-    ]:
-        try:
-            requests_loader_size = len(self.scheduler.request_loader)  # type: ignore[arg-type]
-        except Exception:  # noqa: BLE001
-            requests_loader_size = None
-
-        strategy_limits = BenchmarkerStrategyLimits(
-            requests_loader_size=requests_loader_size,
-            max_number_per_strategy=max_number_per_strategy,
-            max_duration_per_strategy=max_duration_per_strategy,
-            warmup_percent_per_strategy=warmup_percent_per_strategy,
-            cooldown_percent_per_strategy=cooldown_percent_per_strategy,
-        )
-        start_time = time.time()
-        end_number = len(profile.strategy_types)
-        current_index = -1
-        run_id = str(uuid.uuid4())
-
-        yield BenchmarkerResult(
-            type_="run_start",
-            start_time=start_time,
-            end_number=end_number,
-            profile=profile,
-            current_index=current_index,
-            current_strategy=None,
-            current_aggregator=None,
-            current_benchmark=None,
-            current_result=None,
-        )
-
-        while scheduling_strategy := profile.next_strategy():
-            current_index += 1
-            aggregator = self.create_benchmark_aggregator(
-                run_id=run_id,
-                profile=profile,
-                strategy_index=current_index,
-                strategy=scheduling_strategy,
-                limits=strategy_limits,
-            )
-
-            async for result in self.scheduler.run(
-                scheduling_strategy=scheduling_strategy,
-                max_number=max_number_per_strategy,
-                max_duration=max_duration_per_strategy,
-            ):
-                if result.type_ == "run_start":
-                    yield BenchmarkerResult(
-                        type_="scheduler_start",
-                        start_time=start_time,
-                        end_number=end_number,
-                        profile=profile,
-                        current_index=current_index,
-                        current_strategy=scheduling_strategy,
-                        current_aggregator=aggregator,
-                        current_benchmark=None,
-                        current_result=None,
-                    )
-                elif result.type_ == "run_complete":
-                    yield BenchmarkerResult(
-                        type_="scheduler_complete",
-                        start_time=start_time,
-                        end_number=end_number,
-                        profile=profile,
-                        current_index=current_index,
-                        current_strategy=scheduling_strategy,
-                        current_aggregator=aggregator,
-                        current_benchmark=None,
-                        current_result=None,
-                    )
-                elif isinstance(result, SchedulerRequestResult):
-                    aggregator.add_result(result)
-
-                    yield BenchmarkerResult(
-                        type_="scheduler_update",
-                        start_time=start_time,
-                        end_number=end_number,
-                        profile=profile,
-                        current_index=current_index,
-                        current_strategy=scheduling_strategy,
-                        current_aggregator=aggregator,
-                        current_benchmark=None,
-                        current_result=result,
-                    )
-                else:
-                    raise ValueError(f"Unexpected result type: {type(result)}")
-
-            benchmark: BenchmarkT = aggregator.compile()
-            profile.completed_strategy(
-                average_rate=benchmark.metrics.requests_per_second.successful.mean,
-                average_concurrency=benchmark.metrics.request_concurrency.successful.mean,
-            )
-
-            yield BenchmarkerResult(
-                type_="benchmark_compiled",
-                start_time=start_time,
-                end_number=end_number,
-                profile=profile,
-                current_index=current_index,
-                current_strategy=scheduling_strategy,
-                current_aggregator=None,
-                current_benchmark=benchmark,
-                current_result=None,
-            )
-
-        yield BenchmarkerResult(
-            type_="run_complete",
-            start_time=start_time,
-            end_number=end_number,
-            profile=profile,
-            current_index=current_index,
-            current_strategy=None,
-            current_aggregator=None,
-            current_benchmark=None,
-            current_result=None,
-        )
-
-    @abstractmethod
-    def create_benchmark_aggregator(
-        self,
-        run_id: str,
-        profile: Profile,
-        strategy_index: int,
-        strategy: SchedulingStrategy,
-        limits: BenchmarkerStrategyLimits,
-    ) -> AggregatorT: ...
-
-
-class GenerativeBenchmarker(
-    Benchmarker[
-        GenerativeBenchmarkAggregator,
-        GenerativeBenchmark,
-        GenerationRequest,
-        ResponseSummary,
-    ],
-):
-    def __init__(
-        self,
-        backend: Backend,
-        request_loader: Iterable[GenerationRequest],
-        request_loader_description: GenerativeRequestLoaderDescription,
-        benchmark_save_extras: Optional[dict[str, Any]] = None,
-        processor: Optional[Union[str, Path, PreTrainedTokenizerBase]] = None,
-        processor_args: Optional[dict[str, Any]] = None,
-    ):
-        super().__init__(
-            worker=GenerativeRequestsWorker(backend),
-            request_loader=request_loader,
-            requests_loader_description=request_loader_description,
-            benchmark_save_extras=benchmark_save_extras,
-        )
-        self.processor = processor
-        self.processor_args = processor_args
-
-    def create_benchmark_aggregator(
-        self,
-        run_id: str,
-        profile: Profile,
-        strategy_index: int,
-        strategy: SchedulingStrategy,
-        limits: BenchmarkerStrategyLimits,
-    ) -> GenerativeBenchmarkAggregator:
-        return GenerativeBenchmarkAggregator(
-            run_id=run_id,
-            args=BenchmarkArgs(
-                profile=profile,
-                strategy_index=strategy_index,
-                strategy=strategy,
-                max_number=limits.max_number,
-                max_duration=limits.max_duration,
-                warmup_number=limits.warmup_number,
-                warmup_duration=limits.warmup_duration,
-                cooldown_number=limits.cooldown_number,
-                cooldown_duration=limits.cooldown_duration,
-            ),
-            worker_description=self.worker.description,  # type: ignore[arg-type]
-            request_loader_description=self.requests_loader_description,  # type: ignore[arg-type]
-            extras=self.benchmark_save_extras or {},
-            processor=self.processor,
-            processor_args=self.processor_args,
-        )
+        environment: Environment,
+        data: list[Any],
+        progress: BenchmarkerProgress[BenchmarkT] | None = None,
+        sample_requests: int | None = 20,
+        warmup: float | None = None,
+        cooldown: float | None = None,
+        prefer_response_metrics: bool = True,
+    ) -> AsyncIterator[BenchmarkT]:
+        """
+        Execute benchmark runs across multiple scheduling strategies.
+
+        Orchestrates the complete benchmark workflow by iterating through scheduling
+        strategies from the profile, executing requests through the scheduler,
+        aggregating metrics, and compiling final benchmark results.
+
+        :param benchmark_class: Class for constructing final benchmark objects
+        :param requests: Request datasets for processing across strategies
+        :param backend: Backend interface for request processing
+        :param profile: Benchmark profile defining strategies and constraints
+        :param environment: Execution environment for coordination
+        :param progress: Optional progress tracker for benchmark lifecycle events
+        :param sample_requests: Number of sample requests to use for estimation
+        :param warmup: Optional warmup duration in seconds before benchmarking
+        :param cooldown: Optional cooldown duration in seconds after benchmarking
+        :param prefer_response_metrics: Whether to prefer response-based metrics over
+            request-based metrics
+        :yield: Compiled benchmark results for each strategy execution
+        :raises Exception: If benchmark execution or compilation fails
+        """
+        with self.thread_lock:
+            if progress:
+                await progress.on_initialize(profile)
+
+            run_id = str(uuid.uuid4())
+            strategies_generator = profile.strategies_generator()
+            strategy, constraints = next(strategies_generator)
+
+            while strategy is not None:
+                if progress:
+                    await progress.on_benchmark_start(strategy)
+
+                args = BenchmarkerArgs(
+                    run_id=run_id,
+                    run_index=len(profile.completed_strategies),
+                    sample_requests=sample_requests,
+                    warmup=warmup,
+                    cooldown=cooldown,
+                    prefer_response_metrics=prefer_response_metrics,
+                )
+                estimated_state = EstimatedBenchmarkState()
+                scheduler_state = None
+                scheduler: Scheduler[RequestT, ResponseT] = Scheduler()
+
+                async for (
+                    response,
+                    request,
+                    request_info,
+                    scheduler_state,
+                ) in scheduler.run(
+                    requests=requests,
+                    backend=backend,
+                    strategy=strategy,
+                    startup_duration=warmup if warmup and warmup >= 1 else 0.0,
+                    env=environment,
+                    **constraints or {},
+                ):
+                    try:
+                        benchmark_class.update_estimate(
+                            args,
+                            estimated_state,
+                            response,
+                            request,
+                            request_info,
+                            scheduler_state,
+                        )
+                        if progress:
+                            await progress.on_benchmark_update(
+                                estimated_state, scheduler_state
+                            )
+                    except Exception as err:  # noqa: BLE001
+                        logger.error(
+                            f"Error updating benchmark estimate/progress: {err}"
+                        )
+
+                benchmark = benchmark_class.compile(
+                    args=args,
+                    estimated_state=estimated_state,
+                    scheduler_state=scheduler_state,
+                    profile=profile,
+                    requests=requests,
+                    backend=backend,
+                    environment=environment,
+                    strategy=strategy,
+                    constraints=constraints,
+                    data=data,
+                )
+                if progress:
+                    await progress.on_benchmark_complete(benchmark)
+
+                yield benchmark
+
+                try:
+                    strategy, constraints = strategies_generator.send(benchmark)
+                except StopIteration:
+                    strategy = None
+                    constraints = None
+
+            if progress:
+                await progress.on_finalize()
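
For orientation, the sketch below shows how the reshaped Benchmarker.run async generator would be consumed. It is a minimal illustration inferred only from the signature in this diff: the drive helper and every argument value are placeholders, not guidellm APIs, and concrete Profile, Environment, and BackendInterface instances would come from the rest of the package.

# Minimal consumption sketch (assumptions: a concrete Benchmarker subclass and
# concrete Profile/Environment/BackendInterface objects exist elsewhere in the
# package; `drive` and all argument values here are illustrative placeholders).
import asyncio


async def drive(benchmarker, benchmark_class, requests, backend, profile, environment):
    # run() is an async generator: it yields one compiled benchmark per
    # scheduling strategy that the profile produces.
    async for benchmark in benchmarker.run(
        benchmark_class=benchmark_class,
        requests=requests,
        backend=backend,
        profile=profile,
        environment=environment,
        data=[],                    # extra data forwarded to compile()
        progress=None,              # optional BenchmarkerProgress hooks
        sample_requests=20,         # sample size used while estimating
        warmup=5.0,                 # seconds; also feeds startup_duration
        cooldown=None,
        prefer_response_metrics=True,
    ):
        print(benchmark)

# asyncio.run(drive(...)) with real objects would execute the full sweep.

The other notable change in the control loop: instead of polling profile.next_strategy(), the profile now exposes strategies_generator(), and each compiled benchmark is fed back via .send(benchmark), so the profile can choose the next strategy adaptively. A toy generator (not guidellm code; the strategy strings and constraint dicts are invented) showing the same send-driven handshake:

# Toy send-driven strategy loop mirroring the handshake in run().
def toy_strategies_generator():
    rate = 1.0
    while rate <= 4.0:
        # The yield expression evaluates to whatever the caller send()s back,
        # letting the generator adapt the next strategy to the last result.
        benchmark = yield (f"constant@{rate:g}rps", {"max_seconds": 30})
        rate *= 2  # a real profile could instead inspect `benchmark`

gen = toy_strategies_generator()
strategy, constraints = next(gen)
while True:
    result = f"ran {strategy} with {constraints}"  # stand-in for a benchmark
    try:
        strategy, constraints = gen.send(result)
    except StopIteration:
        break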