guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +524 -255
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +109 -0
  5. guidellm/backends/openai.py +340 -0
  6. guidellm/backends/response_handlers.py +428 -0
  7. guidellm/benchmark/__init__.py +69 -39
  8. guidellm/benchmark/benchmarker.py +160 -316
  9. guidellm/benchmark/entrypoints.py +560 -127
  10. guidellm/benchmark/outputs/__init__.py +24 -0
  11. guidellm/benchmark/outputs/console.py +633 -0
  12. guidellm/benchmark/outputs/csv.py +721 -0
  13. guidellm/benchmark/outputs/html.py +473 -0
  14. guidellm/benchmark/outputs/output.py +169 -0
  15. guidellm/benchmark/outputs/serialized.py +69 -0
  16. guidellm/benchmark/profiles.py +718 -0
  17. guidellm/benchmark/progress.py +553 -556
  18. guidellm/benchmark/scenarios/__init__.py +40 -0
  19. guidellm/benchmark/scenarios/chat.json +6 -0
  20. guidellm/benchmark/scenarios/rag.json +6 -0
  21. guidellm/benchmark/schemas/__init__.py +66 -0
  22. guidellm/benchmark/schemas/base.py +402 -0
  23. guidellm/benchmark/schemas/generative/__init__.py +55 -0
  24. guidellm/benchmark/schemas/generative/accumulator.py +841 -0
  25. guidellm/benchmark/schemas/generative/benchmark.py +163 -0
  26. guidellm/benchmark/schemas/generative/entrypoints.py +381 -0
  27. guidellm/benchmark/schemas/generative/metrics.py +927 -0
  28. guidellm/benchmark/schemas/generative/report.py +158 -0
  29. guidellm/data/__init__.py +34 -4
  30. guidellm/data/builders.py +541 -0
  31. guidellm/data/collators.py +16 -0
  32. guidellm/data/config.py +120 -0
  33. guidellm/data/deserializers/__init__.py +49 -0
  34. guidellm/data/deserializers/deserializer.py +141 -0
  35. guidellm/data/deserializers/file.py +223 -0
  36. guidellm/data/deserializers/huggingface.py +94 -0
  37. guidellm/data/deserializers/memory.py +194 -0
  38. guidellm/data/deserializers/synthetic.py +246 -0
  39. guidellm/data/entrypoints.py +52 -0
  40. guidellm/data/loaders.py +190 -0
  41. guidellm/data/preprocessors/__init__.py +27 -0
  42. guidellm/data/preprocessors/formatters.py +410 -0
  43. guidellm/data/preprocessors/mappers.py +196 -0
  44. guidellm/data/preprocessors/preprocessor.py +30 -0
  45. guidellm/data/processor.py +29 -0
  46. guidellm/data/schemas.py +175 -0
  47. guidellm/data/utils/__init__.py +6 -0
  48. guidellm/data/utils/dataset.py +94 -0
  49. guidellm/extras/__init__.py +4 -0
  50. guidellm/extras/audio.py +220 -0
  51. guidellm/extras/vision.py +242 -0
  52. guidellm/logger.py +2 -2
  53. guidellm/mock_server/__init__.py +8 -0
  54. guidellm/mock_server/config.py +84 -0
  55. guidellm/mock_server/handlers/__init__.py +17 -0
  56. guidellm/mock_server/handlers/chat_completions.py +280 -0
  57. guidellm/mock_server/handlers/completions.py +280 -0
  58. guidellm/mock_server/handlers/tokenizer.py +142 -0
  59. guidellm/mock_server/models.py +510 -0
  60. guidellm/mock_server/server.py +238 -0
  61. guidellm/mock_server/utils.py +302 -0
  62. guidellm/scheduler/__init__.py +69 -26
  63. guidellm/scheduler/constraints/__init__.py +49 -0
  64. guidellm/scheduler/constraints/constraint.py +325 -0
  65. guidellm/scheduler/constraints/error.py +411 -0
  66. guidellm/scheduler/constraints/factory.py +182 -0
  67. guidellm/scheduler/constraints/request.py +312 -0
  68. guidellm/scheduler/constraints/saturation.py +722 -0
  69. guidellm/scheduler/environments.py +252 -0
  70. guidellm/scheduler/scheduler.py +137 -368
  71. guidellm/scheduler/schemas.py +358 -0
  72. guidellm/scheduler/strategies.py +617 -0
  73. guidellm/scheduler/worker.py +413 -419
  74. guidellm/scheduler/worker_group.py +712 -0
  75. guidellm/schemas/__init__.py +65 -0
  76. guidellm/schemas/base.py +417 -0
  77. guidellm/schemas/info.py +188 -0
  78. guidellm/schemas/request.py +235 -0
  79. guidellm/schemas/request_stats.py +349 -0
  80. guidellm/schemas/response.py +124 -0
  81. guidellm/schemas/statistics.py +1018 -0
  82. guidellm/{config.py → settings.py} +31 -24
  83. guidellm/utils/__init__.py +71 -8
  84. guidellm/utils/auto_importer.py +98 -0
  85. guidellm/utils/cli.py +132 -5
  86. guidellm/utils/console.py +566 -0
  87. guidellm/utils/encoding.py +778 -0
  88. guidellm/utils/functions.py +159 -0
  89. guidellm/utils/hf_datasets.py +1 -2
  90. guidellm/utils/hf_transformers.py +4 -4
  91. guidellm/utils/imports.py +9 -0
  92. guidellm/utils/messaging.py +1118 -0
  93. guidellm/utils/mixins.py +115 -0
  94. guidellm/utils/random.py +3 -4
  95. guidellm/utils/registry.py +220 -0
  96. guidellm/utils/singleton.py +133 -0
  97. guidellm/utils/synchronous.py +159 -0
  98. guidellm/utils/text.py +163 -50
  99. guidellm/utils/typing.py +41 -0
  100. guidellm/version.py +2 -2
  101. guidellm-0.6.0a5.dist-info/METADATA +364 -0
  102. guidellm-0.6.0a5.dist-info/RECORD +109 -0
  103. guidellm/backend/__init__.py +0 -23
  104. guidellm/backend/backend.py +0 -259
  105. guidellm/backend/openai.py +0 -708
  106. guidellm/backend/response.py +0 -136
  107. guidellm/benchmark/aggregator.py +0 -760
  108. guidellm/benchmark/benchmark.py +0 -837
  109. guidellm/benchmark/output.py +0 -997
  110. guidellm/benchmark/profile.py +0 -409
  111. guidellm/benchmark/scenario.py +0 -104
  112. guidellm/data/prideandprejudice.txt.gz +0 -0
  113. guidellm/dataset/__init__.py +0 -22
  114. guidellm/dataset/creator.py +0 -213
  115. guidellm/dataset/entrypoints.py +0 -42
  116. guidellm/dataset/file.py +0 -92
  117. guidellm/dataset/hf_datasets.py +0 -62
  118. guidellm/dataset/in_memory.py +0 -132
  119. guidellm/dataset/synthetic.py +0 -287
  120. guidellm/objects/__init__.py +0 -18
  121. guidellm/objects/pydantic.py +0 -89
  122. guidellm/objects/statistics.py +0 -953
  123. guidellm/preprocess/__init__.py +0 -3
  124. guidellm/preprocess/dataset.py +0 -374
  125. guidellm/presentation/__init__.py +0 -28
  126. guidellm/presentation/builder.py +0 -27
  127. guidellm/presentation/data_models.py +0 -232
  128. guidellm/presentation/injector.py +0 -66
  129. guidellm/request/__init__.py +0 -18
  130. guidellm/request/loader.py +0 -284
  131. guidellm/request/request.py +0 -79
  132. guidellm/request/types.py +0 -10
  133. guidellm/scheduler/queues.py +0 -25
  134. guidellm/scheduler/result.py +0 -155
  135. guidellm/scheduler/strategy.py +0 -495
  136. guidellm-0.3.1.dist-info/METADATA +0 -329
  137. guidellm-0.3.1.dist-info/RECORD +0 -62
  138. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/WHEEL +0 -0
  139. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/entry_points.txt +0 -0
  140. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/licenses/LICENSE +0 -0
  141. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,428 @@
1
+ """
2
+ Response handlers for processing API responses from different generation backends.
3
+
4
+ Provides a pluggable system for handling responses from language model backends,
5
+ supporting both streaming and non-streaming responses. Each handler implements the
6
+ GenerationResponseHandler protocol to parse API responses, extract usage metrics,
7
+ and convert them into standardized GenerationResponse objects.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from typing import Any, Protocol, cast
13
+
14
+ from guidellm.schemas import GenerationRequest, GenerationResponse, UsageMetrics
15
+ from guidellm.utils import RegistryMixin, json
16
+
17
+ __all__ = [
18
+ "AudioResponseHandler",
19
+ "ChatCompletionsResponseHandler",
20
+ "GenerationResponseHandler",
21
+ "GenerationResponseHandlerFactory",
22
+ "TextCompletionsResponseHandler",
23
+ ]
24
+
25
+
26
class GenerationResponseHandler(Protocol):
    """
    Interface contract for backend response processing.

    Implementations translate raw API payloads from generation backends --
    delivered either as one complete body or as incremental stream lines --
    into standardized GenerationResponse objects with uniform usage-metric
    extraction.
    """

    def compile_non_streaming(
        self, request: GenerationRequest, response: Any
    ) -> GenerationResponse:
        """
        Convert a full, non-streamed API payload into a response object.

        :param request: Original generation request
        :param response: Raw API response data from the backend
        :return: Standardized GenerationResponse with extracted metrics
        """
        ...

    def add_streaming_line(self, line: str) -> int | None:
        """
        Consume one line of a streamed response.

        :param line: Raw line from the streaming response
        :return: 1 if content was updated, 0 if line was ignored, None if done
        """
        ...

    def compile_streaming(self, request: GenerationRequest) -> GenerationResponse:
        """
        Build the final response object from accumulated stream state.

        :param request: Original generation request
        :return: Standardized GenerationResponse with extracted metrics
        """
        ...
64
+
65
+
66
class GenerationResponseHandlerFactory(RegistryMixin[type[GenerationResponseHandler]]):
    """
    Registry-backed factory mapping backend request types to handler classes.

    Associates handler implementations with the specific backend API types they
    understand, so the correct GenerationResponseHandler can be instantiated
    automatically for responses from different generation services.
    """

    @classmethod
    def create(
        cls,
        request_type: str,
        handler_overrides: dict[str, type[GenerationResponseHandler]] | None = None,
    ) -> GenerationResponseHandler:
        """
        Instantiate the handler associated with the given request type.

        :param request_type: The type of generation request (e.g., "text_completions")
        :param handler_overrides: Optional mapping of request types to handler
            classes, consulted before the registry so callers can substitute
            their own implementations.
        :return: The corresponding instantiated GenerationResponseHandler
        :raises ValueError: When no handler is registered for the request type
        """
        # Overrides take precedence; fall back to the registered handlers.
        overrides = handler_overrides or {}
        handler_cls = overrides.get(request_type) or cls.get_registered_object(
            request_type
        )

        if not handler_cls:
            raise ValueError(
                f"No response handler registered for type '{request_type}'."
            )

        return handler_cls()
101
+
102
+
103
@GenerationResponseHandlerFactory.register("text_completions")
class TextCompletionsResponseHandler(GenerationResponseHandler):
    """
    Response handler for OpenAI-style text completion endpoints.

    Processes responses from text completion APIs that return generated text in the
    'choices' array with 'text' fields. Handles both streaming and non-streaming
    responses, extracting usage metrics for input and output tokens.

    Example:
    ::
        handler = TextCompletionsResponseHandler()
        response = handler.compile_non_streaming(request, api_response)
    """

    def __init__(self):
        """
        Initialize the text completions response handler.

        Sets up internal state for accumulating streaming response data including
        text chunks and usage metrics.
        """
        # Text fragments received so far from the stream, in arrival order.
        self.streaming_texts: list[str] = []
        # Most recent usage payload seen in the stream (servers typically emit
        # usage only on the final chunk, so last-write-wins is intended).
        self.streaming_usage: dict[str, int | dict[str, int]] | None = None
        # First response ID observed in the stream, if any.
        self.streaming_response_id: str | None = None

    def compile_non_streaming(
        self, request: GenerationRequest, response: dict
    ) -> GenerationResponse:
        """
        Process a complete text completion response.

        :param request: Original generation request
        :param response: Complete API response containing choices and usage data
        :return: Standardized GenerationResponse with extracted text and metrics
        """
        choices, usage = self.extract_choices_and_usage(response)
        choice = choices[0] if choices else {}
        text = choice.get("text", "")
        input_metrics, output_metrics = self.extract_metrics(usage, text)

        return GenerationResponse(
            request_id=request.request_id,
            request_args=str(
                request.arguments.model_dump() if request.arguments else None
            ),
            response_id=response.get("id"),  # use vLLM ID if available
            text=text,
            input_metrics=input_metrics,
            output_metrics=output_metrics,
        )

    def add_streaming_line(self, line: str) -> int | None:
        """
        Process a single line from a text completion streaming response.

        Parses Server-Sent Events (SSE) formatted lines, extracting text content
        and usage metrics. Accumulates text chunks for final response compilation.

        :param line: Raw SSE line from the streaming response
        :return: 1 if text content was extracted, 0 if line ignored, None if done
        """
        if not (data := self.extract_line_data(line)):
            # None signals the [DONE] sentinel; {} signals an ignorable line.
            return None if data is None else 0

        if "id" in data and self.streaming_response_id is None:
            self.streaming_response_id = data["id"]

        updated = False
        choices, usage = self.extract_choices_and_usage(data)
        choice = choices[0] if choices else {}

        if choices and (text := choice.get("text")):
            self.streaming_texts.append(text)
            updated = True

        if usage:
            self.streaming_usage = usage

        return 1 if updated else 0

    def compile_streaming(self, request: GenerationRequest) -> GenerationResponse:
        """
        Compile accumulated streaming text chunks into a final response.

        :param request: Original generation request
        :return: Standardized GenerationResponse with concatenated text and metrics
        """
        text = "".join(self.streaming_texts)
        input_metrics, output_metrics = self.extract_metrics(self.streaming_usage, text)

        return GenerationResponse(
            request_id=request.request_id,
            request_args=str(
                request.arguments.model_dump() if request.arguments else None
            ),
            response_id=self.streaming_response_id,  # use vLLM ID if available
            text=text,
            input_metrics=input_metrics,
            output_metrics=output_metrics,
        )

    def extract_line_data(self, line: str) -> dict[str, Any] | None:
        """
        Extract JSON data from a streaming response line.

        :param line: Raw line from the streaming response
        :return: Parsed JSON payload as a dictionary, an empty dict for lines
            carrying no data (blank, keep-alive, or non-SSE lines), or None
            when the line is the stream termination sentinel
        """
        # Normalize before any comparison so a trailing newline or stray
        # whitespace cannot defeat the [DONE] check. (Previously the sentinel
        # required an exact "data: [DONE]" match before stripping, so
        # "data: [DONE]\n" fell through to json.loads("[DONE]") and raised.)
        if not line or not (line := line.strip()) or not line.startswith("data:"):
            return {}

        payload = line[len("data:") :].strip()
        if payload == "[DONE]":
            return None

        return json.loads(payload)

    def extract_choices_and_usage(
        self, response: dict
    ) -> tuple[list[dict], dict[str, int | dict[str, int]]]:
        """
        Extract choices and usage data from the API response.

        :param response: Complete API response containing choices and usage data
        :return: Tuple of choices list and usage dictionary
        """
        return response.get("choices", []), response.get("usage", {})

    def extract_metrics(
        self, usage: dict[str, int | dict[str, int]] | None, text: str
    ) -> tuple[UsageMetrics, UsageMetrics]:
        """
        Extract input and output usage metrics from API response usage data.

        :param usage: Usage data dictionary from API response
        :param text: Generated text for calculating word and character counts
        :return: Tuple of input_metrics and output_metrics as UsageMetrics objects
        """
        if not usage:
            # No usage reported: only text-derived output stats are available.
            return UsageMetrics(), UsageMetrics(
                text_words=len(text.split()) if text else 0,
                text_characters=len(text) if text else 0,
            )

        input_details: dict[str, int] = cast(
            "dict[str, int]", usage.get("prompt_tokens_details", {}) or {}
        )
        output_details: dict[str, int] = cast(
            "dict[str, int]", usage.get("completion_tokens_details", {}) or {}
        )
        usage_metrics: dict[str, int] = cast("dict[str, int]", usage)

        # Prefer the detailed breakdowns when present; fall back to the
        # top-level token counts.
        return UsageMetrics(
            text_tokens=(
                input_details.get("prompt_tokens")
                or usage_metrics.get("prompt_tokens")
                or 0
            ),
            image_tokens=input_details.get("image_tokens"),
            video_tokens=input_details.get("video_tokens"),
            audio_tokens=input_details.get("audio_tokens"),
            audio_seconds=input_details.get("seconds"),
        ), UsageMetrics(
            text_tokens=(
                output_details.get("completion_tokens")
                or usage_metrics.get("completion_tokens")
                or 0
            ),
            text_words=len(text.split()) if text else 0,
            text_characters=len(text) if text else 0,
            image_tokens=output_details.get("image_tokens"),
            video_tokens=output_details.get("video_tokens"),
            audio_tokens=output_details.get("audio_tokens"),
            audio_seconds=output_details.get("seconds"),
        )
279
+ )
280
+
281
+
282
@GenerationResponseHandlerFactory.register("chat_completions")
class ChatCompletionsResponseHandler(TextCompletionsResponseHandler):
    """
    Response handler for OpenAI-style chat completion endpoints.

    Specializes TextCompletionsResponseHandler for chat payloads, where the
    generated text lives inside message objects (non-streaming) or delta
    objects (streaming) within each entry of the choices array.
    """

    def compile_non_streaming(
        self, request: GenerationRequest, response: dict
    ) -> GenerationResponse:
        """
        Build a response object from a full chat completion payload.

        Pulls the generated text out of the nested message object in the first
        choice, then extracts usage metrics as in the text completion handler.

        :param request: Original generation request
        :param response: Complete API response containing choices and usage data
        :return: Standardized GenerationResponse with extracted content and metrics
        """
        choices, usage = self.extract_choices_and_usage(response)
        first_choice: dict[str, dict] = choices[0] if choices else {}
        text = first_choice.get("message", {}).get("content", "")
        input_metrics, output_metrics = self.extract_metrics(usage, text)

        request_args = str(
            request.arguments.model_dump() if request.arguments else None
        )
        return GenerationResponse(
            request_id=request.request_id,
            request_args=request_args,
            response_id=response.get("id"),  # use vLLM ID if available
            text=text,
            input_metrics=input_metrics,
            output_metrics=output_metrics,
        )

    def add_streaming_line(self, line: str) -> int | None:
        """
        Consume one line of a chat completion stream.

        Handles the chat-specific chunk shape, where incremental content is
        nested under each choice's delta object.

        :param line: Raw SSE line from the streaming response
        :return: 1 if content was extracted, 0 if line ignored, None if done
        """
        data = self.extract_line_data(line)
        if not data:
            # None means the stream is done; {} means an ignorable line.
            return None if data is None else 0

        if self.streaming_response_id is None and "id" in data:
            self.streaming_response_id = data["id"]

        choices, usage = self.extract_choices_and_usage(data)
        if usage:
            self.streaming_usage = usage

        if not choices:
            return 0

        delta_content = choices[0].get("delta", {}).get("content")
        if not delta_content:
            return 0

        self.streaming_texts.append(delta_content)
        return 1

    def compile_streaming(self, request: GenerationRequest) -> GenerationResponse:
        """
        Assemble the accumulated streamed chat content into a final response.

        :param request: Original generation request
        :return: Standardized GenerationResponse with concatenated content and metrics
        """
        full_text = "".join(self.streaming_texts)
        input_metrics, output_metrics = self.extract_metrics(
            self.streaming_usage, full_text
        )

        request_args = str(
            request.arguments.model_dump() if request.arguments else None
        )
        return GenerationResponse(
            request_id=request.request_id,
            request_args=request_args,
            response_id=self.streaming_response_id,  # use vLLM ID if available
            text=full_text,
            input_metrics=input_metrics,
            output_metrics=output_metrics,
        )
369
+ )
370
+
371
+
372
@GenerationResponseHandlerFactory.register(
    ["audio_transcriptions", "audio_translations"]
)
class AudioResponseHandler(ChatCompletionsResponseHandler):
    """
    Response handler for audio transcription and translation endpoints.

    Processes responses from audio processing APIs that convert speech to text,
    covering both transcription and translation services. Tracks audio-specific
    usage, mapping prompt tokens to audio tokens on the input side.

    Example:
    ::
        handler = AudioResponseHandler()
        response = handler.compile_non_streaming(request, api_response)
    """

    def __init__(self):
        """
        Initialize the audio response handler.

        Sets up internal state for accumulating streaming response data:
        an audio buffer plus the text/usage/ID fields shared with the parent
        handlers.
        """
        # Raw audio bytes accumulated while streaming.
        self.streaming_buffer: bytearray = bytearray()
        # Text fragments received so far, in arrival order.
        self.streaming_texts: list[str] = []
        # Latest usage payload observed in the stream.
        self.streaming_usage: dict[str, int | dict[str, int]] | None = None
        # First response ID observed in the stream, if any.
        self.streaming_response_id: str | None = None

    def extract_metrics(
        self, usage: dict[str, int | dict[str, int]] | None, text: str
    ) -> tuple[UsageMetrics, UsageMetrics]:
        """
        Extract input and output usage metrics from audio API usage data.

        Input prompt tokens are reported as audio tokens, reflecting that the
        request payload for these endpoints is audio rather than text.

        :param usage: Usage data dictionary from audio API response
        :param text: Generated text for calculating word and character counts
        :return: Tuple of input_metrics and output_metrics as UsageMetrics objects
        """
        word_count = len(text.split()) if text else 0
        char_count = len(text) if text else 0

        if not usage:
            # No usage reported: only text-derived output stats are available.
            return UsageMetrics(), UsageMetrics(
                text_words=word_count,
                text_characters=char_count,
            )

        counts: dict[str, int] = cast("dict[str, int]", usage)

        input_metrics = UsageMetrics(
            audio_tokens=(counts.get("prompt_tokens") or 0),
        )
        output_metrics = UsageMetrics(
            text_tokens=(counts.get("completion_tokens") or 0),
            text_words=word_count,
            text_characters=char_count,
        )
        return input_metrics, output_metrics
@@ -1,20 +1,24 @@
1
- from .aggregator import AggregatorT, BenchmarkAggregator, GenerativeBenchmarkAggregator
2
- from .benchmark import (
3
- Benchmark,
4
- BenchmarkArgs,
5
- BenchmarkMetrics,
6
- BenchmarkRunStats,
7
- BenchmarkT,
8
- GenerativeBenchmark,
9
- GenerativeMetrics,
10
- GenerativeTextErrorStats,
11
- GenerativeTextResponseStats,
12
- StatusBreakdown,
13
- )
14
- from .benchmarker import Benchmarker, BenchmarkerResult, GenerativeBenchmarker
1
+ """
2
+ Benchmark execution and performance analysis framework.
3
+
4
+ Provides comprehensive benchmarking capabilities for LLM inference workloads,
5
+ including profile-based execution strategies, metrics collection and aggregation,
6
+ progress tracking, and multi-format output generation. Supports synchronous,
7
+ asynchronous, concurrent, sweep, and throughput-based benchmarking profiles for
8
+ evaluating model performance under various load conditions.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from .benchmarker import Benchmarker
15
14
  from .entrypoints import benchmark_generative_text, reimport_benchmarks_report
16
- from .output import GenerativeBenchmarksConsole, GenerativeBenchmarksReport
17
- from .profile import (
15
+ from .outputs import (
16
+ GenerativeBenchmarkerConsole,
17
+ GenerativeBenchmarkerCSV,
18
+ GenerativeBenchmarkerHTML,
19
+ GenerativeBenchmarkerOutput,
20
+ )
21
+ from .profiles import (
18
22
  AsyncProfile,
19
23
  ConcurrentProfile,
20
24
  Profile,
@@ -22,46 +26,72 @@ from .profile import (
22
26
  SweepProfile,
23
27
  SynchronousProfile,
24
28
  ThroughputProfile,
25
- create_profile,
26
29
  )
27
- from .progress import (
28
- BenchmarkerProgressDisplay,
29
- BenchmarkerTaskProgressState,
30
- GenerativeTextBenchmarkerProgressDisplay,
31
- GenerativeTextBenchmarkerTaskProgressState,
30
+ from .progress import BenchmarkerProgress, GenerativeConsoleBenchmarkerProgress
31
+ from .scenarios import get_builtin_scenarios
32
+ from .schemas import (
33
+ Benchmark,
34
+ BenchmarkAccumulator,
35
+ BenchmarkAccumulatorT,
36
+ BenchmarkConfig,
37
+ BenchmarkGenerativeTextArgs,
38
+ BenchmarkT,
39
+ GenerativeAudioMetricsSummary,
40
+ GenerativeBenchmark,
41
+ GenerativeBenchmarkAccumulator,
42
+ GenerativeBenchmarkMetadata,
43
+ GenerativeBenchmarksReport,
44
+ GenerativeBenchmarkTimings,
45
+ GenerativeImageMetricsSummary,
46
+ GenerativeMetrics,
47
+ GenerativeMetricsAccumulator,
48
+ GenerativeMetricsSummary,
49
+ GenerativeRequestsAccumulator,
50
+ GenerativeTextMetricsSummary,
51
+ GenerativeVideoMetricsSummary,
52
+ RunningMetricStats,
53
+ SchedulerMetrics,
54
+ SchedulerMetricsAccumulator,
32
55
  )
33
56
 
34
57
  __all__ = [
35
- "AggregatorT",
36
58
  "AsyncProfile",
37
59
  "Benchmark",
38
- "BenchmarkAggregator",
39
- "BenchmarkArgs",
40
- "BenchmarkMetrics",
41
- "BenchmarkRunStats",
60
+ "BenchmarkAccumulator",
61
+ "BenchmarkAccumulatorT",
62
+ "BenchmarkConfig",
63
+ "BenchmarkGenerativeTextArgs",
42
64
  "BenchmarkT",
43
65
  "Benchmarker",
44
- "BenchmarkerProgressDisplay",
45
- "BenchmarkerResult",
46
- "BenchmarkerTaskProgressState",
66
+ "BenchmarkerProgress",
47
67
  "ConcurrentProfile",
68
+ "GenerativeAudioMetricsSummary",
48
69
  "GenerativeBenchmark",
49
- "GenerativeBenchmarkAggregator",
50
- "GenerativeBenchmarker",
51
- "GenerativeBenchmarksConsole",
70
+ "GenerativeBenchmarkAccumulator",
71
+ "GenerativeBenchmarkMetadata",
72
+ "GenerativeBenchmarkTimings",
73
+ "GenerativeBenchmarkerCSV",
74
+ "GenerativeBenchmarkerConsole",
75
+ "GenerativeBenchmarkerHTML",
76
+ "GenerativeBenchmarkerOutput",
52
77
  "GenerativeBenchmarksReport",
78
+ "GenerativeConsoleBenchmarkerProgress",
79
+ "GenerativeImageMetricsSummary",
53
80
  "GenerativeMetrics",
54
- "GenerativeTextBenchmarkerProgressDisplay",
55
- "GenerativeTextBenchmarkerTaskProgressState",
56
- "GenerativeTextErrorStats",
57
- "GenerativeTextResponseStats",
81
+ "GenerativeMetricsAccumulator",
82
+ "GenerativeMetricsSummary",
83
+ "GenerativeRequestsAccumulator",
84
+ "GenerativeTextMetricsSummary",
85
+ "GenerativeVideoMetricsSummary",
58
86
  "Profile",
59
87
  "ProfileType",
60
- "StatusBreakdown",
88
+ "RunningMetricStats",
89
+ "SchedulerMetrics",
90
+ "SchedulerMetricsAccumulator",
61
91
  "SweepProfile",
62
92
  "SynchronousProfile",
63
93
  "ThroughputProfile",
64
94
  "benchmark_generative_text",
65
- "create_profile",
95
+ "get_builtin_scenarios",
66
96
  "reimport_benchmarks_report",
67
97
  ]