guidellm 0.4.0a21__py3-none-any.whl → 0.4.0a155__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (116)
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +451 -252
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +110 -0
  5. guidellm/backends/openai.py +355 -0
  6. guidellm/backends/response_handlers.py +455 -0
  7. guidellm/benchmark/__init__.py +53 -39
  8. guidellm/benchmark/benchmarker.py +148 -317
  9. guidellm/benchmark/entrypoints.py +466 -128
  10. guidellm/benchmark/output.py +517 -771
  11. guidellm/benchmark/profile.py +580 -280
  12. guidellm/benchmark/progress.py +568 -549
  13. guidellm/benchmark/scenarios/__init__.py +40 -0
  14. guidellm/benchmark/scenarios/chat.json +6 -0
  15. guidellm/benchmark/scenarios/rag.json +6 -0
  16. guidellm/benchmark/schemas.py +2085 -0
  17. guidellm/data/__init__.py +28 -4
  18. guidellm/data/collators.py +16 -0
  19. guidellm/data/deserializers/__init__.py +53 -0
  20. guidellm/data/deserializers/deserializer.py +109 -0
  21. guidellm/data/deserializers/file.py +222 -0
  22. guidellm/data/deserializers/huggingface.py +94 -0
  23. guidellm/data/deserializers/memory.py +192 -0
  24. guidellm/data/deserializers/synthetic.py +346 -0
  25. guidellm/data/loaders.py +145 -0
  26. guidellm/data/preprocessors/__init__.py +25 -0
  27. guidellm/data/preprocessors/formatters.py +412 -0
  28. guidellm/data/preprocessors/mappers.py +198 -0
  29. guidellm/data/preprocessors/preprocessor.py +29 -0
  30. guidellm/data/processor.py +30 -0
  31. guidellm/data/schemas.py +13 -0
  32. guidellm/data/utils/__init__.py +10 -0
  33. guidellm/data/utils/dataset.py +94 -0
  34. guidellm/data/utils/functions.py +18 -0
  35. guidellm/extras/__init__.py +4 -0
  36. guidellm/extras/audio.py +215 -0
  37. guidellm/extras/vision.py +242 -0
  38. guidellm/logger.py +2 -2
  39. guidellm/mock_server/__init__.py +8 -0
  40. guidellm/mock_server/config.py +84 -0
  41. guidellm/mock_server/handlers/__init__.py +17 -0
  42. guidellm/mock_server/handlers/chat_completions.py +280 -0
  43. guidellm/mock_server/handlers/completions.py +280 -0
  44. guidellm/mock_server/handlers/tokenizer.py +142 -0
  45. guidellm/mock_server/models.py +510 -0
  46. guidellm/mock_server/server.py +168 -0
  47. guidellm/mock_server/utils.py +302 -0
  48. guidellm/preprocess/dataset.py +23 -26
  49. guidellm/presentation/builder.py +2 -2
  50. guidellm/presentation/data_models.py +25 -21
  51. guidellm/presentation/injector.py +2 -3
  52. guidellm/scheduler/__init__.py +65 -26
  53. guidellm/scheduler/constraints.py +1035 -0
  54. guidellm/scheduler/environments.py +252 -0
  55. guidellm/scheduler/scheduler.py +140 -368
  56. guidellm/scheduler/schemas.py +272 -0
  57. guidellm/scheduler/strategies.py +519 -0
  58. guidellm/scheduler/worker.py +391 -420
  59. guidellm/scheduler/worker_group.py +707 -0
  60. guidellm/schemas/__init__.py +31 -0
  61. guidellm/schemas/info.py +159 -0
  62. guidellm/schemas/request.py +216 -0
  63. guidellm/schemas/response.py +119 -0
  64. guidellm/schemas/stats.py +228 -0
  65. guidellm/{config.py → settings.py} +32 -21
  66. guidellm/utils/__init__.py +95 -8
  67. guidellm/utils/auto_importer.py +98 -0
  68. guidellm/utils/cli.py +46 -2
  69. guidellm/utils/console.py +183 -0
  70. guidellm/utils/encoding.py +778 -0
  71. guidellm/utils/functions.py +134 -0
  72. guidellm/utils/hf_datasets.py +1 -2
  73. guidellm/utils/hf_transformers.py +4 -4
  74. guidellm/utils/imports.py +9 -0
  75. guidellm/utils/messaging.py +1118 -0
  76. guidellm/utils/mixins.py +115 -0
  77. guidellm/utils/pydantic_utils.py +411 -0
  78. guidellm/utils/random.py +3 -4
  79. guidellm/utils/registry.py +220 -0
  80. guidellm/utils/singleton.py +133 -0
  81. guidellm/{objects → utils}/statistics.py +341 -247
  82. guidellm/utils/synchronous.py +159 -0
  83. guidellm/utils/text.py +163 -50
  84. guidellm/utils/typing.py +41 -0
  85. guidellm/version.py +1 -1
  86. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/METADATA +33 -10
  87. guidellm-0.4.0a155.dist-info/RECORD +96 -0
  88. guidellm/backend/__init__.py +0 -23
  89. guidellm/backend/backend.py +0 -259
  90. guidellm/backend/openai.py +0 -705
  91. guidellm/backend/response.py +0 -136
  92. guidellm/benchmark/aggregator.py +0 -760
  93. guidellm/benchmark/benchmark.py +0 -837
  94. guidellm/benchmark/scenario.py +0 -104
  95. guidellm/data/prideandprejudice.txt.gz +0 -0
  96. guidellm/dataset/__init__.py +0 -22
  97. guidellm/dataset/creator.py +0 -213
  98. guidellm/dataset/entrypoints.py +0 -42
  99. guidellm/dataset/file.py +0 -92
  100. guidellm/dataset/hf_datasets.py +0 -62
  101. guidellm/dataset/in_memory.py +0 -132
  102. guidellm/dataset/synthetic.py +0 -287
  103. guidellm/objects/__init__.py +0 -18
  104. guidellm/objects/pydantic.py +0 -89
  105. guidellm/request/__init__.py +0 -18
  106. guidellm/request/loader.py +0 -284
  107. guidellm/request/request.py +0 -79
  108. guidellm/request/types.py +0 -10
  109. guidellm/scheduler/queues.py +0 -25
  110. guidellm/scheduler/result.py +0 -155
  111. guidellm/scheduler/strategy.py +0 -495
  112. guidellm-0.4.0a21.dist-info/RECORD +0 -62
  113. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/WHEEL +0 -0
  114. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/entry_points.txt +0 -0
  115. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/licenses/LICENSE +0 -0
  116. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,455 @@
+ """
+ Response handlers for processing API responses from different generation backends.
+
+ Provides a pluggable system for handling responses from language model backends,
+ supporting both streaming and non-streaming responses. Each handler implements the
+ GenerationResponseHandler protocol to parse API responses, extract usage metrics,
+ and convert them into standardized GenerationResponse objects.
+ """
+
+ from __future__ import annotations
+
+ from typing import Any, Protocol
+
+ from guidellm.schemas import GenerationRequest, GenerationResponse, UsageMetrics
+ from guidellm.utils import RegistryMixin, json
+
+ __all__ = [
+     "AudioResponseHandler",
+     "ChatCompletionsResponseHandler",
+     "GenerationResponseHandler",
+     "GenerationResponseHandlerFactory",
+     "TextCompletionsResponseHandler",
+ ]
+
+
+ class GenerationResponseHandler(Protocol):
+     """
+     Protocol for handling generation API responses.
+
+     Defines the interface for processing both streaming and non-streaming responses
+     from backend APIs, converting them into standardized GenerationResponse objects
+     with consistent metrics extraction.
+     """
+
+     def compile_non_streaming(
+         self, request: GenerationRequest, response: Any
+     ) -> GenerationResponse:
+         """
+         Process a complete non-streaming API response.
+
+         :param request: Original generation request
+         :param response: Raw API response data from the backend
+         :return: Standardized GenerationResponse with extracted metrics
+         """
+         ...
+
+     def add_streaming_line(self, line: str) -> int | None:
+         """
+         Process a single line from a streaming response.
+
+         :param line: Raw line from the streaming response
+         :return: 1 if content was updated, 0 if line was ignored, None if done
+         """
+         ...
+
+     def compile_streaming(self, request: GenerationRequest) -> GenerationResponse:
+         """
+         Compile accumulated streaming data into a final response.
+
+         :param request: Original generation request
+         :return: Standardized GenerationResponse with extracted metrics
+         """
+         ...
+
+
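The 1 / 0 / None return convention documented on `add_streaming_line` is what lets callers drive every handler the same way. A minimal driver sketch (not part of the package; the handler, request, and line iterable are assumed to come from the caller):

```python
def drive_stream(handler, request, lines):
    """Pump raw stream lines into any GenerationResponseHandler, then compile."""
    for line in lines:
        result = handler.add_streaming_line(line)
        if result is None:  # terminal marker seen (e.g. "data: [DONE]")
            break
        # result == 1: new content accumulated; result == 0: line ignored
    return handler.compile_streaming(request)
```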
+ class GenerationResponseHandlerFactory(RegistryMixin[type[GenerationResponseHandler]]):
+     """
+     Factory for registering and creating response handlers by backend type.
+
+     Registry-based system for associating handler classes with specific backend API
+     types, enabling automatic selection of the appropriate handler for processing
+     responses from different generation services.
+     """
+
+
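Because the factory is a RegistryMixin keyed by endpoint type, supporting a new endpoint means registering one more class, exactly as the built-in handlers below do. A hypothetical sketch (the "my_endpoint" key and MyEndpointResponseHandler are illustrative, not part of the package):

```python
from guidellm.backends.response_handlers import GenerationResponseHandlerFactory


@GenerationResponseHandlerFactory.register("my_endpoint")  # hypothetical key
class MyEndpointResponseHandler:
    """Skeleton conforming to the GenerationResponseHandler protocol."""

    def compile_non_streaming(self, request, response):
        ...

    def add_streaming_line(self, line):
        ...

    def compile_streaming(self, request):
        ...
```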
+ @GenerationResponseHandlerFactory.register("text_completions")
+ class TextCompletionsResponseHandler(GenerationResponseHandler):
+     """
+     Response handler for OpenAI-style text completion endpoints.
+
+     Processes responses from text completion APIs that return generated text in the
+     'choices' array with 'text' fields. Handles both streaming and non-streaming
+     responses, extracting usage metrics for input and output tokens.
+
+     Example:
+     ::
+         handler = TextCompletionsResponseHandler()
+         response = handler.compile_non_streaming(request, api_response)
+     """
+
+     def __init__(self):
+         """
+         Initialize the text completions response handler.
+
+         Sets up internal state for accumulating streaming response data including
+         text chunks and usage metrics.
+         """
+         self.streaming_texts: list[str] = []
+         self.streaming_usage: dict[str, int | dict[str, int]] | None = None
+
+     def compile_non_streaming(
+         self, request: GenerationRequest, response: dict
+     ) -> GenerationResponse:
+         """
+         Process a complete text completion response.
+
+         :param request: Original generation request
+         :param response: Complete API response containing choices and usage data
+         :return: Standardized GenerationResponse with extracted text and metrics
+         """
+         choices, usage = self.extract_choices_and_usage(response)
+         input_metrics, output_metrics = self.extract_metrics(usage)
+
+         return GenerationResponse(
+             request_id=request.request_id,
+             request_args=str(
+                 request.arguments.model_dump() if request.arguments else None
+             ),
+             text=choices[0].get("text", "") if choices else "",
+             input_metrics=input_metrics,
+             output_metrics=output_metrics,
+         )
+
+     def add_streaming_line(self, line: str) -> int | None:
+         """
+         Process a single line from a text completion streaming response.
+
+         Parses Server-Sent Events (SSE) formatted lines, extracting text content
+         and usage metrics. Accumulates text chunks for final response compilation.
+
+         :param line: Raw SSE line from the streaming response
+         :return: 1 if text content was extracted, 0 if line ignored, None if done
+         """
+         if not (data := self.extract_line_data(line)):
+             return None if data is None else 0
+
+         updated = False
+         choices, usage = self.extract_choices_and_usage(data)
+
+         if choices and (text := choices[0].get("text")):
+             self.streaming_texts.append(text)
+             updated = True
+
+         if usage:
+             self.streaming_usage = usage
+
+         return 1 if updated else 0
+
+     def compile_streaming(self, request: GenerationRequest) -> GenerationResponse:
+         """
+         Compile accumulated streaming text chunks into a final response.
+
+         :param request: Original generation request
+         :return: Standardized GenerationResponse with concatenated text and metrics
+         """
+         input_metrics, output_metrics = self.extract_metrics(self.streaming_usage)
+
+         return GenerationResponse(
+             request_id=request.request_id,
+             request_args=str(
+                 request.arguments.model_dump() if request.arguments else None
+             ),
+             text="".join(self.streaming_texts),
+             input_metrics=input_metrics,
+             output_metrics=output_metrics,
+         )
+
+     def extract_line_data(self, line: str) -> dict[str, Any] | None:
+         """
+         Extract JSON data from a streaming response line.
+
+         :param line: Raw line from the streaming response
+         :return: Parsed JSON data as dictionary, or None if line indicates completion
+         """
+         if line == "data: [DONE]":
+             return None
+
+         if not line or not (line := line.strip()) or not line.startswith("data:"):
+             return {}
+
+         line = line[len("data:") :].strip()
+
+         return json.loads(line)
+
+     def extract_choices_and_usage(
+         self, response: dict
+     ) -> tuple[list[dict], dict[str, int | dict[str, int]]]:
+         """
+         Extract choices and usage data from the API response.
+
+         :param response: Complete API response containing choices and usage data
+         :return: Tuple of choices list and usage dictionary
+         """
+         return response.get("choices", []), response.get("usage", {})
+
+     def extract_metrics(
+         self, usage: dict[str, int | dict[str, int]] | None
+     ) -> tuple[UsageMetrics, UsageMetrics]:
+         """
+         Extract input and output usage metrics from API response usage data.
+
+         :param usage: Usage data dictionary from API response
+         :return: Tuple of input_metrics and output_metrics as UsageMetrics objects
+         """
+         if not usage:
+             return UsageMetrics(), UsageMetrics()
+
+         input_details: dict[str, int] = usage.get("prompt_tokens_details", {}) or {}
+         output_details: dict[str, int] = (
+             usage.get("completion_tokens_details", {}) or {}
+         )
+
+         return UsageMetrics(
+             text_tokens=(
+                 input_details.get("prompt_tokens") or usage.get("prompt_tokens")
+             ),
+             image_tokens=input_details.get("image_tokens"),
+             video_tokens=input_details.get("video_tokens"),
+             audio_tokens=input_details.get("audio_tokens"),
+             audio_seconds=input_details.get("seconds"),
+         ), UsageMetrics(
+             text_tokens=(
+                 output_details.get("completion_tokens")
+                 or usage.get("completion_tokens")
+             ),
+             image_tokens=output_details.get("image_tokens"),
+             video_tokens=output_details.get("video_tokens"),
+             audio_tokens=output_details.get("audio_tokens"),
+             audio_seconds=output_details.get("seconds"),
+         )
+
+
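To make the SSE parsing concrete, here is a hedged walkthrough of the handler's return values on a short fabricated stream; compiling the final response additionally needs the caller's original GenerationRequest, whose construction is omitted:

```python
from guidellm.backends.response_handlers import TextCompletionsResponseHandler

handler = TextCompletionsResponseHandler()

# Each "data:" line contributes text; a usage payload, when present, is retained.
assert handler.add_streaming_line('data: {"choices": [{"text": "Hello"}]}') == 1
assert handler.add_streaming_line(": keep-alive") == 0  # non-data line, ignored
assert handler.add_streaming_line(
    'data: {"choices": [{"text": ", world"}], '
    '"usage": {"prompt_tokens": 5, "completion_tokens": 2}}'
) == 1
assert handler.add_streaming_line("data: [DONE]") is None  # terminal marker

# Compiling requires the caller's original GenerationRequest (construction omitted):
# response = handler.compile_streaming(request)
# response.text == "Hello, world"; output text_tokens == 2
```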
+ @GenerationResponseHandlerFactory.register("chat_completions")
+ class ChatCompletionsResponseHandler(TextCompletionsResponseHandler):
+     """
+     Response handler for OpenAI-style chat completion endpoints.
+
+     Extends TextCompletionsResponseHandler to handle chat completion responses where
+     generated text is nested within message objects in the choices array. Processes
+     both streaming and non-streaming chat completion responses.
+     """
+
+     def compile_non_streaming(
+         self, request: GenerationRequest, response: dict
+     ) -> GenerationResponse:
+         """
+         Process a complete chat completion response.
+
+         Extracts content from the message object within choices, handling the nested
+         structure specific to chat completion endpoints.
+
+         :param request: Original generation request
+         :param response: Complete API response containing choices and usage data
+         :return: Standardized GenerationResponse with extracted content and metrics
+         """
+         choices, usage = self.extract_choices_and_usage(response)
+         input_metrics, output_metrics = self.extract_metrics(usage)
+
+         return GenerationResponse(
+             request_id=request.request_id,
+             request_args=str(
+                 request.arguments.model_dump() if request.arguments else None
+             ),
+             text=(choices[0].get("message", {}).get("content", "") if choices else ""),
+             input_metrics=input_metrics,
+             output_metrics=output_metrics,
+         )
+
+     def add_streaming_line(self, line: str) -> int | None:
+         """
+         Process a single line from a chat completion streaming response.
+
+         Handles the chat completion specific delta structure where content is nested
+         within delta objects in the streaming response chunks.
+
+         :param line: Raw SSE line from the streaming response
+         :return: 1 if content was extracted, 0 if line ignored, None if done
+         """
+         if not (data := self.extract_line_data(line)):
+             return None if data is None else 0
+
+         updated = False
+         choices, usage = self.extract_choices_and_usage(data)
+
+         if choices and (content := choices[0].get("delta", {}).get("content")):
+             self.streaming_texts.append(content)
+             updated = True
+
+         if usage:
+             self.streaming_usage = usage
+
+         return 1 if updated else 0
+
+     def compile_streaming(self, request: GenerationRequest) -> GenerationResponse:
+         """
+         Compile accumulated streaming chat completion content into a final response.
+
+         :param request: Original generation request
+         :return: Standardized GenerationResponse with concatenated content and metrics
+         """
+         input_metrics, output_metrics = self.extract_metrics(self.streaming_usage)
+
+         return GenerationResponse(
+             request_id=request.request_id,
+             request_args=str(
+                 request.arguments.model_dump() if request.arguments else None
+             ),
+             text="".join(self.streaming_texts),
+             input_metrics=input_metrics,
+             output_metrics=output_metrics,
+         )
+
+
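The only behavioral difference from the parent class is where each chunk's content lives; a fabricated delta payload illustrates it:

```python
from guidellm.backends.response_handlers import ChatCompletionsResponseHandler

handler = ChatCompletionsResponseHandler()

# Chat chunks nest content under choices[0]["delta"]["content"].
assert handler.add_streaming_line(
    'data: {"choices": [{"delta": {"content": "Hi"}}]}'
) == 1
# A role-only first chunk carries no content, so it counts as ignored.
assert handler.add_streaming_line(
    'data: {"choices": [{"delta": {"role": "assistant"}}]}'
) == 0
assert handler.add_streaming_line("data: [DONE]") is None
```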
+ @GenerationResponseHandlerFactory.register(
+     ["audio_transcriptions", "audio_translations"]
+ )
+ class AudioResponseHandler:
+     """
+     Response handler for audio transcription and translation endpoints.
+
+     Processes responses from audio processing APIs that convert speech to text,
+     handling both transcription and translation services. Manages audio-specific
+     usage metrics including audio tokens and processing duration.
+
+     Example:
+     ::
+         handler = AudioResponseHandler()
+         response = handler.compile_non_streaming(request, api_response)
+     """
+
+     def __init__(self):
+         """
+         Initialize the audio response handler.
+
+         Sets up internal state for accumulating streaming response data including
+         audio buffers, text chunks, and usage metrics.
+         """
+         self.streaming_buffer: bytearray = bytearray()
+         self.streaming_texts: list[str] = []
+         self.streaming_usage: dict[str, int | dict[str, int]] | None = None
+
+     def compile_non_streaming(
+         self, request: GenerationRequest, response: dict
+     ) -> GenerationResponse:
+         """
+         Process a complete audio transcription or translation response.
+
+         Extracts transcribed or translated text and audio-specific usage metrics
+         including processing duration and token counts for audio content.
+
+         :param request: Original generation request
+         :param response: Complete API response containing text and usage data
+         :return: Standardized GenerationResponse with extracted text and metrics
+         """
+         usage: dict[str, int | dict[str, int]] = response.get("usage", {})
+         input_details: dict[str, int] = usage.get("input_token_details", {}) or {}
+         output_details: dict[str, int] = usage.get("output_token_details", {}) or {}
+         text: str = response.get("text", "")
+
+         return GenerationResponse(
+             request_id=request.request_id,
+             request_args=str(
+                 request.arguments.model_dump() if request.arguments else None
+             ),
+             text=text,
+             input_metrics=UsageMetrics(
+                 text_tokens=input_details.get("text_tokens", usage.get("input_tokens")),
+                 audio_tokens=input_details.get(
+                     "audio_tokens", usage.get("input_tokens")
+                 ),
+                 audio_seconds=input_details.get("seconds", usage.get("seconds")),
+             ),
+             output_metrics=UsageMetrics(
+                 text_tokens=output_details.get(
+                     "text_tokens", usage.get("output_tokens")
+                 ),
+             ),
+         )
+
+     def add_streaming_line(self, line: str) -> int | None:
+         """
+         Process a single line from an audio streaming response.
+
+         Handles JSON-formatted streaming responses from audio processing endpoints,
+         extracting text content and usage metrics as they become available.
+
+         :param line: Raw JSON line from the streaming response
+         :return: 1 if text content was extracted, 0 if line ignored, None if done
+         """
+         if line == "data: [DONE]":
+             return None
+
+         if not line or not (line := line.strip()) or not line.startswith("{"):
+             return 0
+
+         data: dict[str, Any] = json.loads(line)
+         text: str
+         usage: dict[str, int | dict[str, int]]
+         updated = False
+
+         if text := data.get("text"):
+             self.streaming_texts.append(text)
+             updated = True
+
+         if usage := data.get("usage"):
+             self.streaming_usage = usage
+
+         return 1 if updated else 0
+
+     def compile_streaming(self, request: GenerationRequest) -> GenerationResponse:
+         """
+         Compile accumulated streaming audio text into a final response.
+
+         :param request: Original generation request
+         :return: Standardized GenerationResponse with concatenated text and metrics
+         """
+         input_metrics, output_metrics = self.extract_metrics(self.streaming_usage)
+
+         return GenerationResponse(
+             request_id=request.request_id,
+             request_args=str(
+                 request.arguments.model_dump() if request.arguments else None
+             ),
+             text="".join(self.streaming_texts),
+             input_metrics=input_metrics,
+             output_metrics=output_metrics,
+         )
+
+     def extract_metrics(
+         self, usage: dict[str, int | dict[str, int]] | None
+     ) -> tuple[UsageMetrics, UsageMetrics]:
+         """
+         Extract input and output usage metrics from audio API response usage data.
+
+         Handles audio-specific metrics including processing duration and audio tokens
+         in addition to standard text token counts.
+
+         :param usage: Usage data dictionary from audio API response
+         :return: Tuple of input_metrics and output_metrics as UsageMetrics objects
+         """
+         if not usage:
+             return UsageMetrics(), UsageMetrics()
+
+         input_details: dict[str, int] = usage.get("input_token_details", {}) or {}
+         output_details: dict[str, int] = usage.get("output_token_details", {}) or {}
+
+         return UsageMetrics(
+             text_tokens=(input_details.get("text_tokens") or usage.get("input_tokens")),
+             audio_tokens=(
+                 input_details.get("audio_tokens") or usage.get("audio_tokens")
+             ),
+             audio_seconds=(input_details.get("seconds") or usage.get("seconds")),
+         ), UsageMetrics(
+             text_tokens=output_details.get("text_tokens") or usage.get("output_tokens"),
+         )
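As a worked illustration of the audio metrics mapping, a fabricated usage payload and the fields it populates (values invented):

```python
from guidellm.backends.response_handlers import AudioResponseHandler

handler = AudioResponseHandler()
usage = {
    "input_token_details": {"text_tokens": 12, "audio_tokens": 480, "seconds": 30},
    "output_token_details": {"text_tokens": 96},
}
input_metrics, output_metrics = handler.extract_metrics(usage)
# input_metrics:  text_tokens=12, audio_tokens=480, audio_seconds=30
# output_metrics: text_tokens=96
```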
@@ -1,19 +1,23 @@
- from .aggregator import AggregatorT, BenchmarkAggregator, GenerativeBenchmarkAggregator
- from .benchmark import (
-     Benchmark,
-     BenchmarkArgs,
-     BenchmarkMetrics,
-     BenchmarkRunStats,
-     BenchmarkT,
-     GenerativeBenchmark,
-     GenerativeMetrics,
-     GenerativeTextErrorStats,
-     GenerativeTextResponseStats,
-     StatusBreakdown,
- )
- from .benchmarker import Benchmarker, BenchmarkerResult, GenerativeBenchmarker
+ """
+ Benchmark execution and performance analysis framework.
+
+ Provides comprehensive benchmarking capabilities for LLM inference workloads,
+ including profile-based execution strategies, metrics collection and aggregation,
+ progress tracking, and multi-format output generation. Supports synchronous,
+ asynchronous, concurrent, sweep, and throughput-based benchmarking profiles for
+ evaluating model performance under various load conditions.
+ """
+
+ from __future__ import annotations
+
+ from .benchmarker import Benchmarker
  from .entrypoints import benchmark_generative_text, reimport_benchmarks_report
- from .output import GenerativeBenchmarksConsole, GenerativeBenchmarksReport
+ from .output import (
+     GenerativeBenchmarkerConsole,
+     GenerativeBenchmarkerCSV,
+     GenerativeBenchmarkerHTML,
+     GenerativeBenchmarkerOutput,
+ )
  from .profile import (
      AsyncProfile,
      ConcurrentProfile,
@@ -22,46 +26,56 @@ from .profile import (
      SweepProfile,
      SynchronousProfile,
      ThroughputProfile,
-     create_profile,
  )
- from .progress import (
-     BenchmarkerProgressDisplay,
-     BenchmarkerTaskProgressState,
-     GenerativeTextBenchmarkerProgressDisplay,
-     GenerativeTextBenchmarkerTaskProgressState,
+ from .progress import BenchmarkerProgress, GenerativeConsoleBenchmarkerProgress
+ from .scenarios import get_builtin_scenarios
+ from .schemas import (
+     Benchmark,
+     BenchmarkerArgs,
+     BenchmarkerDict,
+     BenchmarkGenerativeTextArgs,
+     BenchmarkSchedulerStats,
+     EstimatedBenchmarkState,
+     GenerativeAudioMetricsSummary,
+     GenerativeBenchmark,
+     GenerativeBenchmarksReport,
+     GenerativeImageMetricsSummary,
+     GenerativeMetrics,
+     GenerativeMetricsSummary,
+     GenerativeVideoMetricsSummary,
+     SchedulerDict,
  )

  __all__ = [
-     "AggregatorT",
      "AsyncProfile",
      "Benchmark",
-     "BenchmarkAggregator",
-     "BenchmarkArgs",
-     "BenchmarkMetrics",
-     "BenchmarkRunStats",
-     "BenchmarkT",
+     "BenchmarkGenerativeTextArgs",
+     "BenchmarkSchedulerStats",
      "Benchmarker",
-     "BenchmarkerProgressDisplay",
-     "BenchmarkerResult",
-     "BenchmarkerTaskProgressState",
+     "BenchmarkerArgs",
+     "BenchmarkerDict",
+     "BenchmarkerProgress",
      "ConcurrentProfile",
+     "EstimatedBenchmarkState",
+     "GenerativeAudioMetricsSummary",
      "GenerativeBenchmark",
-     "GenerativeBenchmarkAggregator",
-     "GenerativeBenchmarker",
-     "GenerativeBenchmarksConsole",
+     "GenerativeBenchmarkerCSV",
+     "GenerativeBenchmarkerConsole",
+     "GenerativeBenchmarkerHTML",
+     "GenerativeBenchmarkerOutput",
      "GenerativeBenchmarksReport",
+     "GenerativeConsoleBenchmarkerProgress",
+     "GenerativeImageMetricsSummary",
      "GenerativeMetrics",
-     "GenerativeTextBenchmarkerProgressDisplay",
-     "GenerativeTextBenchmarkerTaskProgressState",
-     "GenerativeTextErrorStats",
-     "GenerativeTextResponseStats",
+     "GenerativeMetricsSummary",
+     "GenerativeVideoMetricsSummary",
      "Profile",
      "ProfileType",
-     "StatusBreakdown",
+     "SchedulerDict",
      "SweepProfile",
      "SynchronousProfile",
      "ThroughputProfile",
      "benchmark_generative_text",
-     "create_profile",
+     "get_builtin_scenarios",
      "reimport_benchmarks_report",
  ]
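For code tracking this release, the renamed surface imports from the same package; a sketch based on the `__all__` above (note that GenerativeBenchmarksConsole is now GenerativeBenchmarkerConsole, and create_profile is gone):

```python
# New names exported by guidellm.benchmark in 0.4.0a155, per the diff above.
from guidellm.benchmark import (
    Benchmarker,
    GenerativeBenchmark,
    GenerativeBenchmarkerConsole,  # replaces GenerativeBenchmarksConsole
    GenerativeBenchmarksReport,
    benchmark_generative_text,
    get_builtin_scenarios,
)
```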