guidellm 0.3.0rc20250429__py3-none-any.whl → 0.4.0a0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.


Files changed (55)
  1. guidellm/__init__.py +8 -13
  2. guidellm/__main__.py +290 -69
  3. guidellm/backend/__init__.py +6 -6
  4. guidellm/backend/backend.py +25 -4
  5. guidellm/backend/openai.py +147 -27
  6. guidellm/backend/response.py +6 -2
  7. guidellm/benchmark/__init__.py +16 -22
  8. guidellm/benchmark/aggregator.py +3 -3
  9. guidellm/benchmark/benchmark.py +11 -12
  10. guidellm/benchmark/benchmarker.py +2 -2
  11. guidellm/benchmark/entrypoints.py +34 -10
  12. guidellm/benchmark/output.py +57 -5
  13. guidellm/benchmark/profile.py +4 -4
  14. guidellm/benchmark/progress.py +2 -2
  15. guidellm/benchmark/scenario.py +104 -0
  16. guidellm/benchmark/scenarios/__init__.py +0 -0
  17. guidellm/config.py +28 -7
  18. guidellm/dataset/__init__.py +4 -4
  19. guidellm/dataset/creator.py +1 -1
  20. guidellm/dataset/synthetic.py +36 -11
  21. guidellm/logger.py +8 -4
  22. guidellm/objects/__init__.py +2 -2
  23. guidellm/objects/pydantic.py +30 -1
  24. guidellm/objects/statistics.py +20 -14
  25. guidellm/preprocess/__init__.py +3 -0
  26. guidellm/preprocess/dataset.py +374 -0
  27. guidellm/presentation/__init__.py +28 -0
  28. guidellm/presentation/builder.py +27 -0
  29. guidellm/presentation/data_models.py +232 -0
  30. guidellm/presentation/injector.py +66 -0
  31. guidellm/request/__init__.py +6 -3
  32. guidellm/request/loader.py +5 -5
  33. guidellm/{scheduler → request}/types.py +4 -1
  34. guidellm/scheduler/__init__.py +10 -15
  35. guidellm/scheduler/queues.py +25 -0
  36. guidellm/scheduler/result.py +21 -3
  37. guidellm/scheduler/scheduler.py +68 -60
  38. guidellm/scheduler/strategy.py +26 -24
  39. guidellm/scheduler/worker.py +64 -103
  40. guidellm/utils/__init__.py +17 -5
  41. guidellm/utils/cli.py +62 -0
  42. guidellm/utils/default_group.py +105 -0
  43. guidellm/utils/dict.py +23 -0
  44. guidellm/utils/hf_datasets.py +36 -0
  45. guidellm/utils/random.py +1 -1
  46. guidellm/utils/text.py +14 -15
  47. guidellm/version.py +6 -0
  48. guidellm-0.4.0a0.dist-info/METADATA +317 -0
  49. guidellm-0.4.0a0.dist-info/RECORD +62 -0
  50. {guidellm-0.3.0rc20250429.dist-info → guidellm-0.4.0a0.dist-info}/WHEEL +1 -1
  51. guidellm-0.3.0rc20250429.dist-info/METADATA +0 -453
  52. guidellm-0.3.0rc20250429.dist-info/RECORD +0 -48
  53. {guidellm-0.3.0rc20250429.dist-info → guidellm-0.4.0a0.dist-info}/entry_points.txt +0 -0
  54. {guidellm-0.3.0rc20250429.dist-info → guidellm-0.4.0a0.dist-info}/licenses/LICENSE +0 -0
  55. {guidellm-0.3.0rc20250429.dist-info → guidellm-0.4.0a0.dist-info}/top_level.txt +0 -0

guidellm/backend/openai.py
@@ -1,4 +1,5 @@
 import base64
+import copy
 import json
 import time
 from collections.abc import AsyncGenerator
@@ -17,12 +18,24 @@ from guidellm.backend.response import (
 )
 from guidellm.config import settings

-__all__ = ["OpenAIHTTPBackend", "TEXT_COMPLETIONS_PATH", "CHAT_COMPLETIONS_PATH"]
+__all__ = [
+    "CHAT_COMPLETIONS",
+    "CHAT_COMPLETIONS_PATH",
+    "MODELS",
+    "TEXT_COMPLETIONS",
+    "TEXT_COMPLETIONS_PATH",
+    "OpenAIHTTPBackend",
+]


 TEXT_COMPLETIONS_PATH = "/v1/completions"
 CHAT_COMPLETIONS_PATH = "/v1/chat/completions"

+EndpointType = Literal["chat_completions", "models", "text_completions"]
+CHAT_COMPLETIONS: EndpointType = "chat_completions"
+MODELS: EndpointType = "models"
+TEXT_COMPLETIONS: EndpointType = "text_completions"
+

 @Backend.register("openai_http")
 class OpenAIHTTPBackend(Backend):
@@ -49,8 +62,23 @@ class OpenAIHTTPBackend(Backend):
         If not provided, the default timeout provided from settings is used.
     :param http2: If True, uses HTTP/2 for requests to the OpenAI server.
         Defaults to True.
+    :param follow_redirects: If True, the HTTP client will follow redirect responses.
+        If not provided, the default value from settings is used.
     :param max_output_tokens: The maximum number of tokens to request for completions.
         If not provided, the default maximum tokens provided from settings is used.
+    :param extra_query: Query parameters to include in requests to the OpenAI server.
+        If "chat_completions", "models", or "text_completions" are included as keys,
+        the values of these keys will be used as the parameters for the respective
+        endpoint.
+        If not provided, no extra query parameters are added.
+    :param extra_body: Body parameters to include in requests to the OpenAI server.
+        If "chat_completions", "models", or "text_completions" are included as keys,
+        the values of these keys will be included in the body for the respective
+        endpoint.
+        If not provided, no extra body parameters are added.
+    :param remove_from_body: Parameters that should be removed from the body of each
+        request.
+        If not provided, no parameters are removed from the body.
     """

     def __init__(
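
For illustration, a minimal sketch of how the new constructor options compose; every value below is hypothetical, not a package default:

    from guidellm.backend.openai import OpenAIHTTPBackend

    backend = OpenAIHTTPBackend(
        target="http://localhost:8000",  # hypothetical local server
        headers={"X-Request-Source": "guidellm"},  # merged over generated defaults
        extra_query={"models": {"verbose": "true"}},  # scoped to the models endpoint
        extra_body={"chat_completions": {"repetition_penalty": 1.1}},
        remove_from_body=["ignore_eos"],  # strip keys some servers reject
        follow_redirects=False,
    )
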
@@ -62,7 +90,13 @@ class OpenAIHTTPBackend(Backend):
         project: Optional[str] = None,
         timeout: Optional[float] = None,
         http2: Optional[bool] = True,
+        follow_redirects: Optional[bool] = None,
         max_output_tokens: Optional[int] = None,
+        extra_query: Optional[dict] = None,
+        extra_body: Optional[dict] = None,
+        remove_from_body: Optional[list[str]] = None,
+        headers: Optional[dict] = None,
+        verify: Optional[bool] = None,
     ):
         super().__init__(type_="openai_http")
         self._target = target or settings.openai.base_url
@@ -79,20 +113,48 @@ class OpenAIHTTPBackend(Backend):

         self._model = model

+        # Start with default headers based on other params
+        default_headers: dict[str, str] = {}
         api_key = api_key or settings.openai.api_key
-        self.authorization = (
-            f"Bearer {api_key}" if api_key else settings.openai.bearer_token
-        )
+        bearer_token = settings.openai.bearer_token
+        if api_key:
+            default_headers["Authorization"] = f"Bearer {api_key}"
+        elif bearer_token:
+            default_headers["Authorization"] = bearer_token

         self.organization = organization or settings.openai.organization
+        if self.organization:
+            default_headers["OpenAI-Organization"] = self.organization
+
         self.project = project or settings.openai.project
+        if self.project:
+            default_headers["OpenAI-Project"] = self.project
+
+        # User-provided headers from kwargs or settings override defaults
+        merged_headers = default_headers.copy()
+        merged_headers.update(settings.openai.headers or {})
+        if headers:
+            merged_headers.update(headers)
+
+        # Remove headers with None values for backward compatibility and convenience
+        self.headers = {k: v for k, v in merged_headers.items() if v is not None}
+
         self.timeout = timeout if timeout is not None else settings.request_timeout
         self.http2 = http2 if http2 is not None else settings.request_http2
+        self.follow_redirects = (
+            follow_redirects
+            if follow_redirects is not None
+            else settings.request_follow_redirects
+        )
+        self.verify = verify if verify is not None else settings.openai.verify
         self.max_output_tokens = (
             max_output_tokens
             if max_output_tokens is not None
             else settings.openai.max_output_tokens
         )
+        self.extra_query = extra_query
+        self.extra_body = extra_body
+        self.remove_from_body = remove_from_body
         self._async_client: Optional[httpx.AsyncClient] = None

     @property
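
The merge order above gives explicit kwargs the last word: defaults derived from api_key/organization/project are overlaid by settings.openai.headers, then by the headers argument, and None values drop a header entirely. A sketch, assuming no header overrides or bearer token come from settings:

    backend = OpenAIHTTPBackend(api_key="sk-test")
    # backend.headers["Authorization"] == "Bearer sk-test"

    backend = OpenAIHTTPBackend(api_key="sk-test", headers={"Authorization": None})
    # "Authorization" not in backend.headers  (None removes the default)
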
@@ -120,13 +182,21 @@ class OpenAIHTTPBackend(Backend):
             "max_output_tokens": self.max_output_tokens,
             "timeout": self.timeout,
             "http2": self.http2,
-            "authorization": bool(self.authorization),
-            "organization": self.organization,
-            "project": self.project,
+            "follow_redirects": self.follow_redirects,
+            "headers": self.headers,
             "text_completions_path": TEXT_COMPLETIONS_PATH,
             "chat_completions_path": CHAT_COMPLETIONS_PATH,
         }

+    async def reset(self) -> None:
+        """
+        Reset the connection object. This is useful for backends that
+        reuse connections or have state that needs to be cleared.
+        For this backend, it closes the async client if it exists.
+        """
+        if self._async_client is not None:
+            await self._async_client.aclose()
+
     async def check_setup(self):
         """
         Check if the backend is setup correctly and can be used for requests.
@@ -165,7 +235,10 @@ class OpenAIHTTPBackend(Backend):
         """
         target = f"{self.target}/v1/models"
         headers = self._headers()
-        response = await self._get_async_client().get(target, headers=headers)
+        params = self._params(MODELS)
+        response = await self._get_async_client().get(
+            target, headers=headers, params=params
+        )
         response.raise_for_status()

         models = []
@@ -210,7 +283,9 @@ class OpenAIHTTPBackend(Backend):
         )

         headers = self._headers()
+        params = self._params(TEXT_COMPLETIONS)
         payload = self._completions_payload(
+            endpoint_type=TEXT_COMPLETIONS,
             orig_kwargs=kwargs,
             max_output_tokens=output_token_count,
             prompt=prompt,
@@ -223,14 +298,16 @@ class OpenAIHTTPBackend(Backend):
                 request_prompt_tokens=prompt_token_count,
                 request_output_tokens=output_token_count,
                 headers=headers,
+                params=params,
                 payload=payload,
             ):
                 yield resp
         except Exception as ex:
             logger.error(
-                "{} request with headers: {} and payload: {} failed: {}",
+                "{} request with headers: {} and params: {} and payload: {} failed: {}",
                 self.__class__.__name__,
                 headers,
+                params,
                 payload,
                 ex,
             )
@@ -282,10 +359,12 @@ class OpenAIHTTPBackend(Backend):
         """
         logger.debug("{} invocation with args: {}", self.__class__.__name__, locals())
         headers = self._headers()
+        params = self._params(CHAT_COMPLETIONS)
         messages = (
             content if raw_content else self._create_chat_messages(content=content)
         )
         payload = self._completions_payload(
+            endpoint_type=CHAT_COMPLETIONS,
             orig_kwargs=kwargs,
             max_output_tokens=output_token_count,
             messages=messages,
@@ -298,14 +377,16 @@ class OpenAIHTTPBackend(Backend):
                 request_prompt_tokens=prompt_token_count,
                 request_output_tokens=output_token_count,
                 headers=headers,
+                params=params,
                 payload=payload,
             ):
                 yield resp
         except Exception as ex:
             logger.error(
-                "{} request with headers: {} and payload: {} failed: {}",
+                "{} request with headers: {} and params: {} and payload: {} failed: {}",
                 self.__class__.__name__,
                 headers,
+                params,
                 payload,
                 ex,
             )
@@ -318,8 +399,13 @@ class OpenAIHTTPBackend(Backend):

         :return: The async HTTP client.
         """
-        if self._async_client is None:
-            client = httpx.AsyncClient(http2=self.http2, timeout=self.timeout)
+        if self._async_client is None or self._async_client.is_closed:
+            client = httpx.AsyncClient(
+                http2=self.http2,
+                timeout=self.timeout,
+                follow_redirects=self.follow_redirects,
+                verify=self.verify,
+            )
             self._async_client = client
         else:
             client = self._async_client
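
Taken together with the new reset() method above, the added is_closed check means a client closed by reset() is transparently rebuilt on the next request instead of failing on a closed transport.
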
@@ -330,22 +416,44 @@ class OpenAIHTTPBackend(Backend):
         headers = {
             "Content-Type": "application/json",
         }
+        headers.update(self.headers)
+        return headers

-        if self.authorization:
-            headers["Authorization"] = self.authorization
+    def _params(self, endpoint_type: EndpointType) -> dict[str, str]:
+        if self.extra_query is None:
+            return {}

-        if self.organization:
-            headers["OpenAI-Organization"] = self.organization
+        if (
+            CHAT_COMPLETIONS in self.extra_query
+            or MODELS in self.extra_query
+            or TEXT_COMPLETIONS in self.extra_query
+        ):
+            return self.extra_query.get(endpoint_type, {})

-        if self.project:
-            headers["OpenAI-Project"] = self.project
+        return self.extra_query

-        return headers
+    def _extra_body(self, endpoint_type: EndpointType) -> dict[str, Any]:
+        if self.extra_body is None:
+            return {}
+
+        if (
+            CHAT_COMPLETIONS in self.extra_body
+            or MODELS in self.extra_body
+            or TEXT_COMPLETIONS in self.extra_body
+        ):
+            return copy.deepcopy(self.extra_body.get(endpoint_type, {}))
+
+        return copy.deepcopy(self.extra_body)

     def _completions_payload(
-        self, orig_kwargs: Optional[dict], max_output_tokens: Optional[int], **kwargs
+        self,
+        endpoint_type: EndpointType,
+        orig_kwargs: Optional[dict],
+        max_output_tokens: Optional[int],
+        **kwargs,
     ) -> dict:
-        payload = orig_kwargs or {}
+        payload = self._extra_body(endpoint_type)
+        payload.update(orig_kwargs or {})
         payload.update(kwargs)
         payload["model"] = self.model
         payload["stream"] = True
@@ -368,6 +476,10 @@ class OpenAIHTTPBackend(Backend):
             payload["stop"] = None
             payload["ignore_eos"] = True

+        if self.remove_from_body:
+            for key in self.remove_from_body:
+                payload.pop(key, None)
+
         return payload

     @staticmethod
@@ -438,8 +550,9 @@ class OpenAIHTTPBackend(Backend):
         request_id: Optional[str],
         request_prompt_tokens: Optional[int],
         request_output_tokens: Optional[int],
-        headers: dict,
-        payload: dict,
+        headers: dict[str, str],
+        params: dict[str, str],
+        payload: dict[str, Any],
     ) -> AsyncGenerator[Union[StreamingTextResponse, ResponseSummary], None]:
         if type_ == "text_completions":
             target = f"{self.target}{TEXT_COMPLETIONS_PATH}"
@@ -449,14 +562,17 @@
             raise ValueError(f"Unsupported type: {type_}")

         logger.info(
-            "{} making request: {} to target: {} using http2: {} for "
-            "timeout: {} with headers: {} and payload: {}",
+            "{} making request: {} to target: {} using http2: {} following "
+            "redirects: {} for timeout: {} with headers: {} and params: {} and "
+            "payload: {}",
             self.__class__.__name__,
             request_id,
             target,
             self.http2,
+            self.follow_redirects,
             self.timeout,
             headers,
+            params,
             payload,
         )

@@ -484,7 +600,7 @@
             start_time = time.time()

             async with self._get_async_client().stream(
-                "POST", target, headers=headers, json=payload
+                "POST", target, headers=headers, params=params, json=payload
             ) as stream:
                 stream.raise_for_status()

@@ -528,10 +644,12 @@
                 response_output_count = usage["output"]

             logger.info(
-                "{} request: {} with headers: {} and payload: {} completed with: {}",
+                "{} request: {} with headers: {} and params: {} and payload: {} "
+                "completed with: {}",
                 self.__class__.__name__,
                 request_id,
                 headers,
+                params,
                 payload,
                 response_value,
             )
@@ -541,9 +659,11 @@
                 request_args=RequestArgs(
                     target=target,
                     headers=headers,
+                    params=params,
                     payload=payload,
                     timeout=self.timeout,
                     http2=self.http2,
+                    follow_redirects=self.follow_redirects,
                 ),
                 start_time=start_time,
                 end_time=iter_time,

guidellm/backend/response.py
@@ -6,10 +6,10 @@ from guidellm.config import settings
 from guidellm.objects.pydantic import StandardBaseModel

 __all__ = [
-    "StreamingResponseType",
-    "StreamingTextResponse",
     "RequestArgs",
     "ResponseSummary",
+    "StreamingResponseType",
+    "StreamingTextResponse",
 ]

@@ -48,17 +48,21 @@ class RequestArgs(StandardBaseModel):

    :param target: The target URL or function for the request.
    :param headers: The headers, if any, included in the request such as authorization.
+    :param params: The query parameters, if any, included in the request.
    :param payload: The payload / arguments for the request including the prompt /
        content and other configurations.
    :param timeout: The timeout for the request in seconds, if any.
    :param http2: Whether HTTP/2 was used for the request, if applicable.
+    :param follow_redirects: Whether the request should follow redirect responses.
    """

    target: str
    headers: dict[str, str]
+    params: dict[str, str]
    payload: dict[str, Any]
    timeout: Optional[float] = None
    http2: Optional[bool] = None
+    follow_redirects: Optional[bool] = None


 class ResponseSummary(StandardBaseModel):
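
For illustration, the two new fields as they would be recorded on a request; the target and parameter values are hypothetical:

    from guidellm.backend.response import RequestArgs

    args = RequestArgs(
        target="http://localhost:8000/v1/chat/completions",
        headers={"Content-Type": "application/json"},
        params={"api-version": "2024-06-01"},  # hypothetical query parameter
        payload={"model": "my-model", "stream": True},
        timeout=300.0,
        http2=True,
        follow_redirects=True,
    )
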
guidellm/benchmark/__init__.py
@@ -12,7 +12,7 @@ from .benchmark import (
     StatusBreakdown,
 )
 from .benchmarker import Benchmarker, BenchmarkerResult, GenerativeBenchmarker
-from .entrypoints import benchmark_generative_text
+from .entrypoints import benchmark_generative_text, reimport_benchmarks_report
 from .output import GenerativeBenchmarksConsole, GenerativeBenchmarksReport
 from .profile import (
     AsyncProfile,
@@ -32,42 +32,36 @@ from .progress import (
 )

 __all__ = [
-    # Aggregator
     "AggregatorT",
-    "BenchmarkAggregator",
-    "GenerativeBenchmarkAggregator",
-    # Benchmark
+    "AsyncProfile",
     "Benchmark",
+    "BenchmarkAggregator",
     "BenchmarkArgs",
     "BenchmarkMetrics",
     "BenchmarkRunStats",
     "BenchmarkT",
-    "GenerativeBenchmark",
-    "GenerativeMetrics",
-    "GenerativeTextErrorStats",
-    "GenerativeTextResponseStats",
-    "StatusBreakdown",
-    # Benchmarker
     "Benchmarker",
+    "BenchmarkerProgressDisplay",
     "BenchmarkerResult",
+    "BenchmarkerTaskProgressState",
+    "ConcurrentProfile",
+    "GenerativeBenchmark",
+    "GenerativeBenchmarkAggregator",
     "GenerativeBenchmarker",
-    # Entry points
-    "benchmark_generative_text",
-    # Output
     "GenerativeBenchmarksConsole",
     "GenerativeBenchmarksReport",
-    # Profile
-    "AsyncProfile",
-    "ConcurrentProfile",
+    "GenerativeMetrics",
+    "GenerativeTextBenchmarkerProgressDisplay",
+    "GenerativeTextBenchmarkerTaskProgressState",
+    "GenerativeTextErrorStats",
+    "GenerativeTextResponseStats",
     "Profile",
     "ProfileType",
+    "StatusBreakdown",
     "SweepProfile",
     "SynchronousProfile",
     "ThroughputProfile",
+    "benchmark_generative_text",
     "create_profile",
-    # Progress
-    "BenchmarkerProgressDisplay",
-    "BenchmarkerTaskProgressState",
-    "GenerativeTextBenchmarkerProgressDisplay",
-    "GenerativeTextBenchmarkerTaskProgressState",
+    "reimport_benchmarks_report",
 ]

guidellm/benchmark/aggregator.py
@@ -32,11 +32,11 @@ from guidellm.request import (
     GenerationRequest,
     GenerativeRequestLoaderDescription,
     RequestLoaderDescription,
+    RequestT,
+    ResponseT,
 )
 from guidellm.scheduler import (
     GenerativeRequestsWorkerDescription,
-    RequestT,
-    ResponseT,
     SchedulerRequestResult,
     WorkerDescription,
 )
@@ -403,7 +403,7 @@ class BenchmarkAggregator(
         in_warmup_duration = (
             self.args.warmup_duration
             and result.request_info.worker_start
-            <= (global_start_time - self.args.warmup_duration)
+            <= (global_start_time + self.args.warmup_duration)
         )

         if in_warmup_number or in_warmup_duration:
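
Worked check of the sign fix: with global_start_time = 100.0 and warmup_duration = 30.0, a request whose worker_start is 120.0 falls inside the warmup window. The old comparison tested 120.0 <= 100.0 - 30.0 = 70.0, which no request could ever satisfy; the corrected 120.0 <= 100.0 + 30.0 = 130.0 classifies it as warmup.
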
guidellm/benchmark/benchmark.py
@@ -34,16 +34,16 @@ from guidellm.scheduler import (
 )

 __all__ = [
-    "BenchmarkT",
-    "StatusBreakdown",
-    "BenchmarkArgs",
-    "BenchmarkRunStats",
     "Benchmark",
+    "BenchmarkArgs",
     "BenchmarkMetrics",
-    "GenerativeTextResponseStats",
-    "GenerativeTextErrorStats",
-    "GenerativeMetrics",
+    "BenchmarkRunStats",
+    "BenchmarkT",
     "GenerativeBenchmark",
+    "GenerativeMetrics",
+    "GenerativeTextErrorStats",
+    "GenerativeTextResponseStats",
+    "StatusBreakdown",
 ]

@@ -815,12 +815,11 @@ class GenerativeBenchmark(Benchmark):
                     req.first_token_time or req.start_time
                     for req in total_with_output_first
                 ],
-                iter_counts=[
-                    req.prompt_tokens + req.output_tokens
-                    for req in total_with_output_first
-                ],
+                iter_counts=[req.output_tokens for req in total_with_output_first],
                 first_iter_counts=[
-                    req.prompt_tokens for req in total_with_output_first
+                    # prompt tokens + first token
+                    req.prompt_tokens + 1
+                    for req in total_with_output_first
                 ],
             ),
         ),
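
In effect, the first iteration (time to first token) is now credited with the prompt plus the first generated token, and the remaining iterations are credited with the output token count alone, instead of booking prompt tokens into both lists.
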
guidellm/benchmark/benchmarker.py
@@ -27,12 +27,12 @@ from guidellm.request import (
     GenerationRequest,
     GenerativeRequestLoaderDescription,
     RequestLoaderDescription,
+    RequestT,
+    ResponseT,
 )
 from guidellm.scheduler import (
     GenerativeRequestsWorker,
     RequestsWorker,
-    RequestT,
-    ResponseT,
     Scheduler,
     SchedulerRequestResult,
     SchedulingStrategy,

guidellm/benchmark/entrypoints.py
@@ -15,10 +15,22 @@ from guidellm.benchmark.output import (
 )
 from guidellm.benchmark.profile import ProfileType, create_profile
 from guidellm.benchmark.progress import GenerativeTextBenchmarkerProgressDisplay
+from guidellm.benchmark.scenario import GenerativeTextScenario, Scenario
 from guidellm.request import GenerativeRequestLoader
 from guidellm.scheduler import StrategyType


+async def benchmark_with_scenario(scenario: Scenario, **kwargs):
+    """
+    Run a benchmark using a scenario and specify any extra arguments
+    """
+
+    if isinstance(scenario, GenerativeTextScenario):
+        return await benchmark_generative_text(**vars(scenario), **kwargs)
+    else:
+        raise ValueError(f"Unsupported Scenario type {type(scenario)}")
+
+
 async def benchmark_generative_text(
     target: str,
     backend_type: BackendType,
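
A sketch of driving a run from a scenario object. Since vars(scenario) is splatted into benchmark_generative_text, the scenario fields mirror that function's signature, but the field names and data syntax below are assumptions, not a confirmed schema:

    import asyncio

    from guidellm.benchmark.entrypoints import benchmark_with_scenario
    from guidellm.benchmark.scenario import GenerativeTextScenario

    scenario = GenerativeTextScenario(  # field names assumed; see scenario.py
        target="http://localhost:8000",
        backend_type="openai_http",
        data="prompt_tokens=256,output_tokens=128",
        rate_type="synchronous",
    )
    report, saved_path = asyncio.run(
        benchmark_with_scenario(scenario, show_progress=False)
    )
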
@@ -38,18 +50,18 @@ async def benchmark_generative_text(
     data_args: Optional[dict[str, Any]],
     data_sampler: Optional[Literal["random"]],
     rate_type: Union[StrategyType, ProfileType],
-    rate: Optional[Union[int, float, list[Union[int, float]]]],
+    rate: Optional[Union[float, list[float]]],
     max_seconds: Optional[float],
     max_requests: Optional[int],
     warmup_percent: Optional[float],
     cooldown_percent: Optional[float],
-    show_progress: bool,
-    show_progress_scheduler_stats: bool,
-    output_console: bool,
     output_path: Optional[Union[str, Path]],
     output_extras: Optional[dict[str, Any]],
     output_sampling: Optional[int],
     random_seed: int,
+    show_progress: bool = True,
+    show_progress_scheduler_stats: bool = False,
+    output_console: bool = True,
 ) -> tuple[GenerativeBenchmarksReport, Optional[Path]]:
     console = GenerativeBenchmarksConsole(enabled=show_progress)
     console.print_line("Creating backend...")
@@ -121,13 +133,8 @@
     )

     if output_console:
-        orig_enabled = console.enabled
-        console.enabled = True
         console.benchmarks = report.benchmarks
-        console.print_benchmarks_metadata()
-        console.print_benchmarks_info()
-        console.print_benchmarks_stats()
-        console.enabled = orig_enabled
+        console.print_full_report()

     if output_path:
         console.print_line("\nSaving benchmarks report...")
@@ -139,3 +146,20 @@
     console.print_line("\nBenchmarking complete.")

     return report, saved_path
+
+
+def reimport_benchmarks_report(file: Path, output_path: Optional[Path]) -> None:
+    """
+    The command-line entry point for re-importing and displaying an
+    existing benchmarks report. Can also specify an output path to
+    re-save the report. Assumes the file provided exists.
+    """
+    console = GenerativeBenchmarksConsole(enabled=True)
+    report = GenerativeBenchmarksReport.load_file(file)
+    console.benchmarks = report.benchmarks
+    console.print_full_report()
+
+    if output_path:
+        console.print_line("\nSaving benchmarks report...")
+        saved_path = report.save_file(output_path)
+        console.print_line(f"Benchmarks report saved to {saved_path}")
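
Usage sketch for the new entry point, re-rendering a previously saved report and optionally writing a copy; the file names are hypothetical:

    from pathlib import Path

    from guidellm.benchmark.entrypoints import reimport_benchmarks_report

    reimport_benchmarks_report(
        file=Path("benchmarks.json"),               # must already exist
        output_path=Path("benchmarks-copy.json"),   # optional re-save target
    )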