guidellm 0.3.0rc20250507__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.


Files changed (55)
  1. guidellm/__init__.py +8 -13
  2. guidellm/__main__.py +290 -69
  3. guidellm/backend/__init__.py +6 -6
  4. guidellm/backend/backend.py +25 -4
  5. guidellm/backend/openai.py +153 -30
  6. guidellm/backend/response.py +6 -2
  7. guidellm/benchmark/__init__.py +16 -22
  8. guidellm/benchmark/aggregator.py +3 -3
  9. guidellm/benchmark/benchmark.py +11 -12
  10. guidellm/benchmark/benchmarker.py +2 -2
  11. guidellm/benchmark/entrypoints.py +34 -10
  12. guidellm/benchmark/output.py +59 -8
  13. guidellm/benchmark/profile.py +4 -4
  14. guidellm/benchmark/progress.py +2 -2
  15. guidellm/benchmark/scenario.py +104 -0
  16. guidellm/benchmark/scenarios/__init__.py +0 -0
  17. guidellm/config.py +32 -7
  18. guidellm/dataset/__init__.py +4 -4
  19. guidellm/dataset/creator.py +1 -1
  20. guidellm/dataset/synthetic.py +36 -11
  21. guidellm/logger.py +8 -4
  22. guidellm/objects/__init__.py +2 -2
  23. guidellm/objects/pydantic.py +30 -1
  24. guidellm/objects/statistics.py +20 -14
  25. guidellm/preprocess/__init__.py +3 -0
  26. guidellm/preprocess/dataset.py +374 -0
  27. guidellm/presentation/__init__.py +28 -0
  28. guidellm/presentation/builder.py +27 -0
  29. guidellm/presentation/data_models.py +232 -0
  30. guidellm/presentation/injector.py +66 -0
  31. guidellm/request/__init__.py +6 -3
  32. guidellm/request/loader.py +5 -5
  33. guidellm/{scheduler → request}/types.py +4 -1
  34. guidellm/scheduler/__init__.py +10 -15
  35. guidellm/scheduler/queues.py +25 -0
  36. guidellm/scheduler/result.py +21 -3
  37. guidellm/scheduler/scheduler.py +68 -60
  38. guidellm/scheduler/strategy.py +26 -24
  39. guidellm/scheduler/worker.py +64 -103
  40. guidellm/utils/__init__.py +17 -5
  41. guidellm/utils/cli.py +62 -0
  42. guidellm/utils/default_group.py +105 -0
  43. guidellm/utils/dict.py +23 -0
  44. guidellm/utils/hf_datasets.py +36 -0
  45. guidellm/utils/random.py +1 -1
  46. guidellm/utils/text.py +12 -5
  47. guidellm/version.py +6 -0
  48. guidellm-0.3.1.dist-info/METADATA +329 -0
  49. guidellm-0.3.1.dist-info/RECORD +62 -0
  50. {guidellm-0.3.0rc20250507.dist-info → guidellm-0.3.1.dist-info}/WHEEL +1 -1
  51. guidellm-0.3.0rc20250507.dist-info/METADATA +0 -451
  52. guidellm-0.3.0rc20250507.dist-info/RECORD +0 -48
  53. {guidellm-0.3.0rc20250507.dist-info → guidellm-0.3.1.dist-info}/entry_points.txt +0 -0
  54. {guidellm-0.3.0rc20250507.dist-info → guidellm-0.3.1.dist-info}/licenses/LICENSE +0 -0
  55. {guidellm-0.3.0rc20250507.dist-info → guidellm-0.3.1.dist-info}/top_level.txt +0 -0

File: guidellm/backend/openai.py

@@ -1,4 +1,5 @@
 import base64
+import copy
 import json
 import time
 from collections.abc import AsyncGenerator
@@ -17,12 +18,25 @@ from guidellm.backend.response import (
 )
 from guidellm.config import settings
 
-__all__ = ["OpenAIHTTPBackend", "TEXT_COMPLETIONS_PATH", "CHAT_COMPLETIONS_PATH"]
+__all__ = [
+    "CHAT_COMPLETIONS",
+    "CHAT_COMPLETIONS_PATH",
+    "MODELS",
+    "TEXT_COMPLETIONS",
+    "TEXT_COMPLETIONS_PATH",
+    "OpenAIHTTPBackend",
+]
 
 
 TEXT_COMPLETIONS_PATH = "/v1/completions"
 CHAT_COMPLETIONS_PATH = "/v1/chat/completions"
 
+CompletionEndpointType = Literal["text_completions", "chat_completions"]
+EndpointType = Union[Literal["models"], CompletionEndpointType]
+CHAT_COMPLETIONS: CompletionEndpointType = "chat_completions"
+MODELS: EndpointType = "models"
+TEXT_COMPLETIONS: CompletionEndpointType = "text_completions"
+
 
 @Backend.register("openai_http")
 class OpenAIHTTPBackend(Backend):
@@ -49,8 +63,23 @@ class OpenAIHTTPBackend(Backend):
         If not provided, the default timeout provided from settings is used.
     :param http2: If True, uses HTTP/2 for requests to the OpenAI server.
         Defaults to True.
+    :param follow_redirects: If True, the HTTP client will follow redirect responses.
+        If not provided, the default value from settings is used.
     :param max_output_tokens: The maximum number of tokens to request for completions.
         If not provided, the default maximum tokens provided from settings is used.
+    :param extra_query: Query parameters to include in requests to the OpenAI server.
+        If "chat_completions", "models", or "text_completions" are included as keys,
+        the values of these keys will be used as the parameters for the respective
+        endpoint.
+        If not provided, no extra query parameters are added.
+    :param extra_body: Body parameters to include in requests to the OpenAI server.
+        If "chat_completions", "models", or "text_completions" are included as keys,
+        the values of these keys will be included in the body for the respective
+        endpoint.
+        If not provided, no extra body parameters are added.
+    :param remove_from_body: Parameters that should be removed from the body of each
+        request.
+        If not provided, no parameters are removed from the body.
     """
 
     def __init__(
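
Taken together, the new constructor parameters let callers shape every outgoing request. A minimal usage sketch; the target URL, query keys, body fields, and header names below are hypothetical:

    backend = OpenAIHTTPBackend(
        target="http://localhost:8000",             # hypothetical local server
        follow_redirects=False,
        extra_query={"api-version": "2024-01-01"},  # added to every request URL
        extra_body={"chat_completions": {"seed": 42}},  # chat endpoint only
        remove_from_body=["ignore_eos"],            # strip fields a server rejects
        headers={"X-Request-Source": "guidellm"},
        verify=False,                               # skip TLS certificate checks
    )
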
@@ -62,7 +91,13 @@ class OpenAIHTTPBackend(Backend):
         project: Optional[str] = None,
         timeout: Optional[float] = None,
         http2: Optional[bool] = True,
+        follow_redirects: Optional[bool] = None,
         max_output_tokens: Optional[int] = None,
+        extra_query: Optional[dict] = None,
+        extra_body: Optional[dict] = None,
+        remove_from_body: Optional[list[str]] = None,
+        headers: Optional[dict] = None,
+        verify: Optional[bool] = None,
     ):
         super().__init__(type_="openai_http")
         self._target = target or settings.openai.base_url
@@ -79,20 +114,48 @@ class OpenAIHTTPBackend(Backend):
 
         self._model = model
 
+        # Start with default headers based on other params
+        default_headers: dict[str, str] = {}
         api_key = api_key or settings.openai.api_key
-        self.authorization = (
-            f"Bearer {api_key}" if api_key else settings.openai.bearer_token
-        )
+        bearer_token = settings.openai.bearer_token
+        if api_key:
+            default_headers["Authorization"] = f"Bearer {api_key}"
+        elif bearer_token:
+            default_headers["Authorization"] = bearer_token
 
         self.organization = organization or settings.openai.organization
+        if self.organization:
+            default_headers["OpenAI-Organization"] = self.organization
+
         self.project = project or settings.openai.project
+        if self.project:
+            default_headers["OpenAI-Project"] = self.project
+
+        # User-provided headers from kwargs or settings override defaults
+        merged_headers = default_headers.copy()
+        merged_headers.update(settings.openai.headers or {})
+        if headers:
+            merged_headers.update(headers)
+
+        # Remove headers with None values for backward compatibility and convenience
+        self.headers = {k: v for k, v in merged_headers.items() if v is not None}
+
         self.timeout = timeout if timeout is not None else settings.request_timeout
         self.http2 = http2 if http2 is not None else settings.request_http2
+        self.follow_redirects = (
+            follow_redirects
+            if follow_redirects is not None
+            else settings.request_follow_redirects
+        )
+        self.verify = verify if verify is not None else settings.openai.verify
         self.max_output_tokens = (
             max_output_tokens
             if max_output_tokens is not None
             else settings.openai.max_output_tokens
         )
+        self.extra_query = extra_query
+        self.extra_body = extra_body
+        self.remove_from_body = remove_from_body
         self._async_client: Optional[httpx.AsyncClient] = None
 
     @property
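
The merge above establishes a simple precedence: computed defaults (Authorization, OpenAI-Organization, OpenAI-Project) are overridden by settings.openai.headers, which are in turn overridden by the headers kwarg, and any header set to None is dropped. A sketch, assuming settings.openai.headers is empty; all values are hypothetical:

    backend = OpenAIHTTPBackend(
        api_key="sk-local",            # default: Authorization: Bearer sk-local
        organization="acme",           # default: OpenAI-Organization: acme
        headers={
            "Authorization": "Bearer override",  # kwarg beats the api_key default
            "OpenAI-Organization": None,         # None removes the header entirely
        },
    )
    assert backend.headers == {"Authorization": "Bearer override"}
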
@@ -120,13 +183,21 @@ class OpenAIHTTPBackend(Backend):
             "max_output_tokens": self.max_output_tokens,
             "timeout": self.timeout,
             "http2": self.http2,
-            "authorization": bool(self.authorization),
-            "organization": self.organization,
-            "project": self.project,
+            "follow_redirects": self.follow_redirects,
+            "headers": self.headers,
             "text_completions_path": TEXT_COMPLETIONS_PATH,
             "chat_completions_path": CHAT_COMPLETIONS_PATH,
         }
 
+    async def reset(self) -> None:
+        """
+        Reset the connection object. This is useful for backends that
+        reuse connections or have state that needs to be cleared.
+        For this backend, it closes the async client if it exists.
+        """
+        if self._async_client is not None:
+            await self._async_client.aclose()
+
     async def check_setup(self):
         """
         Check if the backend is setup correctly and can be used for requests.
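
A sketch of the intended use: combined with the is_closed check added to _get_async_client() further down, reset() lets a caller drop pooled connections between benchmark runs without rebuilding the backend:

    await backend.reset()  # closes the pooled httpx.AsyncClient, if one exists
    # the next request transparently re-creates the client, since
    # _get_async_client() now rebuilds it when the previous one is closed
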
@@ -165,7 +236,10 @@ class OpenAIHTTPBackend(Backend):
         """
         target = f"{self.target}/v1/models"
         headers = self._headers()
-        response = await self._get_async_client().get(target, headers=headers)
+        params = self._params(MODELS)
+        response = await self._get_async_client().get(
+            target, headers=headers, params=params
+        )
         response.raise_for_status()
 
         models = []
@@ -210,7 +284,9 @@ class OpenAIHTTPBackend(Backend):
         )
 
         headers = self._headers()
+        params = self._params(TEXT_COMPLETIONS)
         payload = self._completions_payload(
+            endpoint_type=TEXT_COMPLETIONS,
             orig_kwargs=kwargs,
             max_output_tokens=output_token_count,
             prompt=prompt,
@@ -223,14 +299,16 @@ class OpenAIHTTPBackend(Backend):
                 request_prompt_tokens=prompt_token_count,
                 request_output_tokens=output_token_count,
                 headers=headers,
+                params=params,
                 payload=payload,
             ):
                 yield resp
         except Exception as ex:
             logger.error(
-                "{} request with headers: {} and payload: {} failed: {}",
+                "{} request with headers: {} and params: {} and payload: {} failed: {}",
                 self.__class__.__name__,
                 headers,
+                params,
                 payload,
                 ex,
             )
@@ -282,10 +360,12 @@ class OpenAIHTTPBackend(Backend):
         """
         logger.debug("{} invocation with args: {}", self.__class__.__name__, locals())
         headers = self._headers()
+        params = self._params(CHAT_COMPLETIONS)
         messages = (
             content if raw_content else self._create_chat_messages(content=content)
         )
         payload = self._completions_payload(
+            endpoint_type=CHAT_COMPLETIONS,
             orig_kwargs=kwargs,
             max_output_tokens=output_token_count,
             messages=messages,
@@ -298,14 +378,16 @@ class OpenAIHTTPBackend(Backend):
                 request_prompt_tokens=prompt_token_count,
                 request_output_tokens=output_token_count,
                 headers=headers,
+                params=params,
                 payload=payload,
             ):
                 yield resp
         except Exception as ex:
             logger.error(
-                "{} request with headers: {} and payload: {} failed: {}",
+                "{} request with headers: {} and params: {} and payload: {} failed: {}",
                 self.__class__.__name__,
                 headers,
+                params,
                 payload,
                 ex,
             )
@@ -318,8 +400,13 @@ class OpenAIHTTPBackend(Backend):
 
         :return: The async HTTP client.
         """
-        if self._async_client is None:
-            client = httpx.AsyncClient(http2=self.http2, timeout=self.timeout)
+        if self._async_client is None or self._async_client.is_closed:
+            client = httpx.AsyncClient(
+                http2=self.http2,
+                timeout=self.timeout,
+                follow_redirects=self.follow_redirects,
+                verify=self.verify,
+            )
             self._async_client = client
         else:
             client = self._async_client
@@ -330,22 +417,44 @@ class OpenAIHTTPBackend(Backend):
         headers = {
             "Content-Type": "application/json",
         }
+        headers.update(self.headers)
+        return headers
 
-        if self.authorization:
-            headers["Authorization"] = self.authorization
+    def _params(self, endpoint_type: EndpointType) -> dict[str, str]:
+        if self.extra_query is None:
+            return {}
 
-        if self.organization:
-            headers["OpenAI-Organization"] = self.organization
+        if (
+            CHAT_COMPLETIONS in self.extra_query
+            or MODELS in self.extra_query
+            or TEXT_COMPLETIONS in self.extra_query
+        ):
+            return self.extra_query.get(endpoint_type, {})
 
-        if self.project:
-            headers["OpenAI-Project"] = self.project
+        return self.extra_query
 
-        return headers
+    def _extra_body(self, endpoint_type: EndpointType) -> dict[str, Any]:
+        if self.extra_body is None:
+            return {}
+
+        if (
+            CHAT_COMPLETIONS in self.extra_body
+            or MODELS in self.extra_body
+            or TEXT_COMPLETIONS in self.extra_body
+        ):
+            return copy.deepcopy(self.extra_body.get(endpoint_type, {}))
+
+        return copy.deepcopy(self.extra_body)
 
     def _completions_payload(
-        self, orig_kwargs: Optional[dict], max_output_tokens: Optional[int], **kwargs
+        self,
+        endpoint_type: CompletionEndpointType,
+        orig_kwargs: Optional[dict],
+        max_output_tokens: Optional[int],
+        **kwargs,
     ) -> dict:
-        payload = orig_kwargs or {}
+        payload = self._extra_body(endpoint_type)
+        payload.update(orig_kwargs or {})
         payload.update(kwargs)
         payload["model"] = self.model
         payload["stream"] = True
@@ -359,8 +468,10 @@ class OpenAIHTTPBackend(Backend):
                 self.__class__.__name__,
                 max_output_tokens or self.max_output_tokens,
             )
-            payload["max_tokens"] = max_output_tokens or self.max_output_tokens
-            payload["max_completion_tokens"] = payload["max_tokens"]
+            max_output_key = settings.openai.max_output_key.get(
+                endpoint_type, "max_tokens"
+            )
+            payload[max_output_key] = max_output_tokens or self.max_output_tokens
 
             if max_output_tokens:
                 # only set stop and ignore_eos if max_output_tokens set at request level
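
Rather than always sending both max_tokens and max_completion_tokens, the payload now uses a single key resolved per endpoint from settings. The actual mapping lives in the config.py changes not shown in this diff; presumably something along these lines:

    # assumed shape of settings.openai.max_output_key (defined in guidellm/config.py)
    max_output_key = {
        "text_completions": "max_tokens",
        "chat_completions": "max_completion_tokens",
    }
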
@@ -368,6 +479,10 @@ class OpenAIHTTPBackend(Backend):
                 payload["stop"] = None
                 payload["ignore_eos"] = True
 
+        if self.remove_from_body:
+            for key in self.remove_from_body:
+                payload.pop(key, None)
+
         return payload
 
     @staticmethod
@@ -438,8 +553,9 @@ class OpenAIHTTPBackend(Backend):
         request_id: Optional[str],
         request_prompt_tokens: Optional[int],
         request_output_tokens: Optional[int],
-        headers: dict,
-        payload: dict,
+        headers: dict[str, str],
+        params: dict[str, str],
+        payload: dict[str, Any],
     ) -> AsyncGenerator[Union[StreamingTextResponse, ResponseSummary], None]:
         if type_ == "text_completions":
             target = f"{self.target}{TEXT_COMPLETIONS_PATH}"
@@ -449,14 +565,17 @@ class OpenAIHTTPBackend(Backend):
             raise ValueError(f"Unsupported type: {type_}")
 
         logger.info(
-            "{} making request: {} to target: {} using http2: {} for "
-            "timeout: {} with headers: {} and payload: {}",
+            "{} making request: {} to target: {} using http2: {} following "
+            "redirects: {} for timeout: {} with headers: {} and params: {} and "
+            "payload: {}",
             self.__class__.__name__,
             request_id,
             target,
             self.http2,
+            self.follow_redirects,
             self.timeout,
             headers,
+            params,
             payload,
         )
 
@@ -484,7 +603,7 @@ class OpenAIHTTPBackend(Backend):
         start_time = time.time()
 
         async with self._get_async_client().stream(
-            "POST", target, headers=headers, json=payload
+            "POST", target, headers=headers, params=params, json=payload
         ) as stream:
             stream.raise_for_status()
 
@@ -528,10 +647,12 @@ class OpenAIHTTPBackend(Backend):
             response_output_count = usage["output"]
 
         logger.info(
-            "{} request: {} with headers: {} and payload: {} completed with: {}",
+            "{} request: {} with headers: {} and params: {} and payload: {} completed "
+            "with: {}",
             self.__class__.__name__,
             request_id,
             headers,
+            params,
             payload,
             response_value,
         )
@@ -541,9 +662,11 @@ class OpenAIHTTPBackend(Backend):
             request_args=RequestArgs(
                 target=target,
                 headers=headers,
+                params=params,
                 payload=payload,
                 timeout=self.timeout,
                 http2=self.http2,
+                follow_redirects=self.follow_redirects,
             ),
             start_time=start_time,
             end_time=iter_time,
@@ -568,7 +691,7 @@ class OpenAIHTTPBackend(Backend):
             return data["choices"][0]["text"]
 
         if type_ == "chat_completions":
-            return data["choices"][0]["delta"]["content"]
+            return data.get("choices", [{}])[0].get("delta", {}).get("content")
 
         raise ValueError(f"Unsupported type: {type_}")
 
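
The .get() chain makes chat streaming tolerant of chunks that carry no text, which OpenAI-compatible servers routinely emit (a role-only first delta, or a trailing usage-only chunk). A sketch of the difference:

    data = {"choices": [{"index": 0, "delta": {"role": "assistant"}}]}  # no "content"
    data["choices"][0]["delta"]["content"]                        # old: KeyError
    data.get("choices", [{}])[0].get("delta", {}).get("content")  # new: None
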

File: guidellm/backend/response.py

@@ -6,10 +6,10 @@ from guidellm.config import settings
 from guidellm.objects.pydantic import StandardBaseModel
 
 __all__ = [
-    "StreamingResponseType",
-    "StreamingTextResponse",
     "RequestArgs",
     "ResponseSummary",
+    "StreamingResponseType",
+    "StreamingTextResponse",
 ]
 
 
@@ -48,17 +48,21 @@ class RequestArgs(StandardBaseModel):
 
     :param target: The target URL or function for the request.
     :param headers: The headers, if any, included in the request such as authorization.
+    :param params: The query parameters, if any, included in the request.
     :param payload: The payload / arguments for the request including the prompt /
         content and other configurations.
     :param timeout: The timeout for the request in seconds, if any.
     :param http2: Whether HTTP/2 was used for the request, if applicable.
+    :param follow_redirects: Whether the request should follow redirect responses.
     """
 
     target: str
     headers: dict[str, str]
+    params: dict[str, str]
     payload: dict[str, Any]
     timeout: Optional[float] = None
     http2: Optional[bool] = None
+    follow_redirects: Optional[bool] = None
 
 
 class ResponseSummary(StandardBaseModel):
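
Request records now carry the query parameters and redirect policy next to the rest of the request metadata. A minimal construction sketch with hypothetical values:

    args = RequestArgs(
        target="http://localhost:8000/v1/chat/completions",
        headers={"Content-Type": "application/json"},
        params={"api-version": "2024-01-01"},
        payload={"model": "test-model", "stream": True},
        timeout=60.0,
        http2=True,
        follow_redirects=False,
    )
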

File: guidellm/benchmark/__init__.py

@@ -12,7 +12,7 @@ from .benchmark import (
     StatusBreakdown,
 )
 from .benchmarker import Benchmarker, BenchmarkerResult, GenerativeBenchmarker
-from .entrypoints import benchmark_generative_text
+from .entrypoints import benchmark_generative_text, reimport_benchmarks_report
 from .output import GenerativeBenchmarksConsole, GenerativeBenchmarksReport
 from .profile import (
     AsyncProfile,
@@ -32,42 +32,36 @@ from .progress import (
 )
 
 __all__ = [
-    # Aggregator
     "AggregatorT",
-    "BenchmarkAggregator",
-    "GenerativeBenchmarkAggregator",
-    # Benchmark
+    "AsyncProfile",
     "Benchmark",
+    "BenchmarkAggregator",
     "BenchmarkArgs",
     "BenchmarkMetrics",
     "BenchmarkRunStats",
     "BenchmarkT",
-    "GenerativeBenchmark",
-    "GenerativeMetrics",
-    "GenerativeTextErrorStats",
-    "GenerativeTextResponseStats",
-    "StatusBreakdown",
-    # Benchmarker
     "Benchmarker",
+    "BenchmarkerProgressDisplay",
     "BenchmarkerResult",
+    "BenchmarkerTaskProgressState",
+    "ConcurrentProfile",
+    "GenerativeBenchmark",
+    "GenerativeBenchmarkAggregator",
     "GenerativeBenchmarker",
-    # Entry points
-    "benchmark_generative_text",
-    # Output
     "GenerativeBenchmarksConsole",
     "GenerativeBenchmarksReport",
-    # Profile
-    "AsyncProfile",
-    "ConcurrentProfile",
+    "GenerativeMetrics",
+    "GenerativeTextBenchmarkerProgressDisplay",
+    "GenerativeTextBenchmarkerTaskProgressState",
+    "GenerativeTextErrorStats",
+    "GenerativeTextResponseStats",
     "Profile",
     "ProfileType",
+    "StatusBreakdown",
     "SweepProfile",
     "SynchronousProfile",
     "ThroughputProfile",
+    "benchmark_generative_text",
     "create_profile",
-    # Progress
-    "BenchmarkerProgressDisplay",
-    "BenchmarkerTaskProgressState",
-    "GenerativeTextBenchmarkerProgressDisplay",
-    "GenerativeTextBenchmarkerTaskProgressState",
+    "reimport_benchmarks_report",
 ]

File: guidellm/benchmark/aggregator.py

@@ -32,11 +32,11 @@ from guidellm.request import (
     GenerationRequest,
     GenerativeRequestLoaderDescription,
     RequestLoaderDescription,
+    RequestT,
+    ResponseT,
 )
 from guidellm.scheduler import (
     GenerativeRequestsWorkerDescription,
-    RequestT,
-    ResponseT,
     SchedulerRequestResult,
     WorkerDescription,
 )
@@ -403,7 +403,7 @@ class BenchmarkAggregator(
         in_warmup_duration = (
             self.args.warmup_duration
             and result.request_info.worker_start
-            <= (global_start_time - self.args.warmup_duration)
+            <= (global_start_time + self.args.warmup_duration)
         )
 
         if in_warmup_number or in_warmup_duration:
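
This is a sign fix: warmup_duration is an offset from the start of the run, so the warmup window must end after global_start_time, not before it. Concretely:

    # global_start_time = 1000.0, warmup_duration = 30.0
    # old: worker_start <= 970.0   -> never true; warmup was never applied
    # new: worker_start <= 1030.0  -> requests started in the first 30 s are warmup
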

File: guidellm/benchmark/benchmark.py

@@ -34,16 +34,16 @@ from guidellm.scheduler import (
 )
 
 __all__ = [
-    "BenchmarkT",
-    "StatusBreakdown",
-    "BenchmarkArgs",
-    "BenchmarkRunStats",
     "Benchmark",
+    "BenchmarkArgs",
     "BenchmarkMetrics",
-    "GenerativeTextResponseStats",
-    "GenerativeTextErrorStats",
-    "GenerativeMetrics",
+    "BenchmarkRunStats",
+    "BenchmarkT",
     "GenerativeBenchmark",
+    "GenerativeMetrics",
+    "GenerativeTextErrorStats",
+    "GenerativeTextResponseStats",
+    "StatusBreakdown",
 ]
 
 
@@ -815,12 +815,11 @@ class GenerativeBenchmark(Benchmark):
                     req.first_token_time or req.start_time
                     for req in total_with_output_first
                 ],
-                iter_counts=[
-                    req.prompt_tokens + req.output_tokens
-                    for req in total_with_output_first
-                ],
+                iter_counts=[req.output_tokens for req in total_with_output_first],
                 first_iter_counts=[
-                    req.prompt_tokens for req in total_with_output_first
+                    # prompt tokens + first token
+                    req.prompt_tokens + 1
+                    for req in total_with_output_first
                 ],
             ),
         ),
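
Per the added comment, the first stream iteration is now credited with the prompt plus the first generated token, and the remaining iterations with the output tokens, instead of counting prompt tokens in both lists. For a request with 100 prompt tokens and 20 output tokens:

    # old: first_iter_counts -> 100, iter_counts -> 120  (prompt counted twice)
    # new: first_iter_counts -> 101, iter_counts -> 20
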

File: guidellm/benchmark/benchmarker.py

@@ -27,12 +27,12 @@ from guidellm.request import (
     GenerationRequest,
     GenerativeRequestLoaderDescription,
     RequestLoaderDescription,
+    RequestT,
+    ResponseT,
 )
 from guidellm.scheduler import (
     GenerativeRequestsWorker,
     RequestsWorker,
-    RequestT,
-    ResponseT,
     Scheduler,
     SchedulerRequestResult,
     SchedulingStrategy,

File: guidellm/benchmark/entrypoints.py

@@ -15,10 +15,22 @@ from guidellm.benchmark.output import (
 )
 from guidellm.benchmark.profile import ProfileType, create_profile
 from guidellm.benchmark.progress import GenerativeTextBenchmarkerProgressDisplay
+from guidellm.benchmark.scenario import GenerativeTextScenario, Scenario
 from guidellm.request import GenerativeRequestLoader
 from guidellm.scheduler import StrategyType
 
 
+async def benchmark_with_scenario(scenario: Scenario, **kwargs):
+    """
+    Run a benchmark using a scenario and specify any extra arguments
+    """
+
+    if isinstance(scenario, GenerativeTextScenario):
+        return await benchmark_generative_text(**vars(scenario), **kwargs)
+    else:
+        raise ValueError(f"Unsupported Scenario type {type(scenario)}")
+
+
 async def benchmark_generative_text(
     target: str,
     backend_type: BackendType,
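
A usage sketch for the new scenario entry point. Because vars(scenario) is unpacked straight into benchmark_generative_text, GenerativeTextScenario's fields are assumed to mirror that function's parameters; the values below are hypothetical:

    import asyncio

    scenario = GenerativeTextScenario(
        target="http://localhost:8000",
        backend_type="openai_http",
        rate_type="synchronous",
        max_requests=100,
    )
    report, path = asyncio.run(benchmark_with_scenario(scenario, output_console=True))
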
@@ -38,18 +50,18 @@ async def benchmark_generative_text(
     data_args: Optional[dict[str, Any]],
     data_sampler: Optional[Literal["random"]],
     rate_type: Union[StrategyType, ProfileType],
-    rate: Optional[Union[int, float, list[Union[int, float]]]],
+    rate: Optional[Union[float, list[float]]],
     max_seconds: Optional[float],
     max_requests: Optional[int],
     warmup_percent: Optional[float],
     cooldown_percent: Optional[float],
-    show_progress: bool,
-    show_progress_scheduler_stats: bool,
-    output_console: bool,
     output_path: Optional[Union[str, Path]],
     output_extras: Optional[dict[str, Any]],
     output_sampling: Optional[int],
     random_seed: int,
+    show_progress: bool = True,
+    show_progress_scheduler_stats: bool = False,
+    output_console: bool = True,
 ) -> tuple[GenerativeBenchmarksReport, Optional[Path]]:
     console = GenerativeBenchmarksConsole(enabled=show_progress)
     console.print_line("Creating backend...")
@@ -121,13 +133,8 @@ async def benchmark_generative_text(
     )
 
     if output_console:
-        orig_enabled = console.enabled
-        console.enabled = True
         console.benchmarks = report.benchmarks
-        console.print_benchmarks_metadata()
-        console.print_benchmarks_info()
-        console.print_benchmarks_stats()
-        console.enabled = orig_enabled
+        console.print_full_report()
 
     if output_path:
         console.print_line("\nSaving benchmarks report...")
@@ -139,3 +146,20 @@ async def benchmark_generative_text(
     console.print_line("\nBenchmarking complete.")
 
     return report, saved_path
+
+
+def reimport_benchmarks_report(file: Path, output_path: Optional[Path]) -> None:
+    """
+    The command-line entry point for re-importing and displaying an
+    existing benchmarks report. Can also specify an output path to save
+    a copy of the report. Assumes the file provided exists.
+    """
+    console = GenerativeBenchmarksConsole(enabled=True)
+    report = GenerativeBenchmarksReport.load_file(file)
+    console.benchmarks = report.benchmarks
+    console.print_full_report()
+
+    if output_path:
+        console.print_line("\nSaving benchmarks report...")
+        saved_path = report.save_file(output_path)
+        console.print_line(f"Benchmarks report saved to {saved_path}")