guidellm 0.3.0a20__py3-none-any.whl → 0.3.0a26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of guidellm might be problematic. Click here for more details.

guidellm/__main__.py CHANGED
@@ -1,56 +1,55 @@
1
1
  import asyncio
2
2
  import codecs
3
- import json
4
3
  from pathlib import Path
5
4
  from typing import get_args
6
5
 
7
6
  import click
7
+ from pydantic import ValidationError
8
8
 
9
9
  from guidellm.backend import BackendType
10
- from guidellm.benchmark import ProfileType, benchmark_generative_text
10
+ from guidellm.benchmark import ProfileType
11
+ from guidellm.benchmark.entrypoints import benchmark_with_scenario
12
+ from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios
11
13
  from guidellm.config import print_config
12
14
  from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset
13
15
  from guidellm.scheduler import StrategyType
16
+ from guidellm.utils import cli as cli_tools
14
17
 
15
18
  STRATEGY_PROFILE_CHOICES = set(
16
19
  list(get_args(ProfileType)) + list(get_args(StrategyType))
17
20
  )
18
21
 
19
22
 
20
- def parse_json(ctx, param, value): # noqa: ARG001
21
- if value is None:
22
- return None
23
- try:
24
- return json.loads(value)
25
- except json.JSONDecodeError as err:
26
- raise click.BadParameter(f"{param.name} must be a valid JSON string.") from err
27
-
28
-
29
- def parse_number_str(ctx, param, value): # noqa: ARG001
30
- if value is None:
31
- return None
32
-
33
- values = value.split(",") if "," in value else [value]
34
-
35
- try:
36
- return [float(val) for val in values]
37
- except ValueError as err:
38
- raise click.BadParameter(
39
- f"{param.name} must be a number or comma-separated list of numbers."
40
- ) from err
41
-
42
-
43
23
  @click.group()
44
24
  def cli():
45
25
  pass
46
26
 
47
27
 
48
28
  @cli.command(
49
- help="Run a benchmark against a generative model using the specified arguments."
29
+ help="Run a benchmark against a generative model using the specified arguments.",
30
+ context_settings={"auto_envvar_prefix": "GUIDELLM"},
31
+ )
32
+ @click.option(
33
+ "--scenario",
34
+ type=cli_tools.Union(
35
+ click.Path(
36
+ exists=True,
37
+ readable=True,
38
+ file_okay=True,
39
+ dir_okay=False,
40
+ path_type=Path, # type: ignore[type-var]
41
+ ),
42
+ click.Choice(get_builtin_scenarios()),
43
+ ),
44
+ default=None,
45
+ help=(
46
+ "The name of a builtin scenario or path to a config file. "
47
+ "Missing values from the config will use defaults. "
48
+ "Options specified on the commandline will override the scenario."
49
+ ),
50
50
  )
51
51
  @click.option(
52
52
  "--target",
53
- required=True,
54
53
  type=str,
55
54
  help="The target path for the backend to run benchmarks against. For example, http://localhost:8000",
56
55
  )
@@ -61,12 +60,12 @@ def cli():
61
60
  "The type of backend to use to run requests against. Defaults to 'openai_http'."
62
61
  f" Supported types: {', '.join(get_args(BackendType))}"
63
62
  ),
64
- default="openai_http",
63
+ default=GenerativeTextScenario.get_default("backend_type"),
65
64
  )
66
65
  @click.option(
67
66
  "--backend-args",
68
- callback=parse_json,
69
- default=None,
67
+ callback=cli_tools.parse_json,
68
+ default=GenerativeTextScenario.get_default("backend_args"),
70
69
  help=(
71
70
  "A JSON string containing any arguments to pass to the backend as a "
72
71
  "dict with **kwargs."
@@ -74,7 +73,7 @@ def cli():
74
73
  )
75
74
  @click.option(
76
75
  "--model",
77
- default=None,
76
+ default=GenerativeTextScenario.get_default("model"),
78
77
  type=str,
79
78
  help=(
80
79
  "The ID of the model to benchmark within the backend. "
@@ -83,7 +82,7 @@ def cli():
83
82
  )
84
83
  @click.option(
85
84
  "--processor",
86
- default=None,
85
+ default=GenerativeTextScenario.get_default("processor"),
87
86
  type=str,
88
87
  help=(
89
88
  "The processor or tokenizer to use to calculate token counts for statistics "
@@ -93,8 +92,8 @@ def cli():
93
92
  )
94
93
  @click.option(
95
94
  "--processor-args",
96
- default=None,
97
- callback=parse_json,
95
+ default=GenerativeTextScenario.get_default("processor_args"),
96
+ callback=cli_tools.parse_json,
98
97
  help=(
99
98
  "A JSON string containing any arguments to pass to the processor constructor "
100
99
  "as a dict with **kwargs."
@@ -102,7 +101,6 @@ def cli():
102
101
  )
103
102
  @click.option(
104
103
  "--data",
105
- required=True,
106
104
  type=str,
107
105
  help=(
108
106
  "The HuggingFace dataset ID, a path to a HuggingFace dataset, "
@@ -112,7 +110,8 @@ def cli():
112
110
  )
113
111
  @click.option(
114
112
  "--data-args",
115
- callback=parse_json,
113
+ default=GenerativeTextScenario.get_default("data_args"),
114
+ callback=cli_tools.parse_json,
116
115
  help=(
117
116
  "A JSON string containing any arguments to pass to the dataset creation "
118
117
  "as a dict with **kwargs."
@@ -120,7 +119,7 @@ def cli():
120
119
  )
121
120
  @click.option(
122
121
  "--data-sampler",
123
- default=None,
122
+ default=GenerativeTextScenario.get_default("data_sampler"),
124
123
  type=click.Choice(["random"]),
125
124
  help=(
126
125
  "The data sampler type to use. 'random' will add a random shuffle on the data. "
@@ -129,7 +128,6 @@ def cli():
129
128
  )
130
129
  @click.option(
131
130
  "--rate-type",
132
- required=True,
133
131
  type=click.Choice(STRATEGY_PROFILE_CHOICES),
134
132
  help=(
135
133
  "The type of benchmark to run. "
@@ -138,8 +136,7 @@ def cli():
138
136
  )
139
137
  @click.option(
140
138
  "--rate",
141
- default=None,
142
- callback=parse_number_str,
139
+ default=GenerativeTextScenario.get_default("rate"),
143
140
  help=(
144
141
  "The rates to run the benchmark at. "
145
142
  "Can be a single number or a comma-separated list of numbers. "
@@ -152,6 +149,7 @@ def cli():
152
149
  @click.option(
153
150
  "--max-seconds",
154
151
  type=float,
152
+ default=GenerativeTextScenario.get_default("max_seconds"),
155
153
  help=(
156
154
  "The maximum number of seconds each benchmark can run for. "
157
155
  "If None, will run until max_requests or the data is exhausted."
@@ -160,6 +158,7 @@ def cli():
160
158
  @click.option(
161
159
  "--max-requests",
162
160
  type=int,
161
+ default=GenerativeTextScenario.get_default("max_requests"),
163
162
  help=(
164
163
  "The maximum number of requests each benchmark can run for. "
165
164
  "If None, will run until max_seconds or the data is exhausted."
@@ -168,7 +167,7 @@ def cli():
168
167
  @click.option(
169
168
  "--warmup-percent",
170
169
  type=float,
171
- default=None,
170
+ default=GenerativeTextScenario.get_default("warmup_percent"),
172
171
  help=(
173
172
  "The percent of the benchmark (based on max-seconds, max-requets, "
174
173
  "or lenth of dataset) to run as a warmup and not include in the final results. "
@@ -178,6 +177,7 @@ def cli():
178
177
  @click.option(
179
178
  "--cooldown-percent",
180
179
  type=float,
180
+ default=GenerativeTextScenario.get_default("cooldown_percent"),
181
181
  help=(
182
182
  "The percent of the benchmark (based on max-seconds, max-requets, or lenth "
183
183
  "of dataset) to run as a cooldown and not include in the final results. "
@@ -212,7 +212,7 @@ def cli():
212
212
  )
213
213
  @click.option(
214
214
  "--output-extras",
215
- callback=parse_json,
215
+ callback=cli_tools.parse_json,
216
216
  help="A JSON string of extra data to save with the output benchmarks",
217
217
  )
218
218
  @click.option(
@@ -222,15 +222,16 @@ def cli():
222
222
  "The number of samples to save in the output file. "
223
223
  "If None (default), will save all samples."
224
224
  ),
225
- default=None,
225
+ default=GenerativeTextScenario.get_default("output_sampling"),
226
226
  )
227
227
  @click.option(
228
228
  "--random-seed",
229
- default=42,
229
+ default=GenerativeTextScenario.get_default("random_seed"),
230
230
  type=int,
231
231
  help="The random seed to use for benchmarking to ensure reproducibility.",
232
232
  )
233
233
  def benchmark(
234
+ scenario,
234
235
  target,
235
236
  backend_type,
236
237
  backend_args,
@@ -254,30 +255,53 @@ def benchmark(
254
255
  output_sampling,
255
256
  random_seed,
256
257
  ):
258
+ click_ctx = click.get_current_context()
259
+
260
+ overrides = cli_tools.set_if_not_default(
261
+ click_ctx,
262
+ target=target,
263
+ backend_type=backend_type,
264
+ backend_args=backend_args,
265
+ model=model,
266
+ processor=processor,
267
+ processor_args=processor_args,
268
+ data=data,
269
+ data_args=data_args,
270
+ data_sampler=data_sampler,
271
+ rate_type=rate_type,
272
+ rate=rate,
273
+ max_seconds=max_seconds,
274
+ max_requests=max_requests,
275
+ warmup_percent=warmup_percent,
276
+ cooldown_percent=cooldown_percent,
277
+ output_sampling=output_sampling,
278
+ random_seed=random_seed,
279
+ )
280
+
281
+ try:
282
+ # If a scenario file was specified read from it
283
+ if scenario is None:
284
+ _scenario = GenerativeTextScenario.model_validate(overrides)
285
+ elif isinstance(scenario, Path):
286
+ _scenario = GenerativeTextScenario.from_file(scenario, overrides)
287
+ else: # Only builtins can make it here; click will catch anything else
288
+ _scenario = GenerativeTextScenario.from_builtin(scenario, overrides)
289
+ except ValidationError as e:
290
+ # Translate pydantic valdation error to click argument error
291
+ errs = e.errors(include_url=False, include_context=True, include_input=True)
292
+ param_name = "--" + str(errs[0]["loc"][0]).replace("_", "-")
293
+ raise click.BadParameter(
294
+ errs[0]["msg"], ctx=click_ctx, param_hint=param_name
295
+ ) from e
296
+
257
297
  asyncio.run(
258
- benchmark_generative_text(
259
- target=target,
260
- backend_type=backend_type,
261
- backend_args=backend_args,
262
- model=model,
263
- processor=processor,
264
- processor_args=processor_args,
265
- data=data,
266
- data_args=data_args,
267
- data_sampler=data_sampler,
268
- rate_type=rate_type,
269
- rate=rate,
270
- max_seconds=max_seconds,
271
- max_requests=max_requests,
272
- warmup_percent=warmup_percent,
273
- cooldown_percent=cooldown_percent,
298
+ benchmark_with_scenario(
299
+ scenario=_scenario,
274
300
  show_progress=not disable_progress,
275
301
  show_progress_scheduler_stats=display_scheduler_stats,
276
302
  output_console=not disable_console_outputs,
277
303
  output_path=output_path,
278
304
  output_extras=output_extras,
279
- output_sampling=output_sampling,
280
- random_seed=random_seed,
281
305
  )
282
306
  )
283
307
 
@@ -316,7 +340,8 @@ def preprocess():
316
340
  "Convert a dataset to have specific prompt and output token sizes.\n"
317
341
  "DATA: Path to the input dataset or dataset ID.\n"
318
342
  "OUTPUT_PATH: Path to save the converted dataset, including file suffix."
319
- )
343
+ ),
344
+ context_settings={"auto_envvar_prefix": "GUIDELLM"},
320
345
  )
321
346
  @click.argument(
322
347
  "data",
@@ -340,7 +365,7 @@ def preprocess():
340
365
  @click.option(
341
366
  "--processor-args",
342
367
  default=None,
343
- callback=parse_json,
368
+ callback=cli_tools.parse_json,
344
369
  help=(
345
370
  "A JSON string containing any arguments to pass to the processor constructor "
346
371
  "as a dict with **kwargs."
@@ -348,7 +373,7 @@ def preprocess():
348
373
  )
349
374
  @click.option(
350
375
  "--data-args",
351
- callback=parse_json,
376
+ callback=cli_tools.parse_json,
352
377
  help=(
353
378
  "A JSON string containing any arguments to pass to the dataset creation "
354
379
  "as a dict with **kwargs."
@@ -110,6 +110,14 @@ class Backend(ABC):
110
110
  """
111
111
  ...
112
112
 
113
+ @abstractmethod
114
+ async def reset(self) -> None:
115
+ """
116
+ Reset the connection object. This is useful for backends that
117
+ reuse connections or have state that needs to be cleared.
118
+ """
119
+ ...
120
+
113
121
  async def validate(self):
114
122
  """
115
123
  Handle final setup and validate the backend is ready for use.
@@ -126,6 +134,8 @@ class Backend(ABC):
126
134
  ): # type: ignore[attr-defined]
127
135
  pass
128
136
 
137
+ await self.reset()
138
+
129
139
  @abstractmethod
130
140
  async def check_setup(self):
131
141
  """
@@ -70,6 +70,14 @@ class OpenAIHTTPBackend(Backend):
70
70
  the values of these keys will be used as the parameters for the respective
71
71
  endpoint.
72
72
  If not provided, no extra query parameters are added.
73
+ :param extra_body: Body parameters to include in requests to the OpenAI server.
74
+ If "chat_completions", "models", or "text_completions" are included as keys,
75
+ the values of these keys will be included in the body for the respective
76
+ endpoint.
77
+ If not provided, no extra body parameters are added.
78
+ :param remove_from_body: Parameters that should be removed from the body of each
79
+ request.
80
+ If not provided, no parameters are removed from the body.
73
81
  """
74
82
 
75
83
  def __init__(
@@ -85,6 +93,7 @@ class OpenAIHTTPBackend(Backend):
85
93
  max_output_tokens: Optional[int] = None,
86
94
  extra_query: Optional[dict] = None,
87
95
  extra_body: Optional[dict] = None,
96
+ remove_from_body: Optional[list[str]] = None,
88
97
  ):
89
98
  super().__init__(type_="openai_http")
90
99
  self._target = target or settings.openai.base_url
@@ -122,6 +131,7 @@ class OpenAIHTTPBackend(Backend):
122
131
  )
123
132
  self.extra_query = extra_query
124
133
  self.extra_body = extra_body
134
+ self.remove_from_body = remove_from_body
125
135
  self._async_client: Optional[httpx.AsyncClient] = None
126
136
 
127
137
  @property
@@ -157,6 +167,15 @@ class OpenAIHTTPBackend(Backend):
157
167
  "chat_completions_path": CHAT_COMPLETIONS_PATH,
158
168
  }
159
169
 
170
+ async def reset(self) -> None:
171
+ """
172
+ Reset the connection object. This is useful for backends that
173
+ reuse connections or have state that needs to be cleared.
174
+ For this backend, it closes the async client if it exists.
175
+ """
176
+ if self._async_client is not None:
177
+ await self._async_client.aclose()
178
+
160
179
  async def check_setup(self):
161
180
  """
162
181
  Check if the backend is setup correctly and can be used for requests.
@@ -244,9 +263,8 @@ class OpenAIHTTPBackend(Backend):
244
263
 
245
264
  headers = self._headers()
246
265
  params = self._params(TEXT_COMPLETIONS)
247
- body = self._body(TEXT_COMPLETIONS)
248
266
  payload = self._completions_payload(
249
- body=body,
267
+ endpoint_type=TEXT_COMPLETIONS,
250
268
  orig_kwargs=kwargs,
251
269
  max_output_tokens=output_token_count,
252
270
  prompt=prompt,
@@ -321,12 +339,11 @@ class OpenAIHTTPBackend(Backend):
321
339
  logger.debug("{} invocation with args: {}", self.__class__.__name__, locals())
322
340
  headers = self._headers()
323
341
  params = self._params(CHAT_COMPLETIONS)
324
- body = self._body(CHAT_COMPLETIONS)
325
342
  messages = (
326
343
  content if raw_content else self._create_chat_messages(content=content)
327
344
  )
328
345
  payload = self._completions_payload(
329
- body=body,
346
+ endpoint_type=CHAT_COMPLETIONS,
330
347
  orig_kwargs=kwargs,
331
348
  max_output_tokens=output_token_count,
332
349
  messages=messages,
@@ -361,7 +378,7 @@ class OpenAIHTTPBackend(Backend):
361
378
 
362
379
  :return: The async HTTP client.
363
380
  """
364
- if self._async_client is None:
381
+ if self._async_client is None or self._async_client.is_closed:
365
382
  client = httpx.AsyncClient(
366
383
  http2=self.http2,
367
384
  timeout=self.timeout,
@@ -402,7 +419,7 @@ class OpenAIHTTPBackend(Backend):
402
419
 
403
420
  return self.extra_query
404
421
 
405
- def _body(self, endpoint_type: EndpointType) -> dict[str, str]:
422
+ def _extra_body(self, endpoint_type: EndpointType) -> dict[str, Any]:
406
423
  if self.extra_body is None:
407
424
  return {}
408
425
 
@@ -417,12 +434,12 @@ class OpenAIHTTPBackend(Backend):
417
434
 
418
435
  def _completions_payload(
419
436
  self,
420
- body: Optional[dict],
437
+ endpoint_type: EndpointType,
421
438
  orig_kwargs: Optional[dict],
422
439
  max_output_tokens: Optional[int],
423
440
  **kwargs,
424
441
  ) -> dict:
425
- payload = body or {}
442
+ payload = self._extra_body(endpoint_type)
426
443
  payload.update(orig_kwargs or {})
427
444
  payload.update(kwargs)
428
445
  payload["model"] = self.model
@@ -446,6 +463,10 @@ class OpenAIHTTPBackend(Backend):
446
463
  payload["stop"] = None
447
464
  payload["ignore_eos"] = True
448
465
 
466
+ if self.remove_from_body:
467
+ for key in self.remove_from_body:
468
+ payload.pop(key, None)
469
+
449
470
  return payload
450
471
 
451
472
  @staticmethod
@@ -815,10 +815,7 @@ class GenerativeBenchmark(Benchmark):
815
815
  req.first_token_time or req.start_time
816
816
  for req in total_with_output_first
817
817
  ],
818
- iter_counts=[
819
- req.output_tokens
820
- for req in total_with_output_first
821
- ],
818
+ iter_counts=[req.output_tokens for req in total_with_output_first],
822
819
  first_iter_counts=[
823
820
  req.prompt_tokens for req in total_with_output_first
824
821
  ],
@@ -15,10 +15,22 @@ from guidellm.benchmark.output import (
15
15
  )
16
16
  from guidellm.benchmark.profile import ProfileType, create_profile
17
17
  from guidellm.benchmark.progress import GenerativeTextBenchmarkerProgressDisplay
18
+ from guidellm.benchmark.scenario import GenerativeTextScenario, Scenario
18
19
  from guidellm.request import GenerativeRequestLoader
19
20
  from guidellm.scheduler import StrategyType
20
21
 
21
22
 
23
+ async def benchmark_with_scenario(scenario: Scenario, **kwargs):
24
+ """
25
+ Run a benchmark using a scenario and specify any extra arguments
26
+ """
27
+
28
+ if isinstance(scenario, GenerativeTextScenario):
29
+ return await benchmark_generative_text(**vars(scenario), **kwargs)
30
+ else:
31
+ raise ValueError(f"Unsupported Scenario type {type(scenario)}")
32
+
33
+
22
34
  async def benchmark_generative_text(
23
35
  target: str,
24
36
  backend_type: BackendType,
@@ -43,13 +55,13 @@ async def benchmark_generative_text(
43
55
  max_requests: Optional[int],
44
56
  warmup_percent: Optional[float],
45
57
  cooldown_percent: Optional[float],
46
- show_progress: bool,
47
- show_progress_scheduler_stats: bool,
48
- output_console: bool,
49
58
  output_path: Optional[Union[str, Path]],
50
59
  output_extras: Optional[dict[str, Any]],
51
60
  output_sampling: Optional[int],
52
61
  random_seed: int,
62
+ show_progress: bool = True,
63
+ show_progress_scheduler_stats: bool = False,
64
+ output_console: bool = True,
53
65
  ) -> tuple[GenerativeBenchmarksReport, Optional[Path]]:
54
66
  console = GenerativeBenchmarksConsole(enabled=show_progress)
55
67
  console.print_line("Creating backend...")
@@ -0,0 +1,104 @@
1
+ from collections.abc import Iterable
2
+ from functools import cache
3
+ from pathlib import Path
4
+ from typing import Annotated, Any, Literal, Optional, TypeVar, Union
5
+
6
+ from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
7
+ from pydantic import BeforeValidator, Field, NonNegativeInt, PositiveFloat, PositiveInt
8
+ from transformers.tokenization_utils_base import ( # type: ignore[import]
9
+ PreTrainedTokenizerBase,
10
+ )
11
+
12
+ from guidellm.backend.backend import BackendType
13
+ from guidellm.benchmark.profile import ProfileType
14
+ from guidellm.objects.pydantic import StandardBaseModel
15
+ from guidellm.scheduler.strategy import StrategyType
16
+
17
+ __ALL__ = ["Scenario", "GenerativeTextScenario", "get_builtin_scenarios"]
18
+
19
+ SCENARIO_DIR = Path(__file__).parent / "scenarios/"
20
+
21
+
22
+ @cache
23
+ def get_builtin_scenarios() -> list[str]:
24
+ """Returns list of builtin scenario names."""
25
+ return [p.stem for p in SCENARIO_DIR.glob("*.json")]
26
+
27
+
28
+ def parse_float_list(value: Union[str, float, list[float]]) -> list[float]:
29
+ """
30
+ Parse a comma separated string to a list of float
31
+ or convert single float list of one or pass float
32
+ list through.
33
+ """
34
+ if isinstance(value, (int, float)):
35
+ return [value]
36
+ elif isinstance(value, list):
37
+ return value
38
+
39
+ values = value.split(",") if "," in value else [value]
40
+
41
+ try:
42
+ return [float(val) for val in values]
43
+ except ValueError as err:
44
+ raise ValueError(
45
+ "must be a number or comma-separated list of numbers."
46
+ ) from err
47
+
48
+
49
+ T = TypeVar("T", bound="Scenario")
50
+
51
+
52
+ class Scenario(StandardBaseModel):
53
+ """
54
+ Parent Scenario class with common options for all benchmarking types.
55
+ """
56
+
57
+ target: str
58
+
59
+ @classmethod
60
+ def from_builtin(cls: type[T], name: str, overrides: Optional[dict] = None) -> T:
61
+ filename = SCENARIO_DIR / f"{name}.json"
62
+
63
+ if not filename.is_file():
64
+ raise ValueError(f"{name} is not a valid builtin scenario")
65
+
66
+ return cls.from_file(filename, overrides)
67
+
68
+
69
+ class GenerativeTextScenario(Scenario):
70
+ """
71
+ Scenario class for generative text benchmarks.
72
+ """
73
+
74
+ class Config:
75
+ # NOTE: This prevents errors due to unvalidatable
76
+ # types like PreTrainedTokenizerBase
77
+ arbitrary_types_allowed = True
78
+
79
+ backend_type: BackendType = "openai_http"
80
+ backend_args: Optional[dict[str, Any]] = None
81
+ model: Optional[str] = None
82
+ processor: Optional[Union[str, Path, PreTrainedTokenizerBase]] = None
83
+ processor_args: Optional[dict[str, Any]] = None
84
+ data: Union[
85
+ str,
86
+ Path,
87
+ Iterable[Union[str, dict[str, Any]]],
88
+ Dataset,
89
+ DatasetDict,
90
+ IterableDataset,
91
+ IterableDatasetDict,
92
+ ]
93
+ data_args: Optional[dict[str, Any]] = None
94
+ data_sampler: Optional[Literal["random"]] = None
95
+ rate_type: Union[StrategyType, ProfileType]
96
+ rate: Annotated[
97
+ Optional[list[PositiveFloat]], BeforeValidator(parse_float_list)
98
+ ] = None
99
+ max_seconds: Optional[PositiveFloat] = None
100
+ max_requests: Optional[PositiveInt] = None
101
+ warmup_percent: Annotated[Optional[float], Field(gt=0, le=1)] = None
102
+ cooldown_percent: Annotated[Optional[float], Field(gt=0, le=1)] = None
103
+ output_sampling: Optional[NonNegativeInt] = None
104
+ random_seed: int = 42
File without changes
@@ -1,10 +1,15 @@
1
- from typing import Any, Generic, TypeVar
1
+ import json
2
+ from pathlib import Path
3
+ from typing import Any, Generic, Optional, TypeVar
2
4
 
5
+ import yaml
3
6
  from loguru import logger
4
7
  from pydantic import BaseModel, ConfigDict, Field
5
8
 
6
9
  __all__ = ["StandardBaseModel", "StatusBreakdown"]
7
10
 
11
+ T = TypeVar("T", bound="StandardBaseModel")
12
+
8
13
 
9
14
  class StandardBaseModel(BaseModel):
10
15
  """
@@ -27,6 +32,30 @@ class StandardBaseModel(BaseModel):
27
32
  data,
28
33
  )
29
34
 
35
+ @classmethod
36
+ def get_default(cls: type[T], field: str) -> Any:
37
+ """Get default values for model fields"""
38
+ return cls.model_fields[field].default
39
+
40
+ @classmethod
41
+ def from_file(cls: type[T], filename: Path, overrides: Optional[dict] = None) -> T:
42
+ """
43
+ Attempt to create a new instance of the model using
44
+ data loaded from json or yaml file.
45
+ """
46
+ try:
47
+ with filename.open() as f:
48
+ if str(filename).endswith(".json"):
49
+ data = json.load(f)
50
+ else: # Assume everything else is yaml
51
+ data = yaml.safe_load(f)
52
+ except (json.JSONDecodeError, yaml.YAMLError) as e:
53
+ logger.error(f"Failed to parse {filename} as type {cls.__name__}")
54
+ raise ValueError(f"Error when parsing file: {filename}") from e
55
+
56
+ data.update(overrides)
57
+ return cls.model_validate(data)
58
+
30
59
 
31
60
  SuccessfulT = TypeVar("SuccessfulT")
32
61
  ErroredT = TypeVar("ErroredT")
guidellm/utils/cli.py ADDED
@@ -0,0 +1,62 @@
1
+ import json
2
+ from typing import Any
3
+
4
+ import click
5
+
6
+
7
+ def parse_json(ctx, param, value): # noqa: ARG001
8
+ if value is None:
9
+ return None
10
+ try:
11
+ return json.loads(value)
12
+ except json.JSONDecodeError as err:
13
+ raise click.BadParameter(f"{param.name} must be a valid JSON string.") from err
14
+
15
+
16
+ def set_if_not_default(ctx: click.Context, **kwargs) -> dict[str, Any]:
17
+ """
18
+ Set the value of a click option if it is not the default value.
19
+ This is useful for setting options that are not None by default.
20
+ """
21
+ values = {}
22
+ for k, v in kwargs.items():
23
+ if ctx.get_parameter_source(k) != click.core.ParameterSource.DEFAULT: # type: ignore[attr-defined]
24
+ values[k] = v
25
+
26
+ return values
27
+
28
+
29
+ class Union(click.ParamType):
30
+ """
31
+ A custom click parameter type that allows for multiple types to be accepted.
32
+ """
33
+
34
+ def __init__(self, *types: click.ParamType):
35
+ self.types = types
36
+ self.name = "".join(t.name for t in types)
37
+
38
+ def convert(self, value, param, ctx):
39
+ fails = []
40
+ for t in self.types:
41
+ try:
42
+ return t.convert(value, param, ctx)
43
+ except click.BadParameter as e:
44
+ fails.append(str(e))
45
+ continue
46
+
47
+ self.fail("; ".join(fails) or f"Invalid value: {value}") # noqa: RET503
48
+
49
+ def get_metavar(self, param: click.Parameter) -> str:
50
+ def get_choices(t: click.ParamType) -> str:
51
+ meta = t.get_metavar(param)
52
+ return meta if meta is not None else t.name
53
+
54
+ # Get the choices for each type in the union.
55
+ choices_str = "|".join(map(get_choices, self.types))
56
+
57
+ # Use curly braces to indicate a required argument.
58
+ if param.required and param.param_type_name == "argument":
59
+ return f"{{{choices_str}}}"
60
+
61
+ # Use square braces to indicate an option or optional argument.
62
+ return f"[{choices_str}]"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: guidellm
3
- Version: 0.3.0a20
3
+ Version: 0.3.0a26
4
4
  Summary: Guidance platform for deploying and managing large language models.
5
5
  Author: Red Hat
6
6
  License-Expression: Apache-2.0
@@ -208,6 +208,48 @@ The `guidellm benchmark` command is used to run benchmarks against a generative
208
208
 
209
209
  - `--output-path`: Defines the path to save the benchmark results. Supports JSON, YAML, or CSV formats. If a directory is provided, the results will be saved as `benchmarks.json` in that directory. If not set, the results will be saved in the current working directory.
210
210
 
211
+ ### GuideLLM UI
212
+
213
+ GuideLLM UI is a companion frontend for visualizing the results of a GuideLLM benchmark run.
214
+
215
+ ### 🛠 Running the UI
216
+
217
+ 1. Use the Hosted Build (Recommended for Most Users)
218
+
219
+ After running a benchmark with GuideLLM, a report.html file will be generated (by default at guidellm_report/report.html). This file references the latest stable version of the UI hosted at:
220
+
221
+ ```
222
+ https://neuralmagic.github.io/guidellm/ui/dev/
223
+ ```
224
+
225
+ Open the file in your browser and you're done—no setup required.
226
+
227
+ 2. Build and Serve the UI Locally (For Development) This option is useful if:
228
+
229
+ - You are actively developing the UI
230
+
231
+ - You want to test changes to the UI before publishing
232
+
233
+ - You want full control over how the report is displayed
234
+
235
+ ```bash
236
+ npm install
237
+ npm run build
238
+ npx serve out
239
+ ```
240
+
241
+ This will start a local server (e.g., at http://localhost:3000). Then, in your GuideLLM config or CLI flags, point to this local server as the asset base for report generation.
242
+
243
+ ### 🧪 Development Notes
244
+
245
+ During UI development, it can be helpful to view sample data. We include a sample benchmark run wired into the Redux store under:
246
+
247
+ ```
248
+ src/lib/store/[runInfo/workloadDetails/benchmarks]WindowData.ts
249
+ ```
250
+
251
+ In the future this will be replaced by a configurable untracked file for dev use.
252
+
211
253
  ## Resources
212
254
 
213
255
  ### Documentation
@@ -1,20 +1,22 @@
1
1
  guidellm/__init__.py,sha256=qXCx-HonNByJ2PDKqOUnD7CcbxA7cazNKyqKigwyuyE,1139
2
- guidellm/__main__.py,sha256=QwExTHhseN2JjHoclUPI1H_yspshOS_mtZ84466CnPE,11751
2
+ guidellm/__main__.py,sha256=ILU-GcMzioJOejVYEBvJzpWpeR09ar_hW55BP4scVrk,13546
3
3
  guidellm/config.py,sha256=-JuirSy1EDkQnXHfLKBV_PeQGFwi5nL088BKeCgh9Xo,6087
4
4
  guidellm/logger.py,sha256=O4sU2QKHn_swJIEmayiEt6nIXzGHGmXqZ_Mg8CdIE5Q,2609
5
5
  guidellm/version.py,sha256=XZeUwR24DzG1AjuKV0s8cRyc0Xv8cPiqXYSZTt7xQVg,127
6
6
  guidellm/backend/__init__.py,sha256=nQmxVNG9cDdNbY0NwjXxIRV4oyZKIhb6cGyuXOakFnk,489
7
- guidellm/backend/backend.py,sha256=C8zVY6Gq32OInBkc_Czm9SxByWWi4jeXSHt7rtQ3WAY,8971
8
- guidellm/backend/openai.py,sha256=FrMpJcQDtb49TAgrPINmcOguXy5XJDIfr613h6M9iVY,25196
7
+ guidellm/backend/backend.py,sha256=6jgfvUGbT46e7bEu5IZjm6YO3mVtme-vthJVuZBFEgQ,9227
8
+ guidellm/backend/openai.py,sha256=gB47-kYtFlQ4cFWuLG9i2mXJf1PW8kwDTKJ8hRmYXFs,26288
9
9
  guidellm/backend/response.py,sha256=ipv5amHOGHK0fW4rOIt1d8Vj1yXTizSh3Ux5MaN96KY,5139
10
10
  guidellm/benchmark/__init__.py,sha256=t5dEG93mmUtfQq6EycAG8awm28srOswlqsLp1hLZBQY,1775
11
11
  guidellm/benchmark/aggregator.py,sha256=UejpeLT2qAJDX4tgKHx-IUGxT6L4YP5bCq1vwSoxtKk,31312
12
- guidellm/benchmark/benchmark.py,sha256=aMEkGj0dYsVkhIu-KMpCPO0HmE4ngI0h3wss_--Z2T0,31794
12
+ guidellm/benchmark/benchmark.py,sha256=lIqYKfTMKvhdHKTty1GtcQPI8SnlCBRH41Mqchr6kfo,31724
13
13
  guidellm/benchmark/benchmarker.py,sha256=aiJGkOPZYhVDQiEaLDSC3925J2Z2ACyvsKXFCOLejj0,11738
14
- guidellm/benchmark/entrypoints.py,sha256=n1BFXctAuhj-tUtJcC7soj8kGQIwUJjGVhdciCu-00Y,4913
14
+ guidellm/benchmark/entrypoints.py,sha256=R88MM6nARXyIcqrI5kJPN3FWz40nmRx3TApJ2tCwBPM,5371
15
15
  guidellm/benchmark/output.py,sha256=Co2dotBH036Q7m_8s4DEuohIqJCS3NNejQvl9WXYPEU,34647
16
16
  guidellm/benchmark/profile.py,sha256=A-vovIl39d1GiI4cOYgWai_99yj2ExWVlx88LAbIm5I,13091
17
17
  guidellm/benchmark/progress.py,sha256=yydigV1E-jrF5cN__8m-1NiOlrdRaV68FgMBTZ7lK8E,25279
18
+ guidellm/benchmark/scenario.py,sha256=5DhqrcuIwwumLBD065yRvHQ-VBi2Vk3DWXPU1zeFGDE,3309
19
+ guidellm/benchmark/scenarios/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
20
  guidellm/data/__init__.py,sha256=sTZlJdpYcfFKkNk-CHtHm7_l8n9KwBhlM7qs247ELCY,120
19
21
  guidellm/data/prideandprejudice.txt.gz,sha256=dmze5vnnSZmTU5NP7E1zcH30bOOnpYu5Motlm57xpYk,241795
20
22
  guidellm/dataset/__init__.py,sha256=iWj58dDYdljYKHMeEVRol65YmsxC8C6ysPQw6v2mozw,593
@@ -25,7 +27,7 @@ guidellm/dataset/hf_datasets.py,sha256=oWHeC0Iup4E4QulWcZWiCcQ4jtjw9_sTINJ6EEBTC
25
27
  guidellm/dataset/in_memory.py,sha256=_p8CCW9xTTWG9aqbIE4mf9s3BVAZutneIPn-rEXT7xo,4699
26
28
  guidellm/dataset/synthetic.py,sha256=PXYj06X9QuSJ98oGR7PmFVQYuqwacbq5OCnt45X2LTs,8518
27
29
  guidellm/objects/__init__.py,sha256=FJg5REEBg9hSuMAYnGAud9IxBcASpZuDwYDYEmwdnTY,384
28
- guidellm/objects/pydantic.py,sha256=oF5SkXlNZLNXpnoGoJb3QHNvqDwFRLSsiPIecf8E-_g,1850
30
+ guidellm/objects/pydantic.py,sha256=l19IHTJVIet0547xZ18jdcT44rI5Bmf19ASV3vpxww4,2889
29
31
  guidellm/objects/statistics.py,sha256=xf0PyCnJjIcvgOyD_YHgqtvgN4W8-crGi9u3v5u78Tw,36950
30
32
  guidellm/preprocess/__init__.py,sha256=6mRs1atYwYkdX4txez_bEVk-_nCDsNt5Wo20eWZ24jA,112
31
33
  guidellm/preprocess/dataset.py,sha256=P-TJEebcWnUFGczImc7AOi90vJ4sakZtU8NSYWhJXlM,12318
@@ -39,14 +41,15 @@ guidellm/scheduler/strategy.py,sha256=MjSQvyBUK9-JIdPOHVEuTdNzzuIhVIfBeu2_Z98A7a
39
41
  guidellm/scheduler/types.py,sha256=zHZ94-zEYo4LkU3qrfT3BRoZioicDMCQDiY8hYHnkfI,130
40
42
  guidellm/scheduler/worker.py,sha256=f1FjI9JJRbz39rTIJRFJ-drcdavJGuPlJC_iQrUa4N0,17629
41
43
  guidellm/utils/__init__.py,sha256=l1PZxQvk6gYQshUCEPbB8CU42eB2mHn1TVDEKGYIA5c,651
44
+ guidellm/utils/cli.py,sha256=QTBgHBwcLZWI_VhviW09gfBwCcFXVsi-z7ldEVbqZcU,1945
42
45
  guidellm/utils/colors.py,sha256=D0IGz8A346-Pt5qgnP3S5uV-VgngJoXbfToVCOna41k,175
43
46
  guidellm/utils/hf_datasets.py,sha256=C99cB4StbhjC8XtnzLLGe6A0TYrs63EapQZJQmQr8dI,1023
44
47
  guidellm/utils/hf_transformers.py,sha256=3iF40l02VEWOcS8kasO8TSws0Lp3cE-NyiqoB9GnHuA,1021
45
48
  guidellm/utils/random.py,sha256=elA8HZ3AIN5T2pa7cgq35OVK__0SQmZVS4IzxJaOpvw,1310
46
49
  guidellm/utils/text.py,sha256=Xn6JUWy3B7gi1l0UkBFLizV9fnZ_kM3OQDMLlIZqsgE,6347
47
- guidellm-0.3.0a20.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
48
- guidellm-0.3.0a20.dist-info/METADATA,sha256=8-qSixd41x60736sdTudiXZeh8b3GmuSxEz5zV7CxRI,18061
49
- guidellm-0.3.0a20.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
50
- guidellm-0.3.0a20.dist-info/entry_points.txt,sha256=DzLFEg47fF7qY1b-9laPz9jg0KSKJ1_D9TbF93kLz_E,51
51
- guidellm-0.3.0a20.dist-info/top_level.txt,sha256=EXRGjnvFtL6MeZTe0tnHRMYcEWUW3vEqoG2zO7vFOtk,9
52
- guidellm-0.3.0a20.dist-info/RECORD,,
50
+ guidellm-0.3.0a26.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
51
+ guidellm-0.3.0a26.dist-info/METADATA,sha256=lnzfMGbcc2BK3JReXgfEFSAd7e2DLWHXEhmMjIYkZzc,19344
52
+ guidellm-0.3.0a26.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
53
+ guidellm-0.3.0a26.dist-info/entry_points.txt,sha256=DzLFEg47fF7qY1b-9laPz9jg0KSKJ1_D9TbF93kLz_E,51
54
+ guidellm-0.3.0a26.dist-info/top_level.txt,sha256=EXRGjnvFtL6MeZTe0tnHRMYcEWUW3vEqoG2zO7vFOtk,9
55
+ guidellm-0.3.0a26.dist-info/RECORD,,