guidellm 0.3.0a22__tar.gz → 0.3.0a26__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63)
  1. {guidellm-0.3.0a22/src/guidellm.egg-info → guidellm-0.3.0a26}/PKG-INFO +43 -1
  2. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/README.md +42 -0
  3. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/__main__.py +90 -65
  4. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/backend/openai.py +19 -7
  5. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/benchmark/benchmark.py +1 -4
  6. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/benchmark/entrypoints.py +15 -3
  7. guidellm-0.3.0a26/src/guidellm/benchmark/scenario.py +104 -0
  8. guidellm-0.3.0a26/src/guidellm/benchmark/scenarios/__init__.py +0 -0
  9. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/objects/pydantic.py +30 -1
  10. guidellm-0.3.0a26/src/guidellm/utils/cli.py +62 -0
  11. guidellm-0.3.0a26/src/guidellm/version.py +6 -0
  12. {guidellm-0.3.0a22 → guidellm-0.3.0a26/src/guidellm.egg-info}/PKG-INFO +43 -1
  13. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm.egg-info/SOURCES.txt +3 -0
  14. guidellm-0.3.0a22/src/guidellm/version.py +0 -6
  15. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/LICENSE +0 -0
  16. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/MANIFEST.in +0 -0
  17. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/pyproject.toml +0 -0
  18. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/setup.cfg +0 -0
  19. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/setup.py +0 -0
  20. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/__init__.py +0 -0
  21. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/backend/__init__.py +0 -0
  22. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/backend/backend.py +0 -0
  23. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/backend/response.py +0 -0
  24. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/benchmark/__init__.py +0 -0
  25. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/benchmark/aggregator.py +0 -0
  26. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/benchmark/benchmarker.py +0 -0
  27. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/benchmark/output.py +0 -0
  28. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/benchmark/profile.py +0 -0
  29. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/benchmark/progress.py +0 -0
  30. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/config.py +0 -0
  31. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/data/__init__.py +0 -0
  32. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/data/prideandprejudice.txt.gz +0 -0
  33. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/dataset/__init__.py +0 -0
  34. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/dataset/creator.py +0 -0
  35. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/dataset/entrypoints.py +0 -0
  36. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/dataset/file.py +0 -0
  37. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/dataset/hf_datasets.py +0 -0
  38. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/dataset/in_memory.py +0 -0
  39. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/dataset/synthetic.py +0 -0
  40. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/logger.py +0 -0
  41. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/objects/__init__.py +0 -0
  42. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/objects/statistics.py +0 -0
  43. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/preprocess/__init__.py +0 -0
  44. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/preprocess/dataset.py +0 -0
  45. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/request/__init__.py +0 -0
  46. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/request/loader.py +0 -0
  47. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/request/request.py +0 -0
  48. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/scheduler/__init__.py +0 -0
  49. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/scheduler/result.py +0 -0
  50. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/scheduler/scheduler.py +0 -0
  51. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/scheduler/strategy.py +0 -0
  52. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/scheduler/types.py +0 -0
  53. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/scheduler/worker.py +0 -0
  54. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/utils/__init__.py +0 -0
  55. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/utils/colors.py +0 -0
  56. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/utils/hf_datasets.py +0 -0
  57. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/utils/hf_transformers.py +0 -0
  58. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/utils/random.py +0 -0
  59. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/utils/text.py +0 -0
  60. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm.egg-info/dependency_links.txt +0 -0
  61. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm.egg-info/entry_points.txt +0 -0
  62. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm.egg-info/requires.txt +0 -0
  63. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm.egg-info/top_level.txt +0 -0

{guidellm-0.3.0a22/src/guidellm.egg-info → guidellm-0.3.0a26}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: guidellm
-Version: 0.3.0a22
+Version: 0.3.0a26
 Summary: Guidance platform for deploying and managing large language models.
 Author: Red Hat
 License-Expression: Apache-2.0
@@ -208,6 +208,48 @@ The `guidellm benchmark` command is used to run benchmarks against a generative

 - `--output-path`: Defines the path to save the benchmark results. Supports JSON, YAML, or CSV formats. If a directory is provided, the results will be saved as `benchmarks.json` in that directory. If not set, the results will be saved in the current working directory.

+### GuideLLM UI
+
+GuideLLM UI is a companion frontend for visualizing the results of a GuideLLM benchmark run.
+
+### 🛠 Running the UI
+
+1. Use the Hosted Build (Recommended for Most Users)
+
+After running a benchmark with GuideLLM, a report.html file will be generated (by default at guidellm_report/report.html). This file references the latest stable version of the UI hosted at:
+
+```
+https://neuralmagic.github.io/guidellm/ui/dev/
+```
+
+Open the file in your browser and you're done—no setup required.
+
+2. Build and Serve the UI Locally (For Development) This option is useful if:
+
+- You are actively developing the UI
+
+- You want to test changes to the UI before publishing
+
+- You want full control over how the report is displayed
+
+```bash
+npm install
+npm run build
+npx serve out
+```
+
+This will start a local server (e.g., at http://localhost:3000). Then, in your GuideLLM config or CLI flags, point to this local server as the asset base for report generation.
+
+### 🧪 Development Notes
+
+During UI development, it can be helpful to view sample data. We include a sample benchmark run wired into the Redux store under:
+
+```
+src/lib/store/[runInfo/workloadDetails/benchmarks]WindowData.ts
+```
+
+In the future this will be replaced by a configurable untracked file for dev use.
+
 ## Resources

 ### Documentation

{guidellm-0.3.0a22 → guidellm-0.3.0a26}/README.md

@@ -153,6 +153,48 @@ The `guidellm benchmark` command is used to run benchmarks against a generative

 - `--output-path`: Defines the path to save the benchmark results. Supports JSON, YAML, or CSV formats. If a directory is provided, the results will be saved as `benchmarks.json` in that directory. If not set, the results will be saved in the current working directory.

+### GuideLLM UI
+
+GuideLLM UI is a companion frontend for visualizing the results of a GuideLLM benchmark run.
+
+### 🛠 Running the UI
+
+1. Use the Hosted Build (Recommended for Most Users)
+
+After running a benchmark with GuideLLM, a report.html file will be generated (by default at guidellm_report/report.html). This file references the latest stable version of the UI hosted at:
+
+```
+https://neuralmagic.github.io/guidellm/ui/dev/
+```
+
+Open the file in your browser and you're done—no setup required.
+
+2. Build and Serve the UI Locally (For Development) This option is useful if:
+
+- You are actively developing the UI
+
+- You want to test changes to the UI before publishing
+
+- You want full control over how the report is displayed
+
+```bash
+npm install
+npm run build
+npx serve out
+```
+
+This will start a local server (e.g., at http://localhost:3000). Then, in your GuideLLM config or CLI flags, point to this local server as the asset base for report generation.
+
+### 🧪 Development Notes
+
+During UI development, it can be helpful to view sample data. We include a sample benchmark run wired into the Redux store under:
+
+```
+src/lib/store/[runInfo/workloadDetails/benchmarks]WindowData.ts
+```
+
+In the future this will be replaced by a configurable untracked file for dev use.
+
 ## Resources

 ### Documentation

{guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/__main__.py

@@ -1,56 +1,55 @@
 import asyncio
 import codecs
-import json
 from pathlib import Path
 from typing import get_args

 import click
+from pydantic import ValidationError

 from guidellm.backend import BackendType
-from guidellm.benchmark import ProfileType, benchmark_generative_text
+from guidellm.benchmark import ProfileType
+from guidellm.benchmark.entrypoints import benchmark_with_scenario
+from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios
 from guidellm.config import print_config
 from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset
 from guidellm.scheduler import StrategyType
+from guidellm.utils import cli as cli_tools

 STRATEGY_PROFILE_CHOICES = set(
     list(get_args(ProfileType)) + list(get_args(StrategyType))
 )


-def parse_json(ctx, param, value):  # noqa: ARG001
-    if value is None:
-        return None
-    try:
-        return json.loads(value)
-    except json.JSONDecodeError as err:
-        raise click.BadParameter(f"{param.name} must be a valid JSON string.") from err
-
-
-def parse_number_str(ctx, param, value):  # noqa: ARG001
-    if value is None:
-        return None
-
-    values = value.split(",") if "," in value else [value]
-
-    try:
-        return [float(val) for val in values]
-    except ValueError as err:
-        raise click.BadParameter(
-            f"{param.name} must be a number or comma-separated list of numbers."
-        ) from err
-
-
 @click.group()
 def cli():
     pass


 @cli.command(
-    help="Run a benchmark against a generative model using the specified arguments."
+    help="Run a benchmark against a generative model using the specified arguments.",
+    context_settings={"auto_envvar_prefix": "GUIDELLM"},
+)
+@click.option(
+    "--scenario",
+    type=cli_tools.Union(
+        click.Path(
+            exists=True,
+            readable=True,
+            file_okay=True,
+            dir_okay=False,
+            path_type=Path,  # type: ignore[type-var]
+        ),
+        click.Choice(get_builtin_scenarios()),
+    ),
+    default=None,
+    help=(
+        "The name of a builtin scenario or path to a config file. "
+        "Missing values from the config will use defaults. "
+        "Options specified on the commandline will override the scenario."
+    ),
 )
 @click.option(
     "--target",
-    required=True,
     type=str,
     help="The target path for the backend to run benchmarks against. For example, http://localhost:8000",
 )
@@ -61,12 +60,12 @@ def cli():
         "The type of backend to use to run requests against. Defaults to 'openai_http'."
         f" Supported types: {', '.join(get_args(BackendType))}"
     ),
-    default="openai_http",
+    default=GenerativeTextScenario.get_default("backend_type"),
 )
 @click.option(
     "--backend-args",
-    callback=parse_json,
-    default=None,
+    callback=cli_tools.parse_json,
+    default=GenerativeTextScenario.get_default("backend_args"),
     help=(
         "A JSON string containing any arguments to pass to the backend as a "
         "dict with **kwargs."
@@ -74,7 +73,7 @@ def cli():
 )
 @click.option(
     "--model",
-    default=None,
+    default=GenerativeTextScenario.get_default("model"),
     type=str,
     help=(
         "The ID of the model to benchmark within the backend. "
@@ -83,7 +82,7 @@ def cli():
 )
 @click.option(
     "--processor",
-    default=None,
+    default=GenerativeTextScenario.get_default("processor"),
     type=str,
     help=(
         "The processor or tokenizer to use to calculate token counts for statistics "
@@ -93,8 +92,8 @@ def cli():
 )
 @click.option(
     "--processor-args",
-    default=None,
-    callback=parse_json,
+    default=GenerativeTextScenario.get_default("processor_args"),
+    callback=cli_tools.parse_json,
     help=(
         "A JSON string containing any arguments to pass to the processor constructor "
         "as a dict with **kwargs."
@@ -102,7 +101,6 @@ def cli():
 )
 @click.option(
     "--data",
-    required=True,
     type=str,
     help=(
         "The HuggingFace dataset ID, a path to a HuggingFace dataset, "
@@ -112,7 +110,8 @@ def cli():
 )
 @click.option(
     "--data-args",
-    callback=parse_json,
+    default=GenerativeTextScenario.get_default("data_args"),
+    callback=cli_tools.parse_json,
     help=(
         "A JSON string containing any arguments to pass to the dataset creation "
         "as a dict with **kwargs."
@@ -120,7 +119,7 @@ def cli():
 )
 @click.option(
     "--data-sampler",
-    default=None,
+    default=GenerativeTextScenario.get_default("data_sampler"),
     type=click.Choice(["random"]),
     help=(
         "The data sampler type to use. 'random' will add a random shuffle on the data. "
@@ -129,7 +128,6 @@ def cli():
 )
 @click.option(
     "--rate-type",
-    required=True,
     type=click.Choice(STRATEGY_PROFILE_CHOICES),
    help=(
         "The type of benchmark to run. "
@@ -138,8 +136,7 @@ def cli():
 )
 @click.option(
     "--rate",
-    default=None,
-    callback=parse_number_str,
+    default=GenerativeTextScenario.get_default("rate"),
     help=(
         "The rates to run the benchmark at. "
         "Can be a single number or a comma-separated list of numbers. "
@@ -152,6 +149,7 @@ def cli():
 @click.option(
     "--max-seconds",
     type=float,
+    default=GenerativeTextScenario.get_default("max_seconds"),
     help=(
         "The maximum number of seconds each benchmark can run for. "
         "If None, will run until max_requests or the data is exhausted."
@@ -160,6 +158,7 @@ def cli():
 @click.option(
     "--max-requests",
     type=int,
+    default=GenerativeTextScenario.get_default("max_requests"),
     help=(
         "The maximum number of requests each benchmark can run for. "
         "If None, will run until max_seconds or the data is exhausted."
@@ -168,7 +167,7 @@ def cli():
 @click.option(
     "--warmup-percent",
     type=float,
-    default=None,
+    default=GenerativeTextScenario.get_default("warmup_percent"),
     help=(
         "The percent of the benchmark (based on max-seconds, max-requets, "
         "or lenth of dataset) to run as a warmup and not include in the final results. "
@@ -178,6 +177,7 @@ def cli():
 @click.option(
     "--cooldown-percent",
     type=float,
+    default=GenerativeTextScenario.get_default("cooldown_percent"),
     help=(
         "The percent of the benchmark (based on max-seconds, max-requets, or lenth "
         "of dataset) to run as a cooldown and not include in the final results. "
@@ -212,7 +212,7 @@ def cli():
 )
 @click.option(
     "--output-extras",
-    callback=parse_json,
+    callback=cli_tools.parse_json,
     help="A JSON string of extra data to save with the output benchmarks",
 )
 @click.option(
@@ -222,15 +222,16 @@ def cli():
         "The number of samples to save in the output file. "
         "If None (default), will save all samples."
     ),
-    default=None,
+    default=GenerativeTextScenario.get_default("output_sampling"),
 )
 @click.option(
     "--random-seed",
-    default=42,
+    default=GenerativeTextScenario.get_default("random_seed"),
     type=int,
     help="The random seed to use for benchmarking to ensure reproducibility.",
 )
 def benchmark(
+    scenario,
     target,
     backend_type,
     backend_args,
@@ -254,30 +255,53 @@ def benchmark(
     output_sampling,
     random_seed,
 ):
+    click_ctx = click.get_current_context()
+
+    overrides = cli_tools.set_if_not_default(
+        click_ctx,
+        target=target,
+        backend_type=backend_type,
+        backend_args=backend_args,
+        model=model,
+        processor=processor,
+        processor_args=processor_args,
+        data=data,
+        data_args=data_args,
+        data_sampler=data_sampler,
+        rate_type=rate_type,
+        rate=rate,
+        max_seconds=max_seconds,
+        max_requests=max_requests,
+        warmup_percent=warmup_percent,
+        cooldown_percent=cooldown_percent,
+        output_sampling=output_sampling,
+        random_seed=random_seed,
+    )
+
+    try:
+        # If a scenario file was specified read from it
+        if scenario is None:
+            _scenario = GenerativeTextScenario.model_validate(overrides)
+        elif isinstance(scenario, Path):
+            _scenario = GenerativeTextScenario.from_file(scenario, overrides)
+        else:  # Only builtins can make it here; click will catch anything else
+            _scenario = GenerativeTextScenario.from_builtin(scenario, overrides)
+    except ValidationError as e:
+        # Translate pydantic valdation error to click argument error
+        errs = e.errors(include_url=False, include_context=True, include_input=True)
+        param_name = "--" + str(errs[0]["loc"][0]).replace("_", "-")
+        raise click.BadParameter(
+            errs[0]["msg"], ctx=click_ctx, param_hint=param_name
+        ) from e
+
     asyncio.run(
-        benchmark_generative_text(
-            target=target,
-            backend_type=backend_type,
-            backend_args=backend_args,
-            model=model,
-            processor=processor,
-            processor_args=processor_args,
-            data=data,
-            data_args=data_args,
-            data_sampler=data_sampler,
-            rate_type=rate_type,
-            rate=rate,
-            max_seconds=max_seconds,
-            max_requests=max_requests,
-            warmup_percent=warmup_percent,
-            cooldown_percent=cooldown_percent,
+        benchmark_with_scenario(
+            scenario=_scenario,
            show_progress=not disable_progress,
            show_progress_scheduler_stats=display_scheduler_stats,
            output_console=not disable_console_outputs,
            output_path=output_path,
            output_extras=output_extras,
-            output_sampling=output_sampling,
-            random_seed=random_seed,
         )
    )

@@ -316,7 +340,8 @@ def preprocess():
         "Convert a dataset to have specific prompt and output token sizes.\n"
         "DATA: Path to the input dataset or dataset ID.\n"
         "OUTPUT_PATH: Path to save the converted dataset, including file suffix."
-    )
+    ),
+    context_settings={"auto_envvar_prefix": "GUIDELLM"},
 )
 @click.argument(
     "data",
@@ -340,7 +365,7 @@ def preprocess():
 @click.option(
     "--processor-args",
     default=None,
-    callback=parse_json,
+    callback=cli_tools.parse_json,
     help=(
         "A JSON string containing any arguments to pass to the processor constructor "
         "as a dict with **kwargs."
@@ -348,7 +373,7 @@ def preprocess():
 )
 @click.option(
     "--data-args",
-    callback=parse_json,
+    callback=cli_tools.parse_json,
     help=(
         "A JSON string containing any arguments to pass to the dataset creation "
         "as a dict with **kwargs."

{guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/backend/openai.py

@@ -70,6 +70,14 @@ class OpenAIHTTPBackend(Backend):
         the values of these keys will be used as the parameters for the respective
         endpoint.
         If not provided, no extra query parameters are added.
+    :param extra_body: Body parameters to include in requests to the OpenAI server.
+        If "chat_completions", "models", or "text_completions" are included as keys,
+        the values of these keys will be included in the body for the respective
+        endpoint.
+        If not provided, no extra body parameters are added.
+    :param remove_from_body: Parameters that should be removed from the body of each
+        request.
+        If not provided, no parameters are removed from the body.
     """

     def __init__(
@@ -85,6 +93,7 @@ class OpenAIHTTPBackend(Backend):
         max_output_tokens: Optional[int] = None,
         extra_query: Optional[dict] = None,
         extra_body: Optional[dict] = None,
+        remove_from_body: Optional[list[str]] = None,
     ):
         super().__init__(type_="openai_http")
         self._target = target or settings.openai.base_url
@@ -122,6 +131,7 @@ class OpenAIHTTPBackend(Backend):
         )
         self.extra_query = extra_query
         self.extra_body = extra_body
+        self.remove_from_body = remove_from_body
         self._async_client: Optional[httpx.AsyncClient] = None

     @property
@@ -253,9 +263,8 @@ class OpenAIHTTPBackend(Backend):

         headers = self._headers()
         params = self._params(TEXT_COMPLETIONS)
-        body = self._body(TEXT_COMPLETIONS)
         payload = self._completions_payload(
-            body=body,
+            endpoint_type=TEXT_COMPLETIONS,
             orig_kwargs=kwargs,
             max_output_tokens=output_token_count,
             prompt=prompt,
@@ -330,12 +339,11 @@ class OpenAIHTTPBackend(Backend):
         logger.debug("{} invocation with args: {}", self.__class__.__name__, locals())
         headers = self._headers()
         params = self._params(CHAT_COMPLETIONS)
-        body = self._body(CHAT_COMPLETIONS)
         messages = (
             content if raw_content else self._create_chat_messages(content=content)
         )
         payload = self._completions_payload(
-            body=body,
+            endpoint_type=CHAT_COMPLETIONS,
             orig_kwargs=kwargs,
             max_output_tokens=output_token_count,
             messages=messages,
@@ -411,7 +419,7 @@ class OpenAIHTTPBackend(Backend):

         return self.extra_query

-    def _body(self, endpoint_type: EndpointType) -> dict[str, str]:
+    def _extra_body(self, endpoint_type: EndpointType) -> dict[str, Any]:
         if self.extra_body is None:
             return {}

@@ -426,12 +434,12 @@ class OpenAIHTTPBackend(Backend):

     def _completions_payload(
         self,
-        body: Optional[dict],
+        endpoint_type: EndpointType,
         orig_kwargs: Optional[dict],
         max_output_tokens: Optional[int],
         **kwargs,
     ) -> dict:
-        payload = body or {}
+        payload = self._extra_body(endpoint_type)
         payload.update(orig_kwargs or {})
         payload.update(kwargs)
         payload["model"] = self.model
@@ -455,6 +463,10 @@ class OpenAIHTTPBackend(Backend):
             payload["stop"] = None
             payload["ignore_eos"] = True

+        if self.remove_from_body:
+            for key in self.remove_from_body:
+                payload.pop(key, None)
+
         return payload

     @staticmethod
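
The new `extra_body` and `remove_from_body` options are plain constructor arguments, and since the CLI documents `--backend-args` as a JSON dict passed to the backend as `**kwargs`, they should also be reachable that way. Below is a minimal sketch of direct construction; the endpoint-keyed nesting follows the docstring above, while the target URL, model ID, and body keys are illustrative placeholders rather than shipped defaults.

```python
from guidellm.backend.openai import OpenAIHTTPBackend

# Sketch only: argument values are illustrative placeholders.
backend = OpenAIHTTPBackend(
    target="http://localhost:8000",
    model="my-model",
    extra_body={
        # Merged into the request body for chat completion requests only.
        "chat_completions": {"min_tokens": 32},
    },
    # Dropped from every payload before the request is sent.
    remove_from_body=["ignore_eos"],
)
```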

{guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/benchmark/benchmark.py

@@ -815,10 +815,7 @@ class GenerativeBenchmark(Benchmark):
                     req.first_token_time or req.start_time
                     for req in total_with_output_first
                 ],
-                iter_counts=[
-                    req.output_tokens
-                    for req in total_with_output_first
-                ],
+                iter_counts=[req.output_tokens for req in total_with_output_first],
                 first_iter_counts=[
                     req.prompt_tokens for req in total_with_output_first
                 ],

{guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/benchmark/entrypoints.py

@@ -15,10 +15,22 @@ from guidellm.benchmark.output import (
 )
 from guidellm.benchmark.profile import ProfileType, create_profile
 from guidellm.benchmark.progress import GenerativeTextBenchmarkerProgressDisplay
+from guidellm.benchmark.scenario import GenerativeTextScenario, Scenario
 from guidellm.request import GenerativeRequestLoader
 from guidellm.scheduler import StrategyType


+async def benchmark_with_scenario(scenario: Scenario, **kwargs):
+    """
+    Run a benchmark using a scenario and specify any extra arguments
+    """
+
+    if isinstance(scenario, GenerativeTextScenario):
+        return await benchmark_generative_text(**vars(scenario), **kwargs)
+    else:
+        raise ValueError(f"Unsupported Scenario type {type(scenario)}")
+
+
 async def benchmark_generative_text(
     target: str,
     backend_type: BackendType,
@@ -43,13 +55,13 @@ async def benchmark_generative_text(
     max_requests: Optional[int],
     warmup_percent: Optional[float],
     cooldown_percent: Optional[float],
-    show_progress: bool,
-    show_progress_scheduler_stats: bool,
-    output_console: bool,
     output_path: Optional[Union[str, Path]],
     output_extras: Optional[dict[str, Any]],
     output_sampling: Optional[int],
     random_seed: int,
+    show_progress: bool = True,
+    show_progress_scheduler_stats: bool = False,
+    output_console: bool = True,
 ) -> tuple[GenerativeBenchmarksReport, Optional[Path]]:
     console = GenerativeBenchmarksConsole(enabled=show_progress)
     console.print_line("Creating backend...")
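
For programmatic use, a rough sketch of driving the new entrypoint directly; the server URL, dataset path, and output values are placeholders, and the scenario fields mirror `GenerativeTextScenario` as defined in the new scenario module below.

```python
import asyncio

from guidellm.benchmark.entrypoints import benchmark_with_scenario
from guidellm.benchmark.scenario import GenerativeTextScenario

# Placeholder values; fields left unset fall back to the scenario defaults.
scenario = GenerativeTextScenario(
    target="http://localhost:8000",
    data="prompts.jsonl",
    rate_type="synchronous",
    max_requests=100,
)

# Arguments that are not scenario fields (output_path, output_extras, the
# display flags) are forwarded to benchmark_generative_text as **kwargs.
report, output_path = asyncio.run(
    benchmark_with_scenario(
        scenario=scenario,
        output_path="benchmarks.json",
        output_extras=None,
        show_progress=False,
    )
)
```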

guidellm-0.3.0a26/src/guidellm/benchmark/scenario.py

@@ -0,0 +1,104 @@
+from collections.abc import Iterable
+from functools import cache
+from pathlib import Path
+from typing import Annotated, Any, Literal, Optional, TypeVar, Union
+
+from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
+from pydantic import BeforeValidator, Field, NonNegativeInt, PositiveFloat, PositiveInt
+from transformers.tokenization_utils_base import (  # type: ignore[import]
+    PreTrainedTokenizerBase,
+)
+
+from guidellm.backend.backend import BackendType
+from guidellm.benchmark.profile import ProfileType
+from guidellm.objects.pydantic import StandardBaseModel
+from guidellm.scheduler.strategy import StrategyType
+
+__ALL__ = ["Scenario", "GenerativeTextScenario", "get_builtin_scenarios"]
+
+SCENARIO_DIR = Path(__file__).parent / "scenarios/"
+
+
+@cache
+def get_builtin_scenarios() -> list[str]:
+    """Returns list of builtin scenario names."""
+    return [p.stem for p in SCENARIO_DIR.glob("*.json")]
+
+
+def parse_float_list(value: Union[str, float, list[float]]) -> list[float]:
+    """
+    Parse a comma separated string to a list of float
+    or convert single float list of one or pass float
+    list through.
+    """
+    if isinstance(value, (int, float)):
+        return [value]
+    elif isinstance(value, list):
+        return value
+
+    values = value.split(",") if "," in value else [value]
+
+    try:
+        return [float(val) for val in values]
+    except ValueError as err:
+        raise ValueError(
+            "must be a number or comma-separated list of numbers."
+        ) from err
+
+
+T = TypeVar("T", bound="Scenario")
+
+
+class Scenario(StandardBaseModel):
+    """
+    Parent Scenario class with common options for all benchmarking types.
+    """
+
+    target: str
+
+    @classmethod
+    def from_builtin(cls: type[T], name: str, overrides: Optional[dict] = None) -> T:
+        filename = SCENARIO_DIR / f"{name}.json"
+
+        if not filename.is_file():
+            raise ValueError(f"{name} is not a valid builtin scenario")
+
+        return cls.from_file(filename, overrides)
+
+
+class GenerativeTextScenario(Scenario):
+    """
+    Scenario class for generative text benchmarks.
+    """
+
+    class Config:
+        # NOTE: This prevents errors due to unvalidatable
+        # types like PreTrainedTokenizerBase
+        arbitrary_types_allowed = True
+
+    backend_type: BackendType = "openai_http"
+    backend_args: Optional[dict[str, Any]] = None
+    model: Optional[str] = None
+    processor: Optional[Union[str, Path, PreTrainedTokenizerBase]] = None
+    processor_args: Optional[dict[str, Any]] = None
+    data: Union[
+        str,
+        Path,
+        Iterable[Union[str, dict[str, Any]]],
+        Dataset,
+        DatasetDict,
+        IterableDataset,
+        IterableDatasetDict,
+    ]
+    data_args: Optional[dict[str, Any]] = None
+    data_sampler: Optional[Literal["random"]] = None
+    rate_type: Union[StrategyType, ProfileType]
+    rate: Annotated[
+        Optional[list[PositiveFloat]], BeforeValidator(parse_float_list)
+    ] = None
+    max_seconds: Optional[PositiveFloat] = None
+    max_requests: Optional[PositiveInt] = None
+    warmup_percent: Annotated[Optional[float], Field(gt=0, le=1)] = None
+    cooldown_percent: Annotated[Optional[float], Field(gt=0, le=1)] = None
+    output_sampling: Optional[NonNegativeInt] = None
+    random_seed: int = 42
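
As a sketch of how this model behaves (the file name and field values are illustrative): the `BeforeValidator` lets `rate` arrive as a comma-separated string, and `from_file`/`from_builtin` load a JSON or YAML scenario and then apply CLI-style overrides on top.

```python
from pathlib import Path

from guidellm.benchmark.scenario import GenerativeTextScenario

# "1,2,4" is coerced to [1.0, 2.0, 4.0] by parse_float_list before validation.
scenario = GenerativeTextScenario.model_validate(
    {
        "target": "http://localhost:8000",  # placeholder server URL
        "data": "prompts.jsonl",            # hypothetical dataset file
        "rate_type": "constant",
        "rate": "1,2,4",
    }
)
assert scenario.rate == [1.0, 2.0, 4.0]

# The same fields can live in a JSON or YAML file and be partially overridden,
# which is what the CLI does when --scenario is combined with explicit flags.
overridden = GenerativeTextScenario.from_file(
    Path("my_scenario.json"),  # hypothetical user-provided file
    {"max_requests": 50},
)
```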

{guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/objects/pydantic.py

@@ -1,10 +1,15 @@
-from typing import Any, Generic, TypeVar
+import json
+from pathlib import Path
+from typing import Any, Generic, Optional, TypeVar

+import yaml
 from loguru import logger
 from pydantic import BaseModel, ConfigDict, Field

 __all__ = ["StandardBaseModel", "StatusBreakdown"]

+T = TypeVar("T", bound="StandardBaseModel")
+

 class StandardBaseModel(BaseModel):
     """
@@ -27,6 +32,30 @@ class StandardBaseModel(BaseModel):
             data,
         )

+    @classmethod
+    def get_default(cls: type[T], field: str) -> Any:
+        """Get default values for model fields"""
+        return cls.model_fields[field].default
+
+    @classmethod
+    def from_file(cls: type[T], filename: Path, overrides: Optional[dict] = None) -> T:
+        """
+        Attempt to create a new instance of the model using
+        data loaded from json or yaml file.
+        """
+        try:
+            with filename.open() as f:
+                if str(filename).endswith(".json"):
+                    data = json.load(f)
+                else:  # Assume everything else is yaml
+                    data = yaml.safe_load(f)
+        except (json.JSONDecodeError, yaml.YAMLError) as e:
+            logger.error(f"Failed to parse {filename} as type {cls.__name__}")
+            raise ValueError(f"Error when parsing file: {filename}") from e
+
+        data.update(overrides)
+        return cls.model_validate(data)
+

 SuccessfulT = TypeVar("SuccessfulT")
 ErroredT = TypeVar("ErroredT")
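
A small sketch of the two new helpers, using `GenerativeTextScenario` as the concrete subclass just as the CLI diff above does; the asserted values are the defaults declared on that model.

```python
from guidellm.benchmark.scenario import GenerativeTextScenario

# get_default keeps click option defaults in sync with the pydantic model.
assert GenerativeTextScenario.get_default("backend_type") == "openai_http"
assert GenerativeTextScenario.get_default("random_seed") == 42
```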

guidellm-0.3.0a26/src/guidellm/utils/cli.py

@@ -0,0 +1,62 @@
+import json
+from typing import Any
+
+import click
+
+
+def parse_json(ctx, param, value):  # noqa: ARG001
+    if value is None:
+        return None
+    try:
+        return json.loads(value)
+    except json.JSONDecodeError as err:
+        raise click.BadParameter(f"{param.name} must be a valid JSON string.") from err
+
+
+def set_if_not_default(ctx: click.Context, **kwargs) -> dict[str, Any]:
+    """
+    Set the value of a click option if it is not the default value.
+    This is useful for setting options that are not None by default.
+    """
+    values = {}
+    for k, v in kwargs.items():
+        if ctx.get_parameter_source(k) != click.core.ParameterSource.DEFAULT:  # type: ignore[attr-defined]
+            values[k] = v
+
+    return values
+
+
+class Union(click.ParamType):
+    """
+    A custom click parameter type that allows for multiple types to be accepted.
+    """
+
+    def __init__(self, *types: click.ParamType):
+        self.types = types
+        self.name = "".join(t.name for t in types)
+
+    def convert(self, value, param, ctx):
+        fails = []
+        for t in self.types:
+            try:
+                return t.convert(value, param, ctx)
+            except click.BadParameter as e:
+                fails.append(str(e))
+                continue
+
+        self.fail("; ".join(fails) or f"Invalid value: {value}")  # noqa: RET503
+
+    def get_metavar(self, param: click.Parameter) -> str:
+        def get_choices(t: click.ParamType) -> str:
+            meta = t.get_metavar(param)
+            return meta if meta is not None else t.name
+
+        # Get the choices for each type in the union.
+        choices_str = "|".join(map(get_choices, self.types))
+
+        # Use curly braces to indicate a required argument.
+        if param.required and param.param_type_name == "argument":
+            return f"{{{choices_str}}}"
+
+        # Use square braces to indicate an option or optional argument.
+        return f"[{choices_str}]"
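
A minimal sketch of these helpers in a standalone click command; the command and option names are made up for illustration and are not part of guidellm.

```python
from pathlib import Path

import click

from guidellm.utils import cli as cli_tools


@click.command()
@click.option(
    "--config",
    # Accept either an existing file path or one of a fixed set of names.
    type=cli_tools.Union(
        click.Path(exists=True, dir_okay=False, path_type=Path),
        click.Choice(["small", "large"]),
    ),
    default=None,
)
@click.option("--seed", type=int, default=42)
def demo(config, seed):
    ctx = click.get_current_context()
    # Only values the user explicitly supplied end up in the overrides dict.
    overrides = cli_tools.set_if_not_default(ctx, config=config, seed=seed)
    click.echo(overrides)


if __name__ == "__main__":
    demo()
```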

guidellm-0.3.0a26/src/guidellm/version.py

@@ -0,0 +1,6 @@
+version = "0.3.0a26"
+build_type = "nightly"
+build_iteration = "26"
+git_commit = "0e78c65948eab356f2f846a0d5ae609ab650c290"
+git_branch = "main"
+git_last_tag = "v0.2.1"

{guidellm-0.3.0a22 → guidellm-0.3.0a26/src/guidellm.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: guidellm
-Version: 0.3.0a22
+Version: 0.3.0a26
 Summary: Guidance platform for deploying and managing large language models.
 Author: Red Hat
 License-Expression: Apache-2.0
@@ -208,6 +208,48 @@ The `guidellm benchmark` command is used to run benchmarks against a generative

 - `--output-path`: Defines the path to save the benchmark results. Supports JSON, YAML, or CSV formats. If a directory is provided, the results will be saved as `benchmarks.json` in that directory. If not set, the results will be saved in the current working directory.

+### GuideLLM UI
+
+GuideLLM UI is a companion frontend for visualizing the results of a GuideLLM benchmark run.
+
+### 🛠 Running the UI
+
+1. Use the Hosted Build (Recommended for Most Users)
+
+After running a benchmark with GuideLLM, a report.html file will be generated (by default at guidellm_report/report.html). This file references the latest stable version of the UI hosted at:
+
+```
+https://neuralmagic.github.io/guidellm/ui/dev/
+```
+
+Open the file in your browser and you're done—no setup required.
+
+2. Build and Serve the UI Locally (For Development) This option is useful if:
+
+- You are actively developing the UI
+
+- You want to test changes to the UI before publishing
+
+- You want full control over how the report is displayed
+
+```bash
+npm install
+npm run build
+npx serve out
+```
+
+This will start a local server (e.g., at http://localhost:3000). Then, in your GuideLLM config or CLI flags, point to this local server as the asset base for report generation.
+
+### 🧪 Development Notes
+
+During UI development, it can be helpful to view sample data. We include a sample benchmark run wired into the Redux store under:
+
+```
+src/lib/store/[runInfo/workloadDetails/benchmarks]WindowData.ts
+```
+
+In the future this will be replaced by a configurable untracked file for dev use.
+
 ## Resources

 ### Documentation

{guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm.egg-info/SOURCES.txt

@@ -26,6 +26,8 @@ src/guidellm/benchmark/entrypoints.py
 src/guidellm/benchmark/output.py
 src/guidellm/benchmark/profile.py
 src/guidellm/benchmark/progress.py
+src/guidellm/benchmark/scenario.py
+src/guidellm/benchmark/scenarios/__init__.py
 src/guidellm/data/__init__.py
 src/guidellm/data/prideandprejudice.txt.gz
 src/guidellm/dataset/__init__.py
@@ -50,6 +52,7 @@ src/guidellm/scheduler/strategy.py
 src/guidellm/scheduler/types.py
 src/guidellm/scheduler/worker.py
 src/guidellm/utils/__init__.py
+src/guidellm/utils/cli.py
 src/guidellm/utils/colors.py
 src/guidellm/utils/hf_datasets.py
 src/guidellm/utils/hf_transformers.py

guidellm-0.3.0a22/src/guidellm/version.py (deleted)

@@ -1,6 +0,0 @@
-version = "0.3.0a22"
-build_type = "nightly"
-build_iteration = "22"
-git_commit = "023c8dd0a4ee9fc1be5ad60b7854f661423eb722"
-git_branch = "main"
-git_last_tag = "v0.2.1"