guidellm 0.3.0a22__tar.gz → 0.3.0a26__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: the registry flags this release of guidellm as potentially problematic.
- {guidellm-0.3.0a22/src/guidellm.egg-info → guidellm-0.3.0a26}/PKG-INFO +43 -1
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/README.md +42 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/__main__.py +90 -65
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/backend/openai.py +19 -7
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/benchmark/benchmark.py +1 -4
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/benchmark/entrypoints.py +15 -3
- guidellm-0.3.0a26/src/guidellm/benchmark/scenario.py +104 -0
- guidellm-0.3.0a26/src/guidellm/benchmark/scenarios/__init__.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/objects/pydantic.py +30 -1
- guidellm-0.3.0a26/src/guidellm/utils/cli.py +62 -0
- guidellm-0.3.0a26/src/guidellm/version.py +6 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26/src/guidellm.egg-info}/PKG-INFO +43 -1
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm.egg-info/SOURCES.txt +3 -0
- guidellm-0.3.0a22/src/guidellm/version.py +0 -6
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/LICENSE +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/MANIFEST.in +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/pyproject.toml +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/setup.cfg +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/setup.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/__init__.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/backend/__init__.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/backend/backend.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/backend/response.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/benchmark/__init__.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/benchmark/aggregator.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/benchmark/benchmarker.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/benchmark/output.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/benchmark/profile.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/benchmark/progress.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/config.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/data/__init__.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/data/prideandprejudice.txt.gz +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/dataset/__init__.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/dataset/creator.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/dataset/entrypoints.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/dataset/file.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/dataset/hf_datasets.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/dataset/in_memory.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/dataset/synthetic.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/logger.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/objects/__init__.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/objects/statistics.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/preprocess/__init__.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/preprocess/dataset.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/request/__init__.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/request/loader.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/request/request.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/scheduler/__init__.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/scheduler/result.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/scheduler/scheduler.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/scheduler/strategy.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/scheduler/types.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/scheduler/worker.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/utils/__init__.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/utils/colors.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/utils/hf_datasets.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/utils/hf_transformers.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/utils/random.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/utils/text.py +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm.egg-info/dependency_links.txt +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm.egg-info/entry_points.txt +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm.egg-info/requires.txt +0 -0
- {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm.egg-info/top_level.txt +0 -0
**{guidellm-0.3.0a22/src/guidellm.egg-info → guidellm-0.3.0a26}/PKG-INFO**

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: guidellm
-Version: 0.3.0a22
+Version: 0.3.0a26
 Summary: Guidance platform for deploying and managing large language models.
 Author: Red Hat
 License-Expression: Apache-2.0
@@ -208,6 +208,48 @@ The `guidellm benchmark` command is used to run benchmarks against a generative
 
 - `--output-path`: Defines the path to save the benchmark results. Supports JSON, YAML, or CSV formats. If a directory is provided, the results will be saved as `benchmarks.json` in that directory. If not set, the results will be saved in the current working directory.
 
+### GuideLLM UI
+
+GuideLLM UI is a companion frontend for visualizing the results of a GuideLLM benchmark run.
+
+### 🛠 Running the UI
+
+1. Use the Hosted Build (Recommended for Most Users)
+
+After running a benchmark with GuideLLM, a report.html file will be generated (by default at guidellm_report/report.html). This file references the latest stable version of the UI hosted at:
+
+```
+https://neuralmagic.github.io/guidellm/ui/dev/
+```
+
+Open the file in your browser and you're done—no setup required.
+
+2. Build and Serve the UI Locally (For Development) This option is useful if:
+
+- You are actively developing the UI
+
+- You want to test changes to the UI before publishing
+
+- You want full control over how the report is displayed
+
+```bash
+npm install
+npm run build
+npx serve out
+```
+
+This will start a local server (e.g., at http://localhost:3000). Then, in your GuideLLM config or CLI flags, point to this local server as the asset base for report generation.
+
+### 🧪 Development Notes
+
+During UI development, it can be helpful to view sample data. We include a sample benchmark run wired into the Redux store under:
+
+```
+src/lib/store/[runInfo/workloadDetails/benchmarks]WindowData.ts
+```
+
+In the future this will be replaced by a configurable untracked file for dev use.
+
 ## Resources
 
 ### Documentation
````
**{guidellm-0.3.0a22 → guidellm-0.3.0a26}/README.md**

````diff
@@ -153,6 +153,48 @@ The `guidellm benchmark` command is used to run benchmarks against a generative
 
 - `--output-path`: Defines the path to save the benchmark results. Supports JSON, YAML, or CSV formats. If a directory is provided, the results will be saved as `benchmarks.json` in that directory. If not set, the results will be saved in the current working directory.
 
+### GuideLLM UI
+
+GuideLLM UI is a companion frontend for visualizing the results of a GuideLLM benchmark run.
+
+### 🛠 Running the UI
+
+1. Use the Hosted Build (Recommended for Most Users)
+
+After running a benchmark with GuideLLM, a report.html file will be generated (by default at guidellm_report/report.html). This file references the latest stable version of the UI hosted at:
+
+```
+https://neuralmagic.github.io/guidellm/ui/dev/
+```
+
+Open the file in your browser and you're done—no setup required.
+
+2. Build and Serve the UI Locally (For Development) This option is useful if:
+
+- You are actively developing the UI
+
+- You want to test changes to the UI before publishing
+
+- You want full control over how the report is displayed
+
+```bash
+npm install
+npm run build
+npx serve out
+```
+
+This will start a local server (e.g., at http://localhost:3000). Then, in your GuideLLM config or CLI flags, point to this local server as the asset base for report generation.
+
+### 🧪 Development Notes
+
+During UI development, it can be helpful to view sample data. We include a sample benchmark run wired into the Redux store under:
+
+```
+src/lib/store/[runInfo/workloadDetails/benchmarks]WindowData.ts
+```
+
+In the future this will be replaced by a configurable untracked file for dev use.
+
 ## Resources
 
 ### Documentation
````
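As an aside on the local-serve step in the README section above: any static file server can stand in for `npx serve`. A minimal Python alternative, assuming the build output landed in `out/` (the port mirrors the example URL and is otherwise arbitrary):

```python
# Programmatic equivalent of `python -m http.server`, rooted at ./out,
# serving the built UI on http://localhost:3000.
import functools
from http.server import HTTPServer, SimpleHTTPRequestHandler

handler = functools.partial(SimpleHTTPRequestHandler, directory="out")
HTTPServer(("localhost", 3000), handler).serve_forever()
```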
**{guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/__main__.py**

````diff
@@ -1,56 +1,55 @@
 import asyncio
 import codecs
-import json
 from pathlib import Path
 from typing import get_args
 
 import click
+from pydantic import ValidationError
 
 from guidellm.backend import BackendType
-from guidellm.benchmark import ProfileType
+from guidellm.benchmark import ProfileType
+from guidellm.benchmark.entrypoints import benchmark_with_scenario
+from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios
 from guidellm.config import print_config
 from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset
 from guidellm.scheduler import StrategyType
+from guidellm.utils import cli as cli_tools
 
 STRATEGY_PROFILE_CHOICES = set(
     list(get_args(ProfileType)) + list(get_args(StrategyType))
 )
 
 
-def parse_json(ctx, param, value):  # noqa: ARG001
-    if value is None:
-        return None
-    try:
-        return json.loads(value)
-    except json.JSONDecodeError as err:
-        raise click.BadParameter(f"{param.name} must be a valid JSON string.") from err
-
-
-def parse_number_str(ctx, param, value):  # noqa: ARG001
-    if value is None:
-        return None
-
-    values = value.split(",") if "," in value else [value]
-
-    try:
-        return [float(val) for val in values]
-    except ValueError as err:
-        raise click.BadParameter(
-            f"{param.name} must be a number or comma-separated list of numbers."
-        ) from err
-
-
 @click.group()
 def cli():
     pass
 
 
 @cli.command(
-    help="Run a benchmark against a generative model using the specified arguments."
+    help="Run a benchmark against a generative model using the specified arguments.",
+    context_settings={"auto_envvar_prefix": "GUIDELLM"},
+)
+@click.option(
+    "--scenario",
+    type=cli_tools.Union(
+        click.Path(
+            exists=True,
+            readable=True,
+            file_okay=True,
+            dir_okay=False,
+            path_type=Path,  # type: ignore[type-var]
+        ),
+        click.Choice(get_builtin_scenarios()),
+    ),
+    default=None,
+    help=(
+        "The name of a builtin scenario or path to a config file. "
+        "Missing values from the config will use defaults. "
+        "Options specified on the commandline will override the scenario."
+    ),
 )
 @click.option(
     "--target",
-    required=True,
     type=str,
     help="The target path for the backend to run benchmarks against. For example, http://localhost:8000",
 )
@@ -61,12 +60,12 @@ def cli():
         "The type of backend to use to run requests against. Defaults to 'openai_http'."
        f" Supported types: {', '.join(get_args(BackendType))}"
     ),
-    default="openai_http",
+    default=GenerativeTextScenario.get_default("backend_type"),
 )
 @click.option(
     "--backend-args",
-    callback=parse_json,
-    default=None,
+    callback=cli_tools.parse_json,
+    default=GenerativeTextScenario.get_default("backend_args"),
     help=(
         "A JSON string containing any arguments to pass to the backend as a "
         "dict with **kwargs."
@@ -74,7 +73,7 @@ def cli():
 )
 @click.option(
     "--model",
-    default=None,
+    default=GenerativeTextScenario.get_default("model"),
     type=str,
     help=(
         "The ID of the model to benchmark within the backend. "
@@ -83,7 +82,7 @@ def cli():
 )
 @click.option(
     "--processor",
-    default=None,
+    default=GenerativeTextScenario.get_default("processor"),
     type=str,
     help=(
         "The processor or tokenizer to use to calculate token counts for statistics "
@@ -93,8 +92,8 @@ def cli():
 )
 @click.option(
     "--processor-args",
-    default=None,
-    callback=parse_json,
+    default=GenerativeTextScenario.get_default("processor_args"),
+    callback=cli_tools.parse_json,
     help=(
         "A JSON string containing any arguments to pass to the processor constructor "
         "as a dict with **kwargs."
@@ -102,7 +101,6 @@ def cli():
 )
 @click.option(
     "--data",
-    required=True,
     type=str,
     help=(
         "The HuggingFace dataset ID, a path to a HuggingFace dataset, "
@@ -112,7 +110,8 @@ def cli():
 )
 @click.option(
     "--data-args",
-    callback=parse_json,
+    default=GenerativeTextScenario.get_default("data_args"),
+    callback=cli_tools.parse_json,
     help=(
         "A JSON string containing any arguments to pass to the dataset creation "
         "as a dict with **kwargs."
@@ -120,7 +119,7 @@ def cli():
 )
 @click.option(
     "--data-sampler",
-    default=None,
+    default=GenerativeTextScenario.get_default("data_sampler"),
     type=click.Choice(["random"]),
     help=(
         "The data sampler type to use. 'random' will add a random shuffle on the data. "
@@ -129,7 +128,6 @@ def cli():
 )
 @click.option(
     "--rate-type",
-    required=True,
     type=click.Choice(STRATEGY_PROFILE_CHOICES),
     help=(
         "The type of benchmark to run. "
@@ -138,8 +136,7 @@ def cli():
 )
 @click.option(
     "--rate",
-    default=None,
-    callback=parse_number_str,
+    default=GenerativeTextScenario.get_default("rate"),
     help=(
         "The rates to run the benchmark at. "
         "Can be a single number or a comma-separated list of numbers. "
@@ -152,6 +149,7 @@ def cli():
 @click.option(
     "--max-seconds",
     type=float,
+    default=GenerativeTextScenario.get_default("max_seconds"),
     help=(
         "The maximum number of seconds each benchmark can run for. "
         "If None, will run until max_requests or the data is exhausted."
@@ -160,6 +158,7 @@ def cli():
 @click.option(
     "--max-requests",
     type=int,
+    default=GenerativeTextScenario.get_default("max_requests"),
     help=(
         "The maximum number of requests each benchmark can run for. "
         "If None, will run until max_seconds or the data is exhausted."
@@ -168,7 +167,7 @@ def cli():
 @click.option(
     "--warmup-percent",
     type=float,
-    default=None,
+    default=GenerativeTextScenario.get_default("warmup_percent"),
     help=(
         "The percent of the benchmark (based on max-seconds, max-requets, "
         "or lenth of dataset) to run as a warmup and not include in the final results. "
@@ -178,6 +177,7 @@ def cli():
 @click.option(
     "--cooldown-percent",
     type=float,
+    default=GenerativeTextScenario.get_default("cooldown_percent"),
     help=(
         "The percent of the benchmark (based on max-seconds, max-requets, or lenth "
         "of dataset) to run as a cooldown and not include in the final results. "
@@ -212,7 +212,7 @@ def cli():
 )
 @click.option(
     "--output-extras",
-    callback=parse_json,
+    callback=cli_tools.parse_json,
     help="A JSON string of extra data to save with the output benchmarks",
 )
 @click.option(
@@ -222,15 +222,16 @@ def cli():
         "The number of samples to save in the output file. "
         "If None (default), will save all samples."
     ),
-    default=None,
+    default=GenerativeTextScenario.get_default("output_sampling"),
 )
 @click.option(
     "--random-seed",
-    default=42,
+    default=GenerativeTextScenario.get_default("random_seed"),
     type=int,
     help="The random seed to use for benchmarking to ensure reproducibility.",
 )
 def benchmark(
+    scenario,
     target,
     backend_type,
     backend_args,
@@ -254,30 +255,53 @@ def benchmark(
     output_sampling,
     random_seed,
 ):
+    click_ctx = click.get_current_context()
+
+    overrides = cli_tools.set_if_not_default(
+        click_ctx,
+        target=target,
+        backend_type=backend_type,
+        backend_args=backend_args,
+        model=model,
+        processor=processor,
+        processor_args=processor_args,
+        data=data,
+        data_args=data_args,
+        data_sampler=data_sampler,
+        rate_type=rate_type,
+        rate=rate,
+        max_seconds=max_seconds,
+        max_requests=max_requests,
+        warmup_percent=warmup_percent,
+        cooldown_percent=cooldown_percent,
+        output_sampling=output_sampling,
+        random_seed=random_seed,
+    )
+
+    try:
+        # If a scenario file was specified read from it
+        if scenario is None:
+            _scenario = GenerativeTextScenario.model_validate(overrides)
+        elif isinstance(scenario, Path):
+            _scenario = GenerativeTextScenario.from_file(scenario, overrides)
+        else:  # Only builtins can make it here; click will catch anything else
+            _scenario = GenerativeTextScenario.from_builtin(scenario, overrides)
+    except ValidationError as e:
+        # Translate pydantic valdation error to click argument error
+        errs = e.errors(include_url=False, include_context=True, include_input=True)
+        param_name = "--" + str(errs[0]["loc"][0]).replace("_", "-")
+        raise click.BadParameter(
+            errs[0]["msg"], ctx=click_ctx, param_hint=param_name
+        ) from e
+
     asyncio.run(
-        benchmark_generative_text(
-            target=target,
-            backend_type=backend_type,
-            backend_args=backend_args,
-            model=model,
-            processor=processor,
-            processor_args=processor_args,
-            data=data,
-            data_args=data_args,
-            data_sampler=data_sampler,
-            rate_type=rate_type,
-            rate=rate,
-            max_seconds=max_seconds,
-            max_requests=max_requests,
-            warmup_percent=warmup_percent,
-            cooldown_percent=cooldown_percent,
+        benchmark_with_scenario(
+            scenario=_scenario,
             show_progress=not disable_progress,
             show_progress_scheduler_stats=display_scheduler_stats,
             output_console=not disable_console_outputs,
             output_path=output_path,
             output_extras=output_extras,
-            output_sampling=output_sampling,
-            random_seed=random_seed,
         )
     )
 
@@ -316,7 +340,8 @@ def preprocess():
         "Convert a dataset to have specific prompt and output token sizes.\n"
         "DATA: Path to the input dataset or dataset ID.\n"
         "OUTPUT_PATH: Path to save the converted dataset, including file suffix."
-    )
+    ),
+    context_settings={"auto_envvar_prefix": "GUIDELLM"},
 )
 @click.argument(
     "data",
@@ -340,7 +365,7 @@ def preprocess():
 @click.option(
     "--processor-args",
     default=None,
-    callback=parse_json,
+    callback=cli_tools.parse_json,
     help=(
         "A JSON string containing any arguments to pass to the processor constructor "
         "as a dict with **kwargs."
@@ -348,7 +373,7 @@ def preprocess():
 )
 @click.option(
     "--data-args",
-    callback=parse_json,
+    callback=cli_tools.parse_json,
     help=(
         "A JSON string containing any arguments to pass to the dataset creation "
         "as a dict with **kwargs."
````

Note: a few removed lines above (the old literal `default=` values) were truncated in the source rendering; they are reconstructed here from the option help texts and the matching scenario defaults introduced in this release.
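A note on the `context_settings={"auto_envvar_prefix": "GUIDELLM"}` additions above: with this setting, click also reads each option from an environment variable derived from its name, e.g. `GUIDELLM_TARGET` for `--target`. A minimal standalone sketch of the mechanism (the `demo` command is hypothetical, not part of guidellm):

```python
import click

@click.command(context_settings={"auto_envvar_prefix": "GUIDELLM"})
@click.option("--target", default="http://localhost:8000")
def demo(target):
    # When --target is omitted on the command line, click falls back to the
    # GUIDELLM_TARGET environment variable before using the declared default.
    click.echo(f"target={target}")

if __name__ == "__main__":
    demo()  # try: GUIDELLM_TARGET=http://10.0.0.1:8000 python demo.py
```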
**{guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/backend/openai.py**

````diff
@@ -70,6 +70,14 @@ class OpenAIHTTPBackend(Backend):
         the values of these keys will be used as the parameters for the respective
         endpoint.
         If not provided, no extra query parameters are added.
+    :param extra_body: Body parameters to include in requests to the OpenAI server.
+        If "chat_completions", "models", or "text_completions" are included as keys,
+        the values of these keys will be included in the body for the respective
+        endpoint.
+        If not provided, no extra body parameters are added.
+    :param remove_from_body: Parameters that should be removed from the body of each
+        request.
+        If not provided, no parameters are removed from the body.
     """
 
     def __init__(
@@ -85,6 +93,7 @@ class OpenAIHTTPBackend(Backend):
         max_output_tokens: Optional[int] = None,
         extra_query: Optional[dict] = None,
         extra_body: Optional[dict] = None,
+        remove_from_body: Optional[list[str]] = None,
     ):
         super().__init__(type_="openai_http")
         self._target = target or settings.openai.base_url
@@ -122,6 +131,7 @@ class OpenAIHTTPBackend(Backend):
         )
         self.extra_query = extra_query
         self.extra_body = extra_body
+        self.remove_from_body = remove_from_body
         self._async_client: Optional[httpx.AsyncClient] = None
 
     @property
@@ -253,9 +263,8 @@ class OpenAIHTTPBackend(Backend):
 
         headers = self._headers()
         params = self._params(TEXT_COMPLETIONS)
-        body = self._body(TEXT_COMPLETIONS)
         payload = self._completions_payload(
-            body=body,
+            endpoint_type=TEXT_COMPLETIONS,
             orig_kwargs=kwargs,
             max_output_tokens=output_token_count,
             prompt=prompt,
@@ -330,12 +339,11 @@ class OpenAIHTTPBackend(Backend):
         logger.debug("{} invocation with args: {}", self.__class__.__name__, locals())
         headers = self._headers()
         params = self._params(CHAT_COMPLETIONS)
-        body = self._body(CHAT_COMPLETIONS)
         messages = (
             content if raw_content else self._create_chat_messages(content=content)
         )
         payload = self._completions_payload(
-            body=body,
+            endpoint_type=CHAT_COMPLETIONS,
             orig_kwargs=kwargs,
             max_output_tokens=output_token_count,
             messages=messages,
@@ -411,7 +419,7 @@ class OpenAIHTTPBackend(Backend):
 
         return self.extra_query
 
-    def _body(self, endpoint_type: EndpointType) -> dict[str, Any]:
+    def _extra_body(self, endpoint_type: EndpointType) -> dict[str, Any]:
         if self.extra_body is None:
             return {}
 
@@ -426,12 +434,12 @@ class OpenAIHTTPBackend(Backend):
 
     def _completions_payload(
         self,
-        body: Optional[dict],
+        endpoint_type: EndpointType,
         orig_kwargs: Optional[dict],
         max_output_tokens: Optional[int],
         **kwargs,
     ) -> dict:
-        payload = body or {}
+        payload = self._extra_body(endpoint_type)
         payload.update(orig_kwargs or {})
         payload.update(kwargs)
         payload["model"] = self.model
@@ -455,6 +463,10 @@ class OpenAIHTTPBackend(Backend):
             payload["stop"] = None
             payload["ignore_eos"] = True
 
+        if self.remove_from_body:
+            for key in self.remove_from_body:
+                payload.pop(key, None)
+
        return payload
 
     @staticmethod
````

Note: a few removed lines in this file (the old `body=body,` arguments and parts of the old `_body`/`_completions_payload` signatures) were truncated in the source rendering and are reconstructed here from the surrounding context.
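The net effect of the `extra_body`/`remove_from_body` changes is a fixed payload precedence: per-endpoint extras are merged in first, caller kwargs override them, and `remove_from_body` keys are stripped last. A standalone sketch of that ordering (this mirrors `_completions_payload` rather than importing guidellm; the field names are illustrative):

```python
from typing import Any, Optional

def build_payload(
    extra_body: Optional[dict],
    orig_kwargs: Optional[dict],
    remove_from_body: Optional[list],
    **kwargs: Any,
) -> dict:
    payload: dict = dict(extra_body or {})  # endpoint extras first
    payload.update(orig_kwargs or {})       # caller kwargs override extras
    payload.update(kwargs)                  # explicit fields override both
    for key in remove_from_body or []:      # removals always win
        payload.pop(key, None)              # silently drop if absent
    return payload

# e.g. strip a field for a server that rejects unknown keys:
print(build_payload({"min_tokens": 1}, None, ["ignore_eos"], ignore_eos=True))
# -> {'min_tokens': 1}
```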
**{guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/benchmark/benchmark.py**

````diff
@@ -815,10 +815,7 @@ class GenerativeBenchmark(Benchmark):
                 req.first_token_time or req.start_time
                 for req in total_with_output_first
             ],
-            iter_counts=[
-                req.output_tokens
-                for req in total_with_output_first
-            ],
+            iter_counts=[req.output_tokens for req in total_with_output_first],
             first_iter_counts=[
                 req.prompt_tokens for req in total_with_output_first
             ],
````
**{guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/benchmark/entrypoints.py**

````diff
@@ -15,10 +15,22 @@ from guidellm.benchmark.output import (
 )
 from guidellm.benchmark.profile import ProfileType, create_profile
 from guidellm.benchmark.progress import GenerativeTextBenchmarkerProgressDisplay
+from guidellm.benchmark.scenario import GenerativeTextScenario, Scenario
 from guidellm.request import GenerativeRequestLoader
 from guidellm.scheduler import StrategyType
 
 
+async def benchmark_with_scenario(scenario: Scenario, **kwargs):
+    """
+    Run a benchmark using a scenario and specify any extra arguments
+    """
+
+    if isinstance(scenario, GenerativeTextScenario):
+        return await benchmark_generative_text(**vars(scenario), **kwargs)
+    else:
+        raise ValueError(f"Unsupported Scenario type {type(scenario)}")
+
+
 async def benchmark_generative_text(
     target: str,
     backend_type: BackendType,
@@ -43,13 +55,13 @@ async def benchmark_generative_text(
     max_requests: Optional[int],
     warmup_percent: Optional[float],
     cooldown_percent: Optional[float],
-    show_progress: bool,
-    show_progress_scheduler_stats: bool,
-    output_console: bool,
     output_path: Optional[Union[str, Path]],
     output_extras: Optional[dict[str, Any]],
     output_sampling: Optional[int],
     random_seed: int,
+    show_progress: bool = True,
+    show_progress_scheduler_stats: bool = False,
+    output_console: bool = True,
 ) -> tuple[GenerativeBenchmarksReport, Optional[Path]]:
     console = GenerativeBenchmarksConsole(enabled=show_progress)
     console.print_line("Creating backend...")
````
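A hedged sketch of driving the new entrypoint directly from Python. The target URL is illustrative, and the synthetic `data` string assumes guidellm's emulated-data convention (adjust to your dataset if that assumption doesn't hold); `output_path` and `output_extras` are not scenario fields, so they pass through `**kwargs` to `benchmark_generative_text`:

```python
import asyncio

from guidellm.benchmark.entrypoints import benchmark_with_scenario
from guidellm.benchmark.scenario import GenerativeTextScenario

scenario = GenerativeTextScenario(
    target="http://localhost:8000",              # illustrative server URL
    data="prompt_tokens=256,output_tokens=128",  # assumed synthetic-data spec
    rate_type="synchronous",
)

# Returns the same (report, saved_path) tuple as benchmark_generative_text.
report, saved_path = asyncio.run(
    benchmark_with_scenario(
        scenario, output_path="benchmarks.json", output_extras=None
    )
)
```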
**guidellm-0.3.0a26/src/guidellm/benchmark/scenario.py** (new file)

````diff
@@ -0,0 +1,104 @@
+from collections.abc import Iterable
+from functools import cache
+from pathlib import Path
+from typing import Annotated, Any, Literal, Optional, TypeVar, Union
+
+from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
+from pydantic import BeforeValidator, Field, NonNegativeInt, PositiveFloat, PositiveInt
+from transformers.tokenization_utils_base import (  # type: ignore[import]
+    PreTrainedTokenizerBase,
+)
+
+from guidellm.backend.backend import BackendType
+from guidellm.benchmark.profile import ProfileType
+from guidellm.objects.pydantic import StandardBaseModel
+from guidellm.scheduler.strategy import StrategyType
+
+__ALL__ = ["Scenario", "GenerativeTextScenario", "get_builtin_scenarios"]
+
+SCENARIO_DIR = Path(__file__).parent / "scenarios/"
+
+
+@cache
+def get_builtin_scenarios() -> list[str]:
+    """Returns list of builtin scenario names."""
+    return [p.stem for p in SCENARIO_DIR.glob("*.json")]
+
+
+def parse_float_list(value: Union[str, float, list[float]]) -> list[float]:
+    """
+    Parse a comma separated string to a list of float
+    or convert single float list of one or pass float
+    list through.
+    """
+    if isinstance(value, (int, float)):
+        return [value]
+    elif isinstance(value, list):
+        return value
+
+    values = value.split(",") if "," in value else [value]
+
+    try:
+        return [float(val) for val in values]
+    except ValueError as err:
+        raise ValueError(
+            "must be a number or comma-separated list of numbers."
+        ) from err
+
+
+T = TypeVar("T", bound="Scenario")
+
+
+class Scenario(StandardBaseModel):
+    """
+    Parent Scenario class with common options for all benchmarking types.
+    """
+
+    target: str
+
+    @classmethod
+    def from_builtin(cls: type[T], name: str, overrides: Optional[dict] = None) -> T:
+        filename = SCENARIO_DIR / f"{name}.json"
+
+        if not filename.is_file():
+            raise ValueError(f"{name} is not a valid builtin scenario")
+
+        return cls.from_file(filename, overrides)
+
+
+class GenerativeTextScenario(Scenario):
+    """
+    Scenario class for generative text benchmarks.
+    """
+
+    class Config:
+        # NOTE: This prevents errors due to unvalidatable
+        # types like PreTrainedTokenizerBase
+        arbitrary_types_allowed = True
+
+    backend_type: BackendType = "openai_http"
+    backend_args: Optional[dict[str, Any]] = None
+    model: Optional[str] = None
+    processor: Optional[Union[str, Path, PreTrainedTokenizerBase]] = None
+    processor_args: Optional[dict[str, Any]] = None
+    data: Union[
+        str,
+        Path,
+        Iterable[Union[str, dict[str, Any]]],
+        Dataset,
+        DatasetDict,
+        IterableDataset,
+        IterableDatasetDict,
+    ]
+    data_args: Optional[dict[str, Any]] = None
+    data_sampler: Optional[Literal["random"]] = None
+    rate_type: Union[StrategyType, ProfileType]
+    rate: Annotated[
+        Optional[list[PositiveFloat]], BeforeValidator(parse_float_list)
+    ] = None
+    max_seconds: Optional[PositiveFloat] = None
+    max_requests: Optional[PositiveInt] = None
+    warmup_percent: Annotated[Optional[float], Field(gt=0, le=1)] = None
+    cooldown_percent: Annotated[Optional[float], Field(gt=0, le=1)] = None
+    output_sampling: Optional[NonNegativeInt] = None
+    random_seed: int = 42
````

**guidellm-0.3.0a26/src/guidellm/benchmark/scenarios/__init__.py** (new file)

Empty package marker; no content to diff.
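Scenarios pin an entire benchmark configuration in one file. A sketch of a hypothetical `my_scenario.json` and the loading call (file name and values are illustrative; anything omitted falls back to the model defaults above, e.g. `random_seed=42`):

```python
import json
from pathlib import Path

from guidellm.benchmark.scenario import GenerativeTextScenario

config = Path("my_scenario.json")
config.write_text(json.dumps({
    "target": "http://localhost:8000",
    "data": "prompt_tokens=128,output_tokens=64",  # assumed synthetic-data spec
    "rate_type": "constant",
    "rate": "1,2,4",   # parse_float_list accepts comma-separated strings
    "max_seconds": 60,
}))

# Explicit overrides take precedence over values read from the file.
scenario = GenerativeTextScenario.from_file(config, overrides={"max_seconds": 120})
print(scenario.rate)         # [1.0, 2.0, 4.0]
print(scenario.max_seconds)  # 120.0
```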
**{guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/objects/pydantic.py**

````diff
@@ -1,10 +1,15 @@
-from typing import Any, Generic, Optional, TypeVar
+import json
+from pathlib import Path
+from typing import Any, Generic, Optional, TypeVar
 
+import yaml
 from loguru import logger
 from pydantic import BaseModel, ConfigDict, Field
 
 __all__ = ["StandardBaseModel", "StatusBreakdown"]
 
+T = TypeVar("T", bound="StandardBaseModel")
+
 
 class StandardBaseModel(BaseModel):
     """
@@ -27,6 +32,30 @@ class StandardBaseModel(BaseModel):
             data,
         )
 
+    @classmethod
+    def get_default(cls: type[T], field: str) -> Any:
+        """Get default values for model fields"""
+        return cls.model_fields[field].default
+
+    @classmethod
+    def from_file(cls: type[T], filename: Path, overrides: Optional[dict] = None) -> T:
+        """
+        Attempt to create a new instance of the model using
+        data loaded from json or yaml file.
+        """
+        try:
+            with filename.open() as f:
+                if str(filename).endswith(".json"):
+                    data = json.load(f)
+                else:  # Assume everything else is yaml
+                    data = yaml.safe_load(f)
+        except (json.JSONDecodeError, yaml.YAMLError) as e:
+            logger.error(f"Failed to parse {filename} as type {cls.__name__}")
+            raise ValueError(f"Error when parsing file: {filename}") from e
+
+        data.update(overrides)
+        return cls.model_validate(data)
+
 
 SuccessfulT = TypeVar("SuccessfulT")
 ErroredT = TypeVar("ErroredT")
````

Note: the removed import on the first line was truncated in the source rendering; it is reconstructed here from the matching re-added line.
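The new `get_default` classmethod is what lets the CLI write `default=GenerativeTextScenario.get_default("random_seed")` instead of repeating literals: pydantic v2 keeps each field's default on `model_fields`. A standalone illustration (the `Demo` model is hypothetical):

```python
from typing import Optional

from pydantic import BaseModel

class Demo(BaseModel):
    # Hypothetical model standing in for GenerativeTextScenario.
    random_seed: int = 42
    model: Optional[str] = None

# model_fields[...].default is where pydantic v2 stores per-field defaults.
print(Demo.model_fields["random_seed"].default)  # 42
print(Demo.model_fields["model"].default)        # None
```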
**guidellm-0.3.0a26/src/guidellm/utils/cli.py** (new file)

````diff
@@ -0,0 +1,62 @@
+import json
+from typing import Any
+
+import click
+
+
+def parse_json(ctx, param, value):  # noqa: ARG001
+    if value is None:
+        return None
+    try:
+        return json.loads(value)
+    except json.JSONDecodeError as err:
+        raise click.BadParameter(f"{param.name} must be a valid JSON string.") from err
+
+
+def set_if_not_default(ctx: click.Context, **kwargs) -> dict[str, Any]:
+    """
+    Set the value of a click option if it is not the default value.
+    This is useful for setting options that are not None by default.
+    """
+    values = {}
+    for k, v in kwargs.items():
+        if ctx.get_parameter_source(k) != click.core.ParameterSource.DEFAULT:  # type: ignore[attr-defined]
+            values[k] = v
+
+    return values
+
+
+class Union(click.ParamType):
+    """
+    A custom click parameter type that allows for multiple types to be accepted.
+    """
+
+    def __init__(self, *types: click.ParamType):
+        self.types = types
+        self.name = "".join(t.name for t in types)
+
+    def convert(self, value, param, ctx):
+        fails = []
+        for t in self.types:
+            try:
+                return t.convert(value, param, ctx)
+            except click.BadParameter as e:
+                fails.append(str(e))
+                continue
+
+        self.fail("; ".join(fails) or f"Invalid value: {value}")  # noqa: RET503
+
+    def get_metavar(self, param: click.Parameter) -> str:
+        def get_choices(t: click.ParamType) -> str:
+            meta = t.get_metavar(param)
+            return meta if meta is not None else t.name
+
+        # Get the choices for each type in the union.
+        choices_str = "|".join(map(get_choices, self.types))
+
+        # Use curly braces to indicate a required argument.
+        if param.required and param.param_type_name == "argument":
+            return f"{{{choices_str}}}"
+
+        # Use square braces to indicate an option or optional argument.
+        return f"[{choices_str}]"
````
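A short sketch of the new `Union` parameter type in action outside guidellm (the `run` command is hypothetical). Each inner type's `convert` is tried in order and the first success wins, which is exactly how `--scenario` accepts either a file path or a builtin scenario name:

```python
import click

from guidellm.utils import cli as cli_tools

@click.command()
@click.option(
    "--when",
    # Tries click.INT first, then falls back to the named choices.
    type=cli_tools.Union(click.INT, click.Choice(["now", "never"])),
)
def run(when):
    click.echo(repr(when))

# run --when 5     -> 5 (converted by click.INT)
# run --when now   -> 'now' (converted by click.Choice)
# run --when later -> BadParameter listing both conversion failures
```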
**{guidellm-0.3.0a22 → guidellm-0.3.0a26/src/guidellm.egg-info}/PKG-INFO**

Identical to the PKG-INFO diff shown at the top of this page: the `Version` field changes from 0.3.0a22 to 0.3.0a26, and the same 42-line "GuideLLM UI" section is added to the embedded README text.
**{guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm.egg-info/SOURCES.txt**

````diff
@@ -26,6 +26,8 @@ src/guidellm/benchmark/entrypoints.py
 src/guidellm/benchmark/output.py
 src/guidellm/benchmark/profile.py
 src/guidellm/benchmark/progress.py
+src/guidellm/benchmark/scenario.py
+src/guidellm/benchmark/scenarios/__init__.py
 src/guidellm/data/__init__.py
 src/guidellm/data/prideandprejudice.txt.gz
 src/guidellm/dataset/__init__.py
@@ -50,6 +52,7 @@ src/guidellm/scheduler/strategy.py
 src/guidellm/scheduler/types.py
 src/guidellm/scheduler/worker.py
 src/guidellm/utils/__init__.py
+src/guidellm/utils/cli.py
 src/guidellm/utils/colors.py
 src/guidellm/utils/hf_datasets.py
 src/guidellm/utils/hf_transformers.py
````
All remaining files listed above (+0 -0) are unchanged between 0.3.0a22 and 0.3.0a26.