guidellm 0.3.0rc20250507__py3-none-any.whl → 0.4.0a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- guidellm/__init__.py +8 -13
- guidellm/__main__.py +290 -69
- guidellm/backend/__init__.py +6 -6
- guidellm/backend/backend.py +25 -4
- guidellm/backend/openai.py +147 -27
- guidellm/backend/response.py +6 -2
- guidellm/benchmark/__init__.py +16 -22
- guidellm/benchmark/aggregator.py +3 -3
- guidellm/benchmark/benchmark.py +11 -12
- guidellm/benchmark/benchmarker.py +2 -2
- guidellm/benchmark/entrypoints.py +34 -10
- guidellm/benchmark/output.py +57 -5
- guidellm/benchmark/profile.py +4 -4
- guidellm/benchmark/progress.py +2 -2
- guidellm/benchmark/scenario.py +104 -0
- guidellm/benchmark/scenarios/__init__.py +0 -0
- guidellm/config.py +28 -7
- guidellm/dataset/__init__.py +4 -4
- guidellm/dataset/creator.py +1 -1
- guidellm/dataset/synthetic.py +36 -11
- guidellm/logger.py +8 -4
- guidellm/objects/__init__.py +2 -2
- guidellm/objects/pydantic.py +30 -1
- guidellm/objects/statistics.py +20 -14
- guidellm/preprocess/__init__.py +3 -0
- guidellm/preprocess/dataset.py +374 -0
- guidellm/presentation/__init__.py +28 -0
- guidellm/presentation/builder.py +27 -0
- guidellm/presentation/data_models.py +232 -0
- guidellm/presentation/injector.py +66 -0
- guidellm/request/__init__.py +6 -3
- guidellm/request/loader.py +5 -5
- guidellm/{scheduler → request}/types.py +4 -1
- guidellm/scheduler/__init__.py +10 -15
- guidellm/scheduler/queues.py +25 -0
- guidellm/scheduler/result.py +21 -3
- guidellm/scheduler/scheduler.py +68 -60
- guidellm/scheduler/strategy.py +26 -24
- guidellm/scheduler/worker.py +64 -103
- guidellm/utils/__init__.py +17 -5
- guidellm/utils/cli.py +62 -0
- guidellm/utils/default_group.py +105 -0
- guidellm/utils/dict.py +23 -0
- guidellm/utils/hf_datasets.py +36 -0
- guidellm/utils/random.py +1 -1
- guidellm/utils/text.py +12 -5
- guidellm/version.py +6 -0
- guidellm-0.4.0a0.dist-info/METADATA +317 -0
- guidellm-0.4.0a0.dist-info/RECORD +62 -0
- {guidellm-0.3.0rc20250507.dist-info → guidellm-0.4.0a0.dist-info}/WHEEL +1 -1
- guidellm-0.3.0rc20250507.dist-info/METADATA +0 -451
- guidellm-0.3.0rc20250507.dist-info/RECORD +0 -48
- {guidellm-0.3.0rc20250507.dist-info → guidellm-0.4.0a0.dist-info}/entry_points.txt +0 -0
- {guidellm-0.3.0rc20250507.dist-info → guidellm-0.4.0a0.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.3.0rc20250507.dist-info → guidellm-0.4.0a0.dist-info}/top_level.txt +0 -0
guidellm/__init__.py
CHANGED

@@ -3,15 +3,12 @@ Guidellm is a package that provides an easy and intuitive interface for
 evaluating and benchmarking large language models (LLMs).
 """
 
-# flake8: noqa
-
-import os
-import logging
 import contextlib
-
+import logging
+import os
 
 with (
-    open(os.devnull, "w") as devnull,
+    open(os.devnull, "w") as devnull,  # noqa: PTH123
     contextlib.redirect_stderr(devnull),
     contextlib.redirect_stdout(devnull),
 ):
@@ -24,28 +21,26 @@ with (
     logging.getLogger("transformers").setLevel(logging.ERROR)
 
 from .config import (
-    settings,
     DatasetSettings,
     Environment,
     LoggingSettings,
     OpenAISettings,
-    print_config,
     Settings,
+    print_config,
     reload_settings,
+    settings,
 )
 from .logger import configure_logger, logger
 
 __all__ = [
-    # Config
     "DatasetSettings",
     "Environment",
     "LoggingSettings",
     "OpenAISettings",
-    "print_config",
     "Settings",
+    "configure_logger",
+    "logger",
+    "print_config",
     "reload_settings",
     "settings",
-    # Logger
-    "logger",
-    "configure_logger",
 ]
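The change to this file is purely organizational (sorted imports and a sorted `__all__`); the public surface is unchanged. A minimal sanity check of that surface, assuming `configure_logger` accepts its default configuration:

import guidellm

guidellm.configure_logger()  # assumed to accept its default config
guidellm.logger.info("loaded settings type: {}", type(guidellm.settings).__name__)
guidellm.print_config()
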
guidellm/__main__.py
CHANGED

@@ -1,54 +1,70 @@
 import asyncio
-import json
+import codecs
 from pathlib import Path
 from typing import get_args
 
 import click
+from pydantic import ValidationError
 
 from guidellm.backend import BackendType
-from guidellm.benchmark import …
+from guidellm.benchmark import (
+    ProfileType,
+    reimport_benchmarks_report,
+)
+from guidellm.benchmark.entrypoints import benchmark_with_scenario
+from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios
 from guidellm.config import print_config
+from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset
 from guidellm.scheduler import StrategyType
+from guidellm.utils import DefaultGroupHandler
+from guidellm.utils import cli as cli_tools
 
-STRATEGY_PROFILE_CHOICES = …
-    list(get_args(ProfileType)) + list(get_args(StrategyType))
+STRATEGY_PROFILE_CHOICES = list(
+    set(list(get_args(ProfileType)) + list(get_args(StrategyType)))
 )
 
 
-def parse_json(ctx, param, value):  # noqa: ARG001
-    if value is None:
-        return None
-    try:
-        return json.loads(value)
-    except json.JSONDecodeError as err:
-        raise click.BadParameter(f"{param.name} must be a valid JSON string.") from err
-
-
-def parse_number_str(ctx, param, value):  # noqa: ARG001
-    if value is None:
-        return None
-
-    values = value.split(",") if "," in value else [value]
-
-    try:
-        return [int(val) if val.isdigit() else float(val) for val in values]
-    except ValueError as err:
-        raise click.BadParameter(
-            f"{param.name} must be a number or comma-separated list of numbers."
-        ) from err
-
-
 @click.group()
+@click.version_option(package_name="guidellm", message="guidellm version: %(version)s")
 def cli():
     pass
 
 
-@cli.…
-    help="…
+@cli.group(
+    help="Commands to run a new benchmark or load a prior one.",
+    cls=DefaultGroupHandler,
+    default="run",
+)
+def benchmark():
+    pass
+
+
+@benchmark.command(
+    "run",
+    help="Run a benchmark against a generative model using the specified arguments.",
+    context_settings={"auto_envvar_prefix": "GUIDELLM"},
+)
+@click.option(
+    "--scenario",
+    type=cli_tools.Union(
+        click.Path(
+            exists=True,
+            readable=True,
+            file_okay=True,
+            dir_okay=False,
+            path_type=Path,
+        ),
+        click.Choice(get_builtin_scenarios()),
+    ),
+    default=None,
+    help=(
+        "The name of a builtin scenario or path to a config file. "
+        "Missing values from the config will use defaults. "
+        "Options specified on the commandline will override the scenario."
+    ),
 )
 @click.option(
     "--target",
-    required=True,
     type=str,
     help="The target path for the backend to run benchmarks against. For example, http://localhost:8000",
 )
@@ -59,20 +75,22 @@ def cli():
         "The type of backend to use to run requests against. Defaults to 'openai_http'."
         f" Supported types: {', '.join(get_args(BackendType))}"
     ),
-    default="…
+    default=GenerativeTextScenario.get_default("backend_type"),
 )
 @click.option(
     "--backend-args",
-    callback=parse_json,
-    default=…
+    callback=cli_tools.parse_json,
+    default=GenerativeTextScenario.get_default("backend_args"),
     help=(
         "A JSON string containing any arguments to pass to the backend as a "
-        "dict with **kwargs."
+        "dict with **kwargs. Headers can be removed by setting their value to "
+        "null. For example: "
+        """'{"headers": {"Authorization": null, "Custom-Header": "Custom-Value"}}'"""
     ),
 )
 @click.option(
     "--model",
-    default=…
+    default=GenerativeTextScenario.get_default("model"),
     type=str,
     help=(
         "The ID of the model to benchmark within the backend. "
@@ -81,7 +99,7 @@ def cli():
 )
 @click.option(
     "--processor",
-    default=…
+    default=GenerativeTextScenario.get_default("processor"),
     type=str,
     help=(
         "The processor or tokenizer to use to calculate token counts for statistics "
@@ -91,8 +109,8 @@ def cli():
 )
 @click.option(
     "--processor-args",
-    default=…
-    callback=parse_json,
+    default=GenerativeTextScenario.get_default("processor_args"),
+    callback=cli_tools.parse_json,
     help=(
         "A JSON string containing any arguments to pass to the processor constructor "
         "as a dict with **kwargs."
@@ -100,7 +118,6 @@ def cli():
 )
 @click.option(
     "--data",
-    required=True,
     type=str,
     help=(
         "The HuggingFace dataset ID, a path to a HuggingFace dataset, "
@@ -110,7 +127,8 @@ def cli():
 )
 @click.option(
     "--data-args",
-    …
+    default=GenerativeTextScenario.get_default("data_args"),
+    callback=cli_tools.parse_json,
     help=(
         "A JSON string containing any arguments to pass to the dataset creation "
         "as a dict with **kwargs."
@@ -118,7 +136,7 @@ def cli():
 )
 @click.option(
     "--data-sampler",
-    default=…
+    default=GenerativeTextScenario.get_default("data_sampler"),
     type=click.Choice(["random"]),
     help=(
         "The data sampler type to use. 'random' will add a random shuffle on the data. "
@@ -127,7 +145,6 @@ def cli():
 )
 @click.option(
     "--rate-type",
-    required=True,
     type=click.Choice(STRATEGY_PROFILE_CHOICES),
     help=(
         "The type of benchmark to run. "
@@ -136,8 +153,7 @@ def cli():
 )
 @click.option(
     "--rate",
-    default=…
-    callback=parse_number_str,
+    default=GenerativeTextScenario.get_default("rate"),
     help=(
         "The rates to run the benchmark at. "
         "Can be a single number or a comma-separated list of numbers. "
@@ -150,6 +166,7 @@ def cli():
 @click.option(
     "--max-seconds",
     type=float,
+    default=GenerativeTextScenario.get_default("max_seconds"),
     help=(
         "The maximum number of seconds each benchmark can run for. "
         "If None, will run until max_requests or the data is exhausted."
@@ -158,6 +175,7 @@ def cli():
 @click.option(
     "--max-requests",
     type=int,
+    default=GenerativeTextScenario.get_default("max_requests"),
     help=(
         "The maximum number of requests each benchmark can run for. "
         "If None, will run until max_seconds or the data is exhausted."
@@ -166,7 +184,7 @@ def cli():
 @click.option(
     "--warmup-percent",
     type=float,
-    default=…
+    default=GenerativeTextScenario.get_default("warmup_percent"),
     help=(
         "The percent of the benchmark (based on max-seconds, max-requets, "
         "or lenth of dataset) to run as a warmup and not include in the final results. "
@@ -176,6 +194,7 @@ def cli():
 @click.option(
     "--cooldown-percent",
     type=float,
+    default=GenerativeTextScenario.get_default("cooldown_percent"),
     help=(
         "The percent of the benchmark (based on max-seconds, max-requets, or lenth "
         "of dataset) to run as a cooldown and not include in the final results. "
@@ -204,13 +223,13 @@ def cli():
     help=(
         "The path to save the output to. If it is a directory, "
        "it will save benchmarks.json under it. "
-        "Otherwise, json, yaml, or …
+        "Otherwise, json, yaml, csv, or html files are supported for output types "
         "which will be read from the extension for the file path."
     ),
 )
 @click.option(
     "--output-extras",
-    callback=parse_json,
+    callback=cli_tools.parse_json,
     help="A JSON string of extra data to save with the output benchmarks",
 )
 @click.option(
@@ -220,15 +239,16 @@ def cli():
         "The number of samples to save in the output file. "
         "If None (default), will save all samples."
     ),
-    default=…
+    default=GenerativeTextScenario.get_default("output_sampling"),
 )
 @click.option(
     "--random-seed",
-    default=…
+    default=GenerativeTextScenario.get_default("random_seed"),
     type=int,
     help="The random seed to use for benchmarking to ensure reproducibility.",
 )
-def benchmark(
+def run(
+    scenario,
     target,
     backend_type,
     backend_args,
@@ -252,43 +272,244 @@ def benchmark(
     output_sampling,
     random_seed,
 ):
+    click_ctx = click.get_current_context()
+
+    overrides = cli_tools.set_if_not_default(
+        click_ctx,
+        target=target,
+        backend_type=backend_type,
+        backend_args=backend_args,
+        model=model,
+        processor=processor,
+        processor_args=processor_args,
+        data=data,
+        data_args=data_args,
+        data_sampler=data_sampler,
+        rate_type=rate_type,
+        rate=rate,
+        max_seconds=max_seconds,
+        max_requests=max_requests,
+        warmup_percent=warmup_percent,
+        cooldown_percent=cooldown_percent,
+        output_sampling=output_sampling,
+        random_seed=random_seed,
+    )
+
+    try:
+        # If a scenario file was specified read from it
+        if scenario is None:
+            _scenario = GenerativeTextScenario.model_validate(overrides)
+        elif isinstance(scenario, Path):
+            _scenario = GenerativeTextScenario.from_file(scenario, overrides)
+        else:  # Only builtins can make it here; click will catch anything else
+            _scenario = GenerativeTextScenario.from_builtin(scenario, overrides)
+    except ValidationError as e:
+        # Translate pydantic valdation error to click argument error
+        errs = e.errors(include_url=False, include_context=True, include_input=True)
+        param_name = "--" + str(errs[0]["loc"][0]).replace("_", "-")
+        raise click.BadParameter(
+            errs[0]["msg"], ctx=click_ctx, param_hint=param_name
+        ) from e
+
     asyncio.run(
-        …
-        …
-            backend_type=backend_type,
-            backend_args=backend_args,
-            model=model,
-            processor=processor,
-            processor_args=processor_args,
-            data=data,
-            data_args=data_args,
-            data_sampler=data_sampler,
-            rate_type=rate_type,
-            rate=rate,
-            max_seconds=max_seconds,
-            max_requests=max_requests,
-            warmup_percent=warmup_percent,
-            cooldown_percent=cooldown_percent,
+        benchmark_with_scenario(
+            scenario=_scenario,
             show_progress=not disable_progress,
             show_progress_scheduler_stats=display_scheduler_stats,
             output_console=not disable_console_outputs,
             output_path=output_path,
             output_extras=output_extras,
-            output_sampling=output_sampling,
-            random_seed=random_seed,
         )
     )
 
 
+@benchmark.command("from-file", help="Load a saved benchmark report.")
+@click.argument(
+    "path",
+    type=click.Path(file_okay=True, dir_okay=False, exists=True),
+    default=Path.cwd() / "benchmarks.json",
+)
+@click.option(
+    "--output-path",
+    type=click.Path(file_okay=True, dir_okay=True, exists=False),
+    default=None,
+    is_flag=False,
+    flag_value=Path.cwd() / "benchmarks_reexported.json",
+    help=(
+        "Allows re-exporting the benchmarks to another format. "
+        "The path to save the output to. If it is a directory, "
+        "it will save benchmarks.json under it. "
+        "Otherwise, json, yaml, or csv files are supported for output types "
+        "which will be read from the extension for the file path. "
+        "This input is optional. If the output path flag is not provided, "
+        "the benchmarks will not be reexported. If the flag is present but "
+        "no value is specified, it will default to the current directory "
+        "with the file name `benchmarks_reexported.json`."
+    ),
+)
+def from_file(path, output_path):
+    reimport_benchmarks_report(path, output_path)
+
+
+def decode_escaped_str(_ctx, _param, value):
+    """
+    Click auto adds characters. For example, when using --pad-char "\n",
+    it parses it as "\\n". This method decodes the string to handle escape
+    sequences correctly.
+    """
+    if value is None:
+        return None
+    try:
+        return codecs.decode(value, "unicode_escape")
+    except Exception as e:
+        raise click.BadParameter(f"Could not decode escape sequences: {e}") from e
+
+
 @cli.command(
+    short_help="Prints environment variable settings.",
     help=(
         "Print out the available configuration settings that can be set "
         "through environment variables."
-    )
+    ),
 )
 def config():
     print_config()
 
 
+@cli.group(help="General preprocessing tools and utilities.")
+def preprocess():
+    pass
+
+
+@preprocess.command(
+    help=(
+        "Convert a dataset to have specific prompt and output token sizes.\n"
+        "DATA: Path to the input dataset or dataset ID.\n"
+        "OUTPUT_PATH: Path to save the converted dataset, including file suffix."
+    ),
+    context_settings={"auto_envvar_prefix": "GUIDELLM"},
+)
+@click.argument(
+    "data",
+    type=str,
+    required=True,
+)
+@click.argument(
+    "output_path",
+    type=click.Path(file_okay=True, dir_okay=False, writable=True, resolve_path=True),
+    required=True,
+)
+@click.option(
+    "--processor",
+    type=str,
+    required=True,
+    help=(
+        "The processor or tokenizer to use to calculate token counts for statistics "
+        "and synthetic data generation."
+    ),
+)
+@click.option(
+    "--processor-args",
+    default=None,
+    callback=cli_tools.parse_json,
+    help=(
+        "A JSON string containing any arguments to pass to the processor constructor "
+        "as a dict with **kwargs."
+    ),
+)
+@click.option(
+    "--data-args",
+    callback=cli_tools.parse_json,
+    help=(
+        "A JSON string containing any arguments to pass to the dataset creation "
+        "as a dict with **kwargs."
+    ),
+)
+@click.option(
+    "--short-prompt-strategy",
+    type=click.Choice([s.value for s in ShortPromptStrategy]),
+    default=ShortPromptStrategy.IGNORE.value,
+    show_default=True,
+    help="Strategy to handle prompts shorter than the target length. ",
+)
+@click.option(
+    "--pad-char",
+    type=str,
+    default="",
+    callback=decode_escaped_str,
+    help="The token to pad short prompts with when using the 'pad' strategy.",
+)
+@click.option(
+    "--concat-delimiter",
+    type=str,
+    default="",
+    help=(
+        "The delimiter to use when concatenating prompts that are too short."
+        " Used when strategy is 'concatenate'."
+    ),
+)
+@click.option(
+    "--prompt-tokens",
+    type=str,
+    default=None,
+    help="Prompt tokens config (JSON, YAML file or key=value string)",
+)
+@click.option(
+    "--output-tokens",
+    type=str,
+    default=None,
+    help="Output tokens config (JSON, YAML file or key=value string)",
+)
+@click.option(
+    "--push-to-hub",
+    is_flag=True,
+    help="Set this flag to push the converted dataset to the Hugging Face Hub.",
+)
+@click.option(
+    "--hub-dataset-id",
+    type=str,
+    default=None,
+    help="The Hugging Face Hub dataset ID to push to. "
+    "Required if --push-to-hub is used.",
+)
+@click.option(
+    "--random-seed",
+    type=int,
+    default=42,
+    show_default=True,
+    help="Random seed for prompt token sampling and output tokens sampling.",
+)
+def dataset(
+    data,
+    output_path,
+    processor,
+    processor_args,
+    data_args,
+    short_prompt_strategy,
+    pad_char,
+    concat_delimiter,
+    prompt_tokens,
+    output_tokens,
+    push_to_hub,
+    hub_dataset_id,
+    random_seed,
+):
+    process_dataset(
+        data=data,
+        output_path=output_path,
+        processor=processor,
+        prompt_tokens=prompt_tokens,
+        output_tokens=output_tokens,
+        processor_args=processor_args,
+        data_args=data_args,
+        short_prompt_strategy=short_prompt_strategy,
+        pad_char=pad_char,
+        concat_delimiter=concat_delimiter,
+        push_to_hub=push_to_hub,
+        hub_dataset_id=hub_dataset_id,
+        random_seed=random_seed,
+    )
+
+
 if __name__ == "__main__":
     cli()
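The new `run` command is now a thin wrapper over the scenario API, so the same benchmark can be driven programmatically. A hedged sketch using only names visible in this diff; the scenario field values (target URL, dataset path, rate type) are placeholders, and "synchronous" is assumed to be a valid strategy choice:

import asyncio

from guidellm.benchmark.entrypoints import benchmark_with_scenario
from guidellm.benchmark.scenario import GenerativeTextScenario

# Equivalent of `guidellm benchmark run` with CLI overrides: unset fields fall
# back to GenerativeTextScenario defaults, exactly as the click options do.
scenario = GenerativeTextScenario.model_validate(
    {
        "target": "http://localhost:8000",  # placeholder endpoint
        "data": "prompts.jsonl",            # placeholder dataset
        "rate_type": "synchronous",         # assumed valid StrategyType value
    }
)

asyncio.run(
    benchmark_with_scenario(
        scenario=scenario,
        show_progress=True,
        show_progress_scheduler_stats=False,
        output_console=True,
        output_path="benchmarks.json",
        output_extras=None,
    )
)

Likewise, the new `preprocess dataset` command forwards directly to `process_dataset`; a sketch with placeholder paths and a token-config string whose key=value format is an assumption based on the option help text:

from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset

process_dataset(
    data="input_dataset.jsonl",             # placeholder input dataset
    output_path="converted_dataset.jsonl",  # placeholder output file
    processor="gpt2",                       # placeholder tokenizer id
    prompt_tokens="average=256",            # assumed key=value config format
    output_tokens="average=128",            # assumed key=value config format
    short_prompt_strategy=ShortPromptStrategy.IGNORE.value,
    pad_char="",
    concat_delimiter="",
    push_to_hub=False,
    hub_dataset_id=None,
    random_seed=42,
)
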
guidellm/backend/__init__.py
CHANGED

@@ -11,13 +11,13 @@ from .response import (
 )
 
 __all__ = [
-    "…
-    "…
-    "RequestArgs",
-    "ResponseSummary",
+    "CHAT_COMPLETIONS_PATH",
+    "TEXT_COMPLETIONS_PATH",
     "Backend",
     "BackendType",
     "OpenAIHTTPBackend",
-    "…
-    "…
+    "RequestArgs",
+    "ResponseSummary",
+    "StreamingResponseType",
+    "StreamingTextResponse",
 ]
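With the re-sorted `__all__`, the completion route constants sit alongside the backend classes in the public surface. A quick smoke test of the re-exported names:

from guidellm.backend import (
    CHAT_COMPLETIONS_PATH,
    TEXT_COMPLETIONS_PATH,
    Backend,
    StreamingTextResponse,
)

# The constants are importable directly from the subpackage root.
print(TEXT_COMPLETIONS_PATH, CHAT_COMPLETIONS_PATH)
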
guidellm/backend/backend.py
CHANGED

@@ -7,6 +7,7 @@ from loguru import logger
 from PIL import Image
 
 from guidellm.backend.response import ResponseSummary, StreamingTextResponse
+from guidellm.config import settings
 
 __all__ = [
     "Backend",
@@ -110,6 +111,14 @@ class Backend(ABC):
     """
     ...
 
+    @abstractmethod
+    async def reset(self) -> None:
+        """
+        Reset the connection object. This is useful for backends that
+        reuse connections or have state that needs to be cleared.
+        """
+        ...
+
     async def validate(self):
         """
         Handle final setup and validate the backend is ready for use.
@@ -121,10 +130,22 @@ class Backend(ABC):
         if not models:
             raise ValueError("No models available for the backend")
 
-        …
-        …
-        …
-        …
+        # Use the preferred route defined in the global settings when performing the
+        # validation request. This avoids calling an unavailable endpoint (ie
+        # /v1/completions) when the deployment only supports the chat completions
+        # endpoint.
+        if settings.preferred_route == "chat_completions":
+            async for _ in self.chat_completions(  # type: ignore[attr-defined]
+                content="Test connection", output_token_count=1
+            ):
+                pass
+        else:
+            async for _ in self.text_completions(  # type: ignore[attr-defined]
+                prompt="Test connection", output_token_count=1
+            ):
+                pass
+
+        await self.reset()
 
     @abstractmethod
     async def check_setup(self):