guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- guidellm/__init__.py +5 -2
- guidellm/__main__.py +524 -255
- guidellm/backends/__init__.py +33 -0
- guidellm/backends/backend.py +109 -0
- guidellm/backends/openai.py +340 -0
- guidellm/backends/response_handlers.py +428 -0
- guidellm/benchmark/__init__.py +69 -39
- guidellm/benchmark/benchmarker.py +160 -316
- guidellm/benchmark/entrypoints.py +560 -127
- guidellm/benchmark/outputs/__init__.py +24 -0
- guidellm/benchmark/outputs/console.py +633 -0
- guidellm/benchmark/outputs/csv.py +721 -0
- guidellm/benchmark/outputs/html.py +473 -0
- guidellm/benchmark/outputs/output.py +169 -0
- guidellm/benchmark/outputs/serialized.py +69 -0
- guidellm/benchmark/profiles.py +718 -0
- guidellm/benchmark/progress.py +553 -556
- guidellm/benchmark/scenarios/__init__.py +40 -0
- guidellm/benchmark/scenarios/chat.json +6 -0
- guidellm/benchmark/scenarios/rag.json +6 -0
- guidellm/benchmark/schemas/__init__.py +66 -0
- guidellm/benchmark/schemas/base.py +402 -0
- guidellm/benchmark/schemas/generative/__init__.py +55 -0
- guidellm/benchmark/schemas/generative/accumulator.py +841 -0
- guidellm/benchmark/schemas/generative/benchmark.py +163 -0
- guidellm/benchmark/schemas/generative/entrypoints.py +381 -0
- guidellm/benchmark/schemas/generative/metrics.py +927 -0
- guidellm/benchmark/schemas/generative/report.py +158 -0
- guidellm/data/__init__.py +34 -4
- guidellm/data/builders.py +541 -0
- guidellm/data/collators.py +16 -0
- guidellm/data/config.py +120 -0
- guidellm/data/deserializers/__init__.py +49 -0
- guidellm/data/deserializers/deserializer.py +141 -0
- guidellm/data/deserializers/file.py +223 -0
- guidellm/data/deserializers/huggingface.py +94 -0
- guidellm/data/deserializers/memory.py +194 -0
- guidellm/data/deserializers/synthetic.py +246 -0
- guidellm/data/entrypoints.py +52 -0
- guidellm/data/loaders.py +190 -0
- guidellm/data/preprocessors/__init__.py +27 -0
- guidellm/data/preprocessors/formatters.py +410 -0
- guidellm/data/preprocessors/mappers.py +196 -0
- guidellm/data/preprocessors/preprocessor.py +30 -0
- guidellm/data/processor.py +29 -0
- guidellm/data/schemas.py +175 -0
- guidellm/data/utils/__init__.py +6 -0
- guidellm/data/utils/dataset.py +94 -0
- guidellm/extras/__init__.py +4 -0
- guidellm/extras/audio.py +220 -0
- guidellm/extras/vision.py +242 -0
- guidellm/logger.py +2 -2
- guidellm/mock_server/__init__.py +8 -0
- guidellm/mock_server/config.py +84 -0
- guidellm/mock_server/handlers/__init__.py +17 -0
- guidellm/mock_server/handlers/chat_completions.py +280 -0
- guidellm/mock_server/handlers/completions.py +280 -0
- guidellm/mock_server/handlers/tokenizer.py +142 -0
- guidellm/mock_server/models.py +510 -0
- guidellm/mock_server/server.py +238 -0
- guidellm/mock_server/utils.py +302 -0
- guidellm/scheduler/__init__.py +69 -26
- guidellm/scheduler/constraints/__init__.py +49 -0
- guidellm/scheduler/constraints/constraint.py +325 -0
- guidellm/scheduler/constraints/error.py +411 -0
- guidellm/scheduler/constraints/factory.py +182 -0
- guidellm/scheduler/constraints/request.py +312 -0
- guidellm/scheduler/constraints/saturation.py +722 -0
- guidellm/scheduler/environments.py +252 -0
- guidellm/scheduler/scheduler.py +137 -368
- guidellm/scheduler/schemas.py +358 -0
- guidellm/scheduler/strategies.py +617 -0
- guidellm/scheduler/worker.py +413 -419
- guidellm/scheduler/worker_group.py +712 -0
- guidellm/schemas/__init__.py +65 -0
- guidellm/schemas/base.py +417 -0
- guidellm/schemas/info.py +188 -0
- guidellm/schemas/request.py +235 -0
- guidellm/schemas/request_stats.py +349 -0
- guidellm/schemas/response.py +124 -0
- guidellm/schemas/statistics.py +1018 -0
- guidellm/{config.py → settings.py} +31 -24
- guidellm/utils/__init__.py +71 -8
- guidellm/utils/auto_importer.py +98 -0
- guidellm/utils/cli.py +132 -5
- guidellm/utils/console.py +566 -0
- guidellm/utils/encoding.py +778 -0
- guidellm/utils/functions.py +159 -0
- guidellm/utils/hf_datasets.py +1 -2
- guidellm/utils/hf_transformers.py +4 -4
- guidellm/utils/imports.py +9 -0
- guidellm/utils/messaging.py +1118 -0
- guidellm/utils/mixins.py +115 -0
- guidellm/utils/random.py +3 -4
- guidellm/utils/registry.py +220 -0
- guidellm/utils/singleton.py +133 -0
- guidellm/utils/synchronous.py +159 -0
- guidellm/utils/text.py +163 -50
- guidellm/utils/typing.py +41 -0
- guidellm/version.py +2 -2
- guidellm-0.6.0a5.dist-info/METADATA +364 -0
- guidellm-0.6.0a5.dist-info/RECORD +109 -0
- guidellm/backend/__init__.py +0 -23
- guidellm/backend/backend.py +0 -259
- guidellm/backend/openai.py +0 -708
- guidellm/backend/response.py +0 -136
- guidellm/benchmark/aggregator.py +0 -760
- guidellm/benchmark/benchmark.py +0 -837
- guidellm/benchmark/output.py +0 -997
- guidellm/benchmark/profile.py +0 -409
- guidellm/benchmark/scenario.py +0 -104
- guidellm/data/prideandprejudice.txt.gz +0 -0
- guidellm/dataset/__init__.py +0 -22
- guidellm/dataset/creator.py +0 -213
- guidellm/dataset/entrypoints.py +0 -42
- guidellm/dataset/file.py +0 -92
- guidellm/dataset/hf_datasets.py +0 -62
- guidellm/dataset/in_memory.py +0 -132
- guidellm/dataset/synthetic.py +0 -287
- guidellm/objects/__init__.py +0 -18
- guidellm/objects/pydantic.py +0 -89
- guidellm/objects/statistics.py +0 -953
- guidellm/preprocess/__init__.py +0 -3
- guidellm/preprocess/dataset.py +0 -374
- guidellm/presentation/__init__.py +0 -28
- guidellm/presentation/builder.py +0 -27
- guidellm/presentation/data_models.py +0 -232
- guidellm/presentation/injector.py +0 -66
- guidellm/request/__init__.py +0 -18
- guidellm/request/loader.py +0 -284
- guidellm/request/request.py +0 -79
- guidellm/request/types.py +0 -10
- guidellm/scheduler/queues.py +0 -25
- guidellm/scheduler/result.py +0 -155
- guidellm/scheduler/strategy.py +0 -495
- guidellm-0.3.1.dist-info/METADATA +0 -329
- guidellm-0.3.1.dist-info/RECORD +0 -62
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/WHEEL +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/entry_points.txt +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/top_level.txt +0 -0
guidellm/__main__.py
CHANGED
|
@@ -1,51 +1,110 @@
|
|
|
1
|
+
"""
|
|
2
|
+
GuideLLM command-line interface entry point.
|
|
3
|
+
|
|
4
|
+
Primary CLI application providing benchmark execution, dataset preprocessing, and
|
|
5
|
+
mock server functionality for language model evaluation. Organizes commands into
|
|
6
|
+
three main groups: benchmark operations for performance testing, preprocessing
|
|
7
|
+
utilities for data transformation, and mock server capabilities for development
|
|
8
|
+
and testing. Supports multiple backends, output formats, and flexible configuration
|
|
9
|
+
through CLI options and environment variables.
|
|
10
|
+
|
|
11
|
+
Example:
|
|
12
|
+
::
|
|
13
|
+
# Run a benchmark against a model
|
|
14
|
+
guidellm benchmark run --target http://localhost:8000 --data dataset.json \\
|
|
15
|
+
--profile sweep
|
|
16
|
+
|
|
17
|
+
# Preprocess a dataset
|
|
18
|
+
guidellm preprocess dataset input.json output.json --processor gpt2
|
|
19
|
+
|
|
20
|
+
# Start a mock server for testing
|
|
21
|
+
guidellm mock-server --host 0.0.0.0 --port 8080
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
1
26
|
import asyncio
|
|
2
27
|
import codecs
|
|
3
28
|
from pathlib import Path
|
|
4
|
-
from typing import get_args
|
|
5
29
|
|
|
6
30
|
import click
|
|
7
31
|
from pydantic import ValidationError
|
|
8
32
|
|
|
9
|
-
from guidellm.
|
|
33
|
+
from guidellm.data import ShortPromptStrategy, process_dataset
|
|
34
|
+
|
|
35
|
+
try:
|
|
36
|
+
import uvloop
|
|
37
|
+
except ImportError:
|
|
38
|
+
uvloop = None # type: ignore[assignment] # Optional dependency
|
|
39
|
+
|
|
40
|
+
from guidellm.backends import BackendType
|
|
10
41
|
from guidellm.benchmark import (
|
|
42
|
+
BenchmarkGenerativeTextArgs,
|
|
43
|
+
GenerativeConsoleBenchmarkerProgress,
|
|
11
44
|
ProfileType,
|
|
45
|
+
benchmark_generative_text,
|
|
46
|
+
get_builtin_scenarios,
|
|
12
47
|
reimport_benchmarks_report,
|
|
13
48
|
)
|
|
14
|
-
from guidellm.
|
|
15
|
-
from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios
|
|
16
|
-
from guidellm.config import print_config
|
|
17
|
-
from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset
|
|
49
|
+
from guidellm.mock_server import MockServer, MockServerConfig
|
|
18
50
|
from guidellm.scheduler import StrategyType
|
|
19
|
-
from guidellm.
|
|
51
|
+
from guidellm.schemas import GenerativeRequestType
|
|
52
|
+
from guidellm.settings import print_config
|
|
53
|
+
from guidellm.utils import Console, DefaultGroupHandler, get_literal_vals
|
|
20
54
|
from guidellm.utils import cli as cli_tools
|
|
21
55
|
|
|
22
|
-
STRATEGY_PROFILE_CHOICES = list(
|
|
23
|
-
|
|
24
|
-
|
|
56
|
+
# Literal values of ProfileType and StrategyType combined into one list; used below
# as the accepted choices (and help text) of the --profile / --rate-type option.
STRATEGY_PROFILE_CHOICES: list[str] = [*get_literal_vals(ProfileType | StrategyType)]
"""Available strategy and profile type choices for benchmark execution."""
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def decode_escaped_str(_ctx, _param, value):
    r"""
    Decode backslash escape sequences in a Click option value.

    Intended for use as a Click option ``callback``. Escape sequences that arrive
    as literal text (for example a backslash followed by ``n``) are converted to
    the characters they denote using the ``unicode_escape`` codec. Characters
    that are not part of an escape sequence, including non-ASCII text, are
    returned unchanged.

    :param _ctx: Click context (unused)
    :param _param: Click parameter (unused)
    :param value: String value to decode
    :return: Decoded string with escape sequences resolved, or None if input is None
    :raises click.BadParameter: When escape sequence decoding fails
    """
    if value is None:
        return None

    try:
        # The "unicode_escape" codec treats its input bytes as Latin-1. Passing a
        # str directly makes Python encode it as UTF-8 first, which corrupts every
        # non-ASCII character ("é" comes back as "Ã©"). Encoding to Latin-1
        # ourselves keeps those characters intact; "backslashreplace" rewrites
        # anything outside Latin-1 as a \uXXXX / \UXXXXXXXX escape that the codec
        # then restores to the original character.
        raw = value.encode("latin-1", errors="backslashreplace")
        return codecs.decode(raw, "unicode_escape")
    except Exception as e:
        # CLI boundary: report any decoding failure as a user-facing usage error.
        raise click.BadParameter(f"Could not decode escape sequences: {e}") from e
|
|
25
79
|
|
|
26
80
|
|
|
27
81
|
@click.group()
@click.version_option(package_name="guidellm", message="guidellm version: %(version)s")
def cli() -> None:
    """GuideLLM CLI for benchmarking, preprocessing, and testing language models."""
    # Root command group: sub-groups and commands attach to it through the
    # ``@cli.group`` / ``@cli.command`` decorators further down. No ``help=`` is
    # passed above, so the docstring doubles as the help text and is kept verbatim.
|
|
31
85
|
|
|
32
86
|
|
|
33
87
|
@cli.group(
    cls=DefaultGroupHandler,
    default="run",
    help="Run a benchmark or load a previously saved benchmark report.",
)
def benchmark() -> None:
    """
    Command group holding the benchmark sub-commands.

    The group is built with ``DefaultGroupHandler`` and ``default="run"``; the
    user-facing help text comes from the explicit ``help=`` argument above.
    """
|
|
40
94
|
|
|
41
95
|
|
|
42
96
|
@benchmark.command(
|
|
43
97
|
"run",
|
|
44
|
-
help=
|
|
98
|
+
help=(
|
|
99
|
+
"Run a benchmark against a generative model. "
|
|
100
|
+
"Supports multiple backends, data sources, strategies, and output formats. "
|
|
101
|
+
"Configuration can be loaded from a scenario file or specified via options."
|
|
102
|
+
),
|
|
45
103
|
context_settings={"auto_envvar_prefix": "GUIDELLM"},
|
|
46
104
|
)
|
|
47
105
|
@click.option(
|
|
48
106
|
"--scenario",
|
|
107
|
+
"-c",
|
|
49
108
|
type=cli_tools.Union(
|
|
50
109
|
click.Path(
|
|
51
110
|
exists=True,
|
|
@@ -54,276 +113,375 @@ def benchmark():
|
|
|
54
113
|
dir_okay=False,
|
|
55
114
|
path_type=Path,
|
|
56
115
|
),
|
|
57
|
-
click.Choice(get_builtin_scenarios()),
|
|
116
|
+
click.Choice(tuple(get_builtin_scenarios().keys())),
|
|
58
117
|
),
|
|
59
118
|
default=None,
|
|
60
119
|
help=(
|
|
61
|
-
"
|
|
62
|
-
"
|
|
63
|
-
"Options specified on the commandline will override the scenario."
|
|
120
|
+
"Builtin scenario name or path to config file. "
|
|
121
|
+
"CLI options override scenario settings."
|
|
64
122
|
),
|
|
65
123
|
)
|
|
66
124
|
@click.option(
|
|
67
125
|
"--target",
|
|
68
126
|
type=str,
|
|
69
|
-
help="
|
|
127
|
+
help="Target backend URL (e.g., http://localhost:8000).",
|
|
70
128
|
)
|
|
71
129
|
@click.option(
|
|
72
|
-
"--
|
|
73
|
-
type=
|
|
130
|
+
"--data",
|
|
131
|
+
type=str,
|
|
132
|
+
multiple=True,
|
|
74
133
|
help=(
|
|
75
|
-
"
|
|
76
|
-
|
|
134
|
+
"HuggingFace dataset ID, path to dataset, path to data file "
|
|
135
|
+
"(csv/json/jsonl/txt), or synthetic data config (json/key=value)."
|
|
77
136
|
),
|
|
78
|
-
default=GenerativeTextScenario.get_default("backend_type"),
|
|
79
137
|
)
|
|
80
138
|
@click.option(
|
|
81
|
-
"--
|
|
82
|
-
|
|
83
|
-
|
|
139
|
+
"--profile",
|
|
140
|
+
"--rate-type", # legacy alias
|
|
141
|
+
"profile",
|
|
142
|
+
default=BenchmarkGenerativeTextArgs.get_default("profile"),
|
|
143
|
+
type=click.Choice(STRATEGY_PROFILE_CHOICES),
|
|
144
|
+
help=f"Benchmark profile type. Options: {', '.join(STRATEGY_PROFILE_CHOICES)}.",
|
|
145
|
+
)
|
|
146
|
+
@click.option(
|
|
147
|
+
"--rate",
|
|
148
|
+
callback=cli_tools.parse_list_floats,
|
|
149
|
+
multiple=True,
|
|
150
|
+
default=BenchmarkGenerativeTextArgs.get_default("rate"),
|
|
84
151
|
help=(
|
|
85
|
-
"
|
|
86
|
-
"
|
|
87
|
-
"
|
|
88
|
-
"""'{"headers": {"Authorization": null, "Custom-Header": "Custom-Value"}}'"""
|
|
152
|
+
"Benchmark rate(s) to test. Meaning depends on profile: "
|
|
153
|
+
"sweep=number of benchmarks, concurrent=concurrent requests, "
|
|
154
|
+
"async/constant/poisson=requests per second."
|
|
89
155
|
),
|
|
90
156
|
)
|
|
157
|
+
# Backend configuration
|
|
158
|
+
@click.option(
|
|
159
|
+
"--backend",
|
|
160
|
+
"--backend-type", # legacy alias
|
|
161
|
+
"backend",
|
|
162
|
+
type=click.Choice(list(get_literal_vals(BackendType))),
|
|
163
|
+
default=BenchmarkGenerativeTextArgs.get_default("backend"),
|
|
164
|
+
help=f"Backend type. Options: {', '.join(get_literal_vals(BackendType))}.",
|
|
165
|
+
)
|
|
166
|
+
@click.option(
|
|
167
|
+
"--backend-kwargs",
|
|
168
|
+
"--backend-args", # legacy alias
|
|
169
|
+
"backend_kwargs",
|
|
170
|
+
callback=cli_tools.parse_json,
|
|
171
|
+
default=BenchmarkGenerativeTextArgs.get_default("backend_kwargs"),
|
|
172
|
+
help="JSON string of arguments to pass to the backend.",
|
|
173
|
+
)
|
|
91
174
|
@click.option(
|
|
92
175
|
"--model",
|
|
93
|
-
default=
|
|
176
|
+
default=BenchmarkGenerativeTextArgs.get_default("model"),
|
|
94
177
|
type=str,
|
|
178
|
+
help="Model ID to benchmark. If not provided, uses first available model.",
|
|
179
|
+
)
|
|
180
|
+
# Data configuration
|
|
181
|
+
@click.option(
|
|
182
|
+
"--request-type",
|
|
183
|
+
default=BenchmarkGenerativeTextArgs.get_default("data_request_formatter"),
|
|
184
|
+
type=click.Choice(list(get_literal_vals(GenerativeRequestType))),
|
|
95
185
|
help=(
|
|
96
|
-
"
|
|
97
|
-
"
|
|
186
|
+
f"Request type to create for each data sample. "
|
|
187
|
+
f"Options: {', '.join(get_literal_vals(GenerativeRequestType))}."
|
|
98
188
|
),
|
|
99
189
|
)
|
|
190
|
+
@click.option(
|
|
191
|
+
"--request-formatter-kwargs",
|
|
192
|
+
default=None,
|
|
193
|
+
callback=cli_tools.parse_json,
|
|
194
|
+
help="JSON string of arguments to pass to the request formatter.",
|
|
195
|
+
)
|
|
100
196
|
@click.option(
|
|
101
197
|
"--processor",
|
|
102
|
-
default=
|
|
198
|
+
default=BenchmarkGenerativeTextArgs.get_default("processor"),
|
|
103
199
|
type=str,
|
|
104
200
|
help=(
|
|
105
|
-
"
|
|
106
|
-
"
|
|
107
|
-
"using the model arg, if needed."
|
|
201
|
+
"Processor or tokenizer for token count calculations. "
|
|
202
|
+
"If not provided, loads from model."
|
|
108
203
|
),
|
|
109
204
|
)
|
|
110
205
|
@click.option(
|
|
111
206
|
"--processor-args",
|
|
112
|
-
default=
|
|
207
|
+
default=BenchmarkGenerativeTextArgs.get_default("processor_args"),
|
|
113
208
|
callback=cli_tools.parse_json,
|
|
209
|
+
help="JSON string of arguments to pass to the processor constructor.",
|
|
210
|
+
)
|
|
211
|
+
@click.option(
|
|
212
|
+
"--data-args",
|
|
213
|
+
multiple=True,
|
|
214
|
+
default=BenchmarkGenerativeTextArgs.get_default("data_args"),
|
|
215
|
+
callback=cli_tools.parse_json,
|
|
216
|
+
help="JSON string of arguments to pass to dataset creation.",
|
|
217
|
+
)
|
|
218
|
+
@click.option(
|
|
219
|
+
"--data-samples",
|
|
220
|
+
default=BenchmarkGenerativeTextArgs.get_default("data_samples"),
|
|
221
|
+
type=int,
|
|
114
222
|
help=(
|
|
115
|
-
"
|
|
116
|
-
"
|
|
223
|
+
"Number of samples from dataset. -1 (default) uses all samples "
|
|
224
|
+
"and dynamically generates more."
|
|
117
225
|
),
|
|
118
226
|
)
|
|
119
227
|
@click.option(
|
|
120
|
-
"--data",
|
|
121
|
-
|
|
228
|
+
"--data-column-mapper",
|
|
229
|
+
default=BenchmarkGenerativeTextArgs.get_default("data_column_mapper"),
|
|
230
|
+
callback=cli_tools.parse_json,
|
|
231
|
+
help="JSON string of column mappings to apply to the dataset.",
|
|
232
|
+
)
|
|
233
|
+
@click.option(
|
|
234
|
+
"--data-sampler",
|
|
235
|
+
default=BenchmarkGenerativeTextArgs.get_default("data_sampler"),
|
|
236
|
+
type=click.Choice(["shuffle"]),
|
|
237
|
+
help="Data sampler type.",
|
|
238
|
+
)
|
|
239
|
+
@click.option(
|
|
240
|
+
"--data-num-workers",
|
|
241
|
+
default=BenchmarkGenerativeTextArgs.get_default("data_num_workers"),
|
|
242
|
+
type=int,
|
|
243
|
+
help="Number of worker processes for data loading.",
|
|
244
|
+
)
|
|
245
|
+
@click.option(
|
|
246
|
+
"--dataloader-kwargs",
|
|
247
|
+
default=BenchmarkGenerativeTextArgs.get_default("dataloader_kwargs"),
|
|
248
|
+
callback=cli_tools.parse_json,
|
|
249
|
+
help="JSON string of arguments to pass to the dataloader constructor.",
|
|
250
|
+
)
|
|
251
|
+
@click.option(
|
|
252
|
+
"--random-seed",
|
|
253
|
+
default=BenchmarkGenerativeTextArgs.get_default("random_seed"),
|
|
254
|
+
type=int,
|
|
255
|
+
help="Random seed for reproducibility.",
|
|
256
|
+
)
|
|
257
|
+
# Output configuration
|
|
258
|
+
@click.option(
|
|
259
|
+
"--output-dir",
|
|
260
|
+
type=click.Path(file_okay=False, dir_okay=True, path_type=Path),
|
|
261
|
+
default=BenchmarkGenerativeTextArgs.get_default("output_dir"),
|
|
262
|
+
help="The directory path to save file output types in",
|
|
263
|
+
)
|
|
264
|
+
@click.option(
|
|
265
|
+
"--outputs",
|
|
266
|
+
callback=cli_tools.parse_list,
|
|
267
|
+
multiple=True,
|
|
268
|
+
default=BenchmarkGenerativeTextArgs.get_default("outputs"),
|
|
122
269
|
help=(
|
|
123
|
-
"The
|
|
124
|
-
"
|
|
125
|
-
"
|
|
270
|
+
"The filename.ext for each of the outputs to create or the "
|
|
271
|
+
"alises (json, csv, html) for the output files to create with "
|
|
272
|
+
"their default file names (benchmark.[EXT])"
|
|
126
273
|
),
|
|
127
274
|
)
|
|
128
275
|
@click.option(
|
|
129
|
-
"--
|
|
130
|
-
|
|
131
|
-
|
|
276
|
+
"--output-path",
|
|
277
|
+
type=click.Path(),
|
|
278
|
+
default=None,
|
|
132
279
|
help=(
|
|
133
|
-
"
|
|
134
|
-
"
|
|
280
|
+
"Legacy parameter for the output path to save the output result to. "
|
|
281
|
+
"Resolves to fill in output-dir and outputs based on input path."
|
|
135
282
|
),
|
|
136
283
|
)
|
|
137
284
|
@click.option(
|
|
138
|
-
"--
|
|
139
|
-
|
|
140
|
-
|
|
285
|
+
"--disable-console",
|
|
286
|
+
"--disable-console-outputs", # legacy alias
|
|
287
|
+
"disable_console",
|
|
288
|
+
is_flag=True,
|
|
141
289
|
help=(
|
|
142
|
-
"
|
|
143
|
-
"Defaults to None"
|
|
290
|
+
"Disable all outputs to the console (updates, interactive progress, results)."
|
|
144
291
|
),
|
|
145
292
|
)
|
|
146
293
|
@click.option(
|
|
147
|
-
"--
|
|
148
|
-
|
|
294
|
+
"--disable-console-interactive",
|
|
295
|
+
"--disable-progress", # legacy alias
|
|
296
|
+
"disable_console_interactive",
|
|
297
|
+
is_flag=True,
|
|
298
|
+
help="Disable interactive console progress updates.",
|
|
299
|
+
)
|
|
300
|
+
# Aggregators configuration
|
|
301
|
+
@click.option(
|
|
302
|
+
"--warmup",
|
|
303
|
+
"--warmup-percent", # legacy alias
|
|
304
|
+
"warmup",
|
|
305
|
+
default=BenchmarkGenerativeTextArgs.get_default("warmup"),
|
|
306
|
+
callback=cli_tools.parse_json,
|
|
149
307
|
help=(
|
|
150
|
-
"
|
|
151
|
-
|
|
308
|
+
"Warmup specification: int, float, or dict as string "
|
|
309
|
+
"(json or key=value). "
|
|
310
|
+
"Controls time or requests before measurement starts. "
|
|
311
|
+
"Numeric in (0, 1): percent of duration or request count. "
|
|
312
|
+
"Numeric >=1: duration in seconds or request count. "
|
|
313
|
+
"Advanced config: see TransientPhaseConfig schema."
|
|
152
314
|
),
|
|
153
315
|
)
|
|
154
316
|
@click.option(
|
|
155
|
-
"--
|
|
156
|
-
|
|
317
|
+
"--cooldown",
|
|
318
|
+
"--cooldown-percent", # legacy alias
|
|
319
|
+
"cooldown",
|
|
320
|
+
default=BenchmarkGenerativeTextArgs.get_default("cooldown"),
|
|
321
|
+
callback=cli_tools.parse_json,
|
|
157
322
|
help=(
|
|
158
|
-
"
|
|
159
|
-
"
|
|
160
|
-
"
|
|
161
|
-
"
|
|
162
|
-
"
|
|
163
|
-
"
|
|
323
|
+
"Cooldown specification: int, float, or dict as string "
|
|
324
|
+
"(json or key=value). "
|
|
325
|
+
"Controls time or requests after measurement ends. "
|
|
326
|
+
"Numeric in (0, 1): percent of duration or request count. "
|
|
327
|
+
"Numeric >=1: duration in seconds or request count. "
|
|
328
|
+
"Advanced config: see TransientPhaseConfig schema."
|
|
164
329
|
),
|
|
165
330
|
)
|
|
166
331
|
@click.option(
|
|
167
|
-
"--
|
|
332
|
+
"--rampup",
|
|
168
333
|
type=float,
|
|
169
|
-
default=
|
|
334
|
+
default=BenchmarkGenerativeTextArgs.get_default("rampup"),
|
|
170
335
|
help=(
|
|
171
|
-
"The
|
|
172
|
-
"
|
|
336
|
+
"The time, in seconds, to ramp up the request rate over. "
|
|
337
|
+
"Only applicable for Throughput/Concurrent strategies"
|
|
173
338
|
),
|
|
174
339
|
)
|
|
175
340
|
@click.option(
|
|
176
|
-
"--
|
|
341
|
+
"--sample-requests",
|
|
342
|
+
"--output-sampling", # legacy alias
|
|
343
|
+
"sample_requests",
|
|
177
344
|
type=int,
|
|
178
|
-
default=GenerativeTextScenario.get_default("max_requests"),
|
|
179
345
|
help=(
|
|
180
|
-
"
|
|
181
|
-
"
|
|
346
|
+
"Number of sample requests per status to save. "
|
|
347
|
+
"None (default) saves all, recommended: 20."
|
|
182
348
|
),
|
|
183
349
|
)
|
|
350
|
+
# Constraints configuration
|
|
184
351
|
@click.option(
|
|
185
|
-
"--
|
|
352
|
+
"--max-seconds",
|
|
186
353
|
type=float,
|
|
187
|
-
default=
|
|
354
|
+
default=BenchmarkGenerativeTextArgs.get_default("max_seconds"),
|
|
188
355
|
help=(
|
|
189
|
-
"
|
|
190
|
-
"
|
|
191
|
-
"Defaults to None."
|
|
356
|
+
"Maximum seconds per benchmark. "
|
|
357
|
+
"If None, runs until max_requests or data exhaustion."
|
|
192
358
|
),
|
|
193
359
|
)
|
|
194
360
|
@click.option(
|
|
195
|
-
"--
|
|
196
|
-
type=
|
|
197
|
-
default=
|
|
361
|
+
"--max-requests",
|
|
362
|
+
type=int,
|
|
363
|
+
default=BenchmarkGenerativeTextArgs.get_default("max_requests"),
|
|
198
364
|
help=(
|
|
199
|
-
"
|
|
200
|
-
"
|
|
201
|
-
"Defaults to None."
|
|
365
|
+
"Maximum requests per benchmark. "
|
|
366
|
+
"If None, runs until max_seconds or data exhaustion."
|
|
202
367
|
),
|
|
203
368
|
)
|
|
204
369
|
@click.option(
|
|
205
|
-
"--
|
|
206
|
-
|
|
207
|
-
|
|
370
|
+
"--max-errors",
|
|
371
|
+
type=int,
|
|
372
|
+
default=BenchmarkGenerativeTextArgs.get_default("max_errors"),
|
|
373
|
+
help="Maximum errors before stopping the benchmark.",
|
|
208
374
|
)
|
|
209
375
|
@click.option(
|
|
210
|
-
"--
|
|
211
|
-
|
|
212
|
-
|
|
376
|
+
"--max-error-rate",
|
|
377
|
+
type=float,
|
|
378
|
+
default=BenchmarkGenerativeTextArgs.get_default("max_error_rate"),
|
|
379
|
+
help="Maximum error rate before stopping the benchmark.",
|
|
213
380
|
)
|
|
214
381
|
@click.option(
|
|
215
|
-
"--
|
|
216
|
-
|
|
217
|
-
|
|
382
|
+
"--max-global-error-rate",
|
|
383
|
+
type=float,
|
|
384
|
+
default=BenchmarkGenerativeTextArgs.get_default("max_global_error_rate"),
|
|
385
|
+
help="Maximum global error rate across all benchmarks.",
|
|
218
386
|
)
|
|
219
387
|
@click.option(
|
|
220
|
-
"--
|
|
221
|
-
|
|
222
|
-
|
|
388
|
+
"--over-saturation",
|
|
389
|
+
"over_saturation",
|
|
390
|
+
callback=cli_tools.parse_json,
|
|
391
|
+
default=None,
|
|
223
392
|
help=(
|
|
224
|
-
"
|
|
225
|
-
"
|
|
226
|
-
"
|
|
227
|
-
"
|
|
393
|
+
"Enable over-saturation detection. "
|
|
394
|
+
"Pass a JSON dict with configuration "
|
|
395
|
+
'(e.g., \'{"enabled": true, "min_seconds": 30}\'). '
|
|
396
|
+
"Defaults to None (disabled)."
|
|
228
397
|
),
|
|
229
398
|
)
|
|
230
399
|
@click.option(
|
|
231
|
-
"--
|
|
400
|
+
"--detect-saturation",
|
|
401
|
+
"--default-over-saturation",
|
|
402
|
+
"over_saturation",
|
|
232
403
|
callback=cli_tools.parse_json,
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
@click.option(
|
|
236
|
-
"--output-sampling",
|
|
237
|
-
type=int,
|
|
238
|
-
help=(
|
|
239
|
-
"The number of samples to save in the output file. "
|
|
240
|
-
"If None (default), will save all samples."
|
|
241
|
-
),
|
|
242
|
-
default=GenerativeTextScenario.get_default("output_sampling"),
|
|
404
|
+
flag_value='{"enabled": true}',
|
|
405
|
+
help="Enable over-saturation detection with default settings.",
|
|
243
406
|
)
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
type=int,
|
|
248
|
-
help="The random seed to use for benchmarking to ensure reproducibility.",
|
|
249
|
-
)
|
|
250
|
-
def run(
|
|
251
|
-
scenario,
|
|
252
|
-
target,
|
|
253
|
-
backend_type,
|
|
254
|
-
backend_args,
|
|
255
|
-
model,
|
|
256
|
-
processor,
|
|
257
|
-
processor_args,
|
|
258
|
-
data,
|
|
259
|
-
data_args,
|
|
260
|
-
data_sampler,
|
|
261
|
-
rate_type,
|
|
262
|
-
rate,
|
|
263
|
-
max_seconds,
|
|
264
|
-
max_requests,
|
|
265
|
-
warmup_percent,
|
|
266
|
-
cooldown_percent,
|
|
267
|
-
disable_progress,
|
|
268
|
-
display_scheduler_stats,
|
|
269
|
-
disable_console_outputs,
|
|
270
|
-
output_path,
|
|
271
|
-
output_extras,
|
|
272
|
-
output_sampling,
|
|
273
|
-
random_seed,
|
|
274
|
-
):
|
|
275
|
-
click_ctx = click.get_current_context()
|
|
407
|
+
def run(**kwargs):  # noqa: C901
    """
    Execute the ``benchmark run`` command.

    Normalizes the raw Click keyword arguments, builds a
    ``BenchmarkGenerativeTextArgs`` instance from them (optionally seeded by a
    scenario), and runs ``benchmark_generative_text`` to completion on a fresh
    asyncio event loop (using uvloop's policy when uvloop is importable).

    :param kwargs: Option values collected by Click, keyed by option destination
    :raises click.BadParameter: When ``--output-path`` is combined with
        ``--output-dir``, or when argument validation fails
    """
    # Only set CLI args that differ from click defaults
    kwargs = cli_tools.set_if_not_default(click.get_current_context(), **kwargs)

    # Handle remapping for request params: --request-type and
    # --request-formatter-kwargs are folded into the single
    # "data_request_formatter" argument.
    request_type = kwargs.pop("request_type", None)
    request_formatter_kwargs = kwargs.pop("request_formatter_kwargs", None)
    if request_type is not None:
        kwargs["data_request_formatter"] = (
            request_type
            if not request_formatter_kwargs
            else {"request_type": request_type, **request_formatter_kwargs}
        )
    elif request_formatter_kwargs is not None:
        kwargs["data_request_formatter"] = request_formatter_kwargs

    # Handle output path remapping (legacy --output-path -> output_dir + outputs)
    if (output_path := kwargs.pop("output_path", None)) is not None:
        # The --output-dir option is stored under "output_dir". The previous
        # lookup used "outputs_dir", a key no option produces, so this conflict
        # check could never trigger.
        if kwargs.get("output_dir") is not None:
            raise click.BadParameter("Cannot use --output-path with --output-dir.")
        path = Path(output_path)
        if path.is_dir():
            kwargs["output_dir"] = path
        else:
            # A file path: write into its parent directory under that file name.
            kwargs["output_dir"] = path.parent
            kwargs["outputs"] = (path.name,)

    # Handle console options: disabling the console also disables the
    # interactive progress display.
    disable_console = kwargs.pop("disable_console", False)
    disable_console_interactive = (
        kwargs.pop("disable_console_interactive", False) or disable_console
    )
    console = Console() if not disable_console else None
    envs = cli_tools.list_set_env()
    if console and envs:
        console.print_update(
            title=(
                "Note: the following environment variables "
                "are set and **may** affect configuration"
            ),
            details=", ".join(envs),
            status="warning",
        )

    try:
        args = BenchmarkGenerativeTextArgs.create(
            scenario=kwargs.pop("scenario", None), **kwargs
        )
    except ValidationError as err:
        # Translate pydantic validation error to click argument error
        errs = err.errors(include_url=False, include_context=True, include_input=True)
        first_error = errs[0]
        # NOTE(review): "loc" can be an empty tuple (presumably for errors raised
        # at model level rather than for one field); fall back to no hint instead
        # of failing with IndexError while reporting the real problem.
        location = first_error.get("loc") or ()
        param_name = (
            "--" + str(location[0]).replace("_", "-") if location else None
        )
        raise click.BadParameter(
            first_error["msg"], ctx=click.get_current_context(), param_hint=param_name
        ) from err

    if uvloop is not None:
        asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
    asyncio.run(
        benchmark_generative_text(
            args=args,
            progress=(
                GenerativeConsoleBenchmarkerProgress()
                if not disable_console_interactive
                else None
            ),
            console=console,
        )
    )
|
|
324
476
|
|
|
325
477
|
|
|
326
|
-
@benchmark.command(
|
|
478
|
+
@benchmark.command(
|
|
479
|
+
"from-file",
|
|
480
|
+
help=(
|
|
481
|
+
"Load a saved benchmark report and optionally re-export to other formats. "
|
|
482
|
+
"PATH: Path to the saved benchmark report file (default: ./benchmarks.json)."
|
|
483
|
+
),
|
|
484
|
+
)
|
|
327
485
|
@click.argument(
|
|
328
486
|
"path",
|
|
329
487
|
type=click.Path(file_okay=True, dir_okay=False, exists=True),
|
|
@@ -331,61 +489,46 @@ def run(
|
|
|
331
489
|
)
|
|
332
490
|
@click.option(
|
|
333
491
|
"--output-path",
|
|
334
|
-
type=click.Path(
|
|
335
|
-
default=
|
|
336
|
-
is_flag=False,
|
|
337
|
-
flag_value=Path.cwd() / "benchmarks_reexported.json",
|
|
492
|
+
type=click.Path(),
|
|
493
|
+
default=Path.cwd(),
|
|
338
494
|
help=(
|
|
339
|
-
"
|
|
340
|
-
"
|
|
341
|
-
"
|
|
342
|
-
"Otherwise, json, yaml, or csv files are supported for output types "
|
|
343
|
-
"which will be read from the extension for the file path. "
|
|
344
|
-
"This input is optional. If the output path flag is not provided, "
|
|
345
|
-
"the benchmarks will not be reexported. If the flag is present but "
|
|
346
|
-
"no value is specified, it will default to the current directory "
|
|
347
|
-
"with the file name `benchmarks_reexported.json`."
|
|
495
|
+
"Directory or file path to save re-exported benchmark results. "
|
|
496
|
+
"If a directory, all output formats will be saved there. "
|
|
497
|
+
"If a file, the matching format will be saved to that file."
|
|
348
498
|
),
|
|
349
499
|
)
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
""
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
"""
|
|
360
|
-
if value is None:
|
|
361
|
-
return None
|
|
362
|
-
try:
|
|
363
|
-
return codecs.decode(value, "unicode_escape")
|
|
364
|
-
except Exception as e:
|
|
365
|
-
raise click.BadParameter(f"Could not decode escape sequences: {e}") from e
|
|
500
|
+
@click.option(
|
|
501
|
+
"--output-formats",
|
|
502
|
+
multiple=True,
|
|
503
|
+
type=str,
|
|
504
|
+
default=("console", "json"), # ("console", "json", "html", "csv")
|
|
505
|
+
help="Output formats for benchmark results (e.g., console, json, html, csv).",
|
|
506
|
+
)
|
|
507
|
+
def from_file(path, output_path, output_formats):
|
|
508
|
+
asyncio.run(reimport_benchmarks_report(path, output_path, output_formats))
|
|
366
509
|
|
|
367
510
|
|
|
368
511
|
@cli.command(
|
|
369
|
-
short_help="
|
|
370
|
-
help=
|
|
371
|
-
"Print out the available configuration settings that can be set "
|
|
372
|
-
"through environment variables."
|
|
373
|
-
),
|
|
512
|
+
short_help="Show configuration settings.",
|
|
513
|
+
help="Display environment variables for configuring GuideLLM behavior.",
|
|
374
514
|
)
|
|
375
515
|
def config():
|
|
376
516
|
print_config()
|
|
377
517
|
|
|
378
518
|
|
|
379
|
-
@cli.group(help="
|
|
519
|
+
@cli.group(help="Tools for preprocessing datasets for use in benchmarks.")
|
|
380
520
|
def preprocess():
|
|
381
|
-
|
|
521
|
+
"""Dataset preprocessing utilities."""
|
|
382
522
|
|
|
383
523
|
|
|
384
524
|
@preprocess.command(
|
|
525
|
+
"dataset",
|
|
385
526
|
help=(
|
|
386
|
-
"
|
|
387
|
-
"
|
|
388
|
-
"
|
|
527
|
+
"Process a dataset to have specific prompt and output token sizes. "
|
|
528
|
+
"Supports multiple strategies for handling prompts and optional "
|
|
529
|
+
"Hugging Face Hub upload.\n\n"
|
|
530
|
+
"DATA: Path to the input dataset or dataset ID.\n\n"
|
|
531
|
+
"OUTPUT_PATH: Path to save the processed dataset, including file suffix."
|
|
389
532
|
),
|
|
390
533
|
context_settings={"auto_envvar_prefix": "GUIDELLM"},
|
|
391
534
|
)
|
|
@@ -403,93 +546,95 @@ def preprocess():
|
|
|
403
546
|
"--processor",
|
|
404
547
|
type=str,
|
|
405
548
|
required=True,
|
|
549
|
+
help="Processor or tokenizer name for calculating token counts.",
|
|
550
|
+
)
|
|
551
|
+
@click.option(
|
|
552
|
+
"--config",
|
|
553
|
+
type=str,
|
|
554
|
+
required=True,
|
|
406
555
|
help=(
|
|
407
|
-
"
|
|
408
|
-
"
|
|
556
|
+
"PreprocessDatasetConfig as JSON string, key=value pairs, "
|
|
557
|
+
"or file path (.json, .yaml, .yml, .config). "
|
|
558
|
+
"Example: 'prompt_tokens=100,output_tokens=50,prefix_tokens_max=10'"
|
|
559
|
+
' or \'{"prompt_tokens": 100, "output_tokens": 50, '
|
|
560
|
+
'"prefix_tokens_max": 10}\''
|
|
409
561
|
),
|
|
410
562
|
)
|
|
411
563
|
@click.option(
|
|
412
564
|
"--processor-args",
|
|
413
565
|
default=None,
|
|
414
566
|
callback=cli_tools.parse_json,
|
|
415
|
-
help=
|
|
416
|
-
"A JSON string containing any arguments to pass to the processor constructor "
|
|
417
|
-
"as a dict with **kwargs."
|
|
418
|
-
),
|
|
567
|
+
help="JSON string of arguments to pass to the processor constructor.",
|
|
419
568
|
)
|
|
420
569
|
@click.option(
|
|
421
570
|
"--data-args",
|
|
422
571
|
callback=cli_tools.parse_json,
|
|
423
|
-
help=
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
572
|
+
help="JSON string of arguments to pass to dataset creation.",
|
|
573
|
+
)
|
|
574
|
+
@click.option(
|
|
575
|
+
"--data-column-mapper",
|
|
576
|
+
default=None,
|
|
577
|
+
callback=cli_tools.parse_json,
|
|
578
|
+
help="JSON string of column mappings to apply to the dataset.",
|
|
427
579
|
)
|
|
428
580
|
@click.option(
|
|
429
581
|
"--short-prompt-strategy",
|
|
430
582
|
type=click.Choice([s.value for s in ShortPromptStrategy]),
|
|
431
583
|
default=ShortPromptStrategy.IGNORE.value,
|
|
432
584
|
show_default=True,
|
|
433
|
-
help="Strategy
|
|
585
|
+
help="Strategy for handling prompts shorter than target length.",
|
|
434
586
|
)
|
|
435
587
|
@click.option(
|
|
436
588
|
"--pad-char",
|
|
437
589
|
type=str,
|
|
438
590
|
default="",
|
|
439
591
|
callback=decode_escaped_str,
|
|
440
|
-
help="
|
|
592
|
+
help="Character to pad short prompts with when using 'pad' strategy.",
|
|
441
593
|
)
|
|
442
594
|
@click.option(
|
|
443
595
|
"--concat-delimiter",
|
|
444
596
|
type=str,
|
|
445
597
|
default="",
|
|
446
598
|
help=(
|
|
447
|
-
"
|
|
448
|
-
" Used when strategy is 'concatenate'."
|
|
599
|
+
"Delimiter for concatenating short prompts (used with 'concatenate' strategy)."
|
|
449
600
|
),
|
|
450
601
|
)
|
|
451
602
|
@click.option(
|
|
452
|
-
"--
|
|
453
|
-
|
|
454
|
-
default=
|
|
455
|
-
help="
|
|
456
|
-
)
|
|
457
|
-
@click.option(
|
|
458
|
-
"--output-tokens",
|
|
459
|
-
type=str,
|
|
460
|
-
default=None,
|
|
461
|
-
help="Output tokens config (JSON, YAML file or key=value string)",
|
|
603
|
+
"--include-prefix-in-token-count",
|
|
604
|
+
is_flag=True,
|
|
605
|
+
default=False,
|
|
606
|
+
help="Include prefix tokens in prompt token count calculation.",
|
|
462
607
|
)
|
|
463
608
|
@click.option(
|
|
464
609
|
"--push-to-hub",
|
|
465
610
|
is_flag=True,
|
|
466
|
-
help="
|
|
611
|
+
help="Push the processed dataset to Hugging Face Hub.",
|
|
467
612
|
)
|
|
468
613
|
@click.option(
|
|
469
614
|
"--hub-dataset-id",
|
|
470
615
|
type=str,
|
|
471
616
|
default=None,
|
|
472
|
-
help="
|
|
473
|
-
"Required if --push-to-hub is used.",
|
|
617
|
+
help=("Hugging Face Hub dataset ID for upload (required if --push-to-hub is set)."),
|
|
474
618
|
)
|
|
475
619
|
@click.option(
|
|
476
620
|
"--random-seed",
|
|
477
621
|
type=int,
|
|
478
622
|
default=42,
|
|
479
623
|
show_default=True,
|
|
480
|
-
help="Random seed for
|
|
624
|
+
help="Random seed for reproducible token sampling.",
|
|
481
625
|
)
|
|
482
626
|
def dataset(
|
|
483
627
|
data,
|
|
484
628
|
output_path,
|
|
485
629
|
processor,
|
|
630
|
+
config,
|
|
486
631
|
processor_args,
|
|
487
632
|
data_args,
|
|
633
|
+
data_column_mapper,
|
|
488
634
|
short_prompt_strategy,
|
|
489
635
|
pad_char,
|
|
490
636
|
concat_delimiter,
|
|
491
|
-
|
|
492
|
-
output_tokens,
|
|
637
|
+
include_prefix_in_token_count,
|
|
493
638
|
push_to_hub,
|
|
494
639
|
hub_dataset_id,
|
|
495
640
|
random_seed,
|
|
@@ -498,18 +643,142 @@ def dataset(
|
|
|
498
643
|
data=data,
|
|
499
644
|
output_path=output_path,
|
|
500
645
|
processor=processor,
|
|
501
|
-
|
|
502
|
-
output_tokens=output_tokens,
|
|
646
|
+
config=config,
|
|
503
647
|
processor_args=processor_args,
|
|
504
648
|
data_args=data_args,
|
|
649
|
+
data_column_mapper=data_column_mapper,
|
|
505
650
|
short_prompt_strategy=short_prompt_strategy,
|
|
506
651
|
pad_char=pad_char,
|
|
507
652
|
concat_delimiter=concat_delimiter,
|
|
653
|
+
include_prefix_in_token_count=include_prefix_in_token_count,
|
|
508
654
|
push_to_hub=push_to_hub,
|
|
509
655
|
hub_dataset_id=hub_dataset_id,
|
|
510
656
|
random_seed=random_seed,
|
|
511
657
|
)
|
|
512
658
|
|
|
513
659
|
|
|
660
|
+
@cli.command(
|
|
661
|
+
"mock-server",
|
|
662
|
+
help=(
|
|
663
|
+
"Start a mock OpenAI/vLLM-compatible server for testing. "
|
|
664
|
+
"Simulates model inference with configurable latency and token generation."
|
|
665
|
+
),
|
|
666
|
+
)
|
|
667
|
+
@click.option(
|
|
668
|
+
"--host",
|
|
669
|
+
default="127.0.0.1",
|
|
670
|
+
help="Host address to bind the server to.",
|
|
671
|
+
)
|
|
672
|
+
@click.option(
|
|
673
|
+
"--port",
|
|
674
|
+
default=8000,
|
|
675
|
+
type=int,
|
|
676
|
+
help="Port number to bind the server to.",
|
|
677
|
+
)
|
|
678
|
+
@click.option(
|
|
679
|
+
"--workers",
|
|
680
|
+
default=1,
|
|
681
|
+
type=int,
|
|
682
|
+
help="Number of worker processes.",
|
|
683
|
+
)
|
|
684
|
+
@click.option(
|
|
685
|
+
"--model",
|
|
686
|
+
default="llama-3.1-8b-instruct",
|
|
687
|
+
help="Name of the model to mock.",
|
|
688
|
+
)
|
|
689
|
+
@click.option(
|
|
690
|
+
"--processor",
|
|
691
|
+
default=None,
|
|
692
|
+
help="Processor or tokenizer to use for requests.",
|
|
693
|
+
)
|
|
694
|
+
@click.option(
|
|
695
|
+
"--request-latency",
|
|
696
|
+
default=3,
|
|
697
|
+
type=float,
|
|
698
|
+
help="Request latency in seconds for non-streaming requests.",
|
|
699
|
+
)
|
|
700
|
+
@click.option(
|
|
701
|
+
"--request-latency-std",
|
|
702
|
+
default=0,
|
|
703
|
+
type=float,
|
|
704
|
+
help="Request latency standard deviation in seconds (normal distribution).",
|
|
705
|
+
)
|
|
706
|
+
@click.option(
|
|
707
|
+
"--ttft-ms",
|
|
708
|
+
default=150,
|
|
709
|
+
type=float,
|
|
710
|
+
help="Time to first token in milliseconds for streaming requests.",
|
|
711
|
+
)
|
|
712
|
+
@click.option(
|
|
713
|
+
"--ttft-ms-std",
|
|
714
|
+
default=0,
|
|
715
|
+
type=float,
|
|
716
|
+
help="Time to first token standard deviation in milliseconds.",
|
|
717
|
+
)
|
|
718
|
+
@click.option(
|
|
719
|
+
"--itl-ms",
|
|
720
|
+
default=10,
|
|
721
|
+
type=float,
|
|
722
|
+
help="Inter-token latency in milliseconds for streaming requests.",
|
|
723
|
+
)
|
|
724
|
+
@click.option(
|
|
725
|
+
"--itl-ms-std",
|
|
726
|
+
default=0,
|
|
727
|
+
type=float,
|
|
728
|
+
help="Inter-token latency standard deviation in milliseconds.",
|
|
729
|
+
)
|
|
730
|
+
@click.option(
|
|
731
|
+
"--output-tokens",
|
|
732
|
+
default=128,
|
|
733
|
+
type=int,
|
|
734
|
+
help="Number of output tokens for streaming requests.",
|
|
735
|
+
)
|
|
736
|
+
@click.option(
|
|
737
|
+
"--output-tokens-std",
|
|
738
|
+
default=0,
|
|
739
|
+
type=float,
|
|
740
|
+
help="Output tokens standard deviation (normal distribution).",
|
|
741
|
+
)
|
|
742
|
+
def mock_server(
|
|
743
|
+
host: str,
|
|
744
|
+
port: int,
|
|
745
|
+
workers: int,
|
|
746
|
+
model: str,
|
|
747
|
+
processor: str | None,
|
|
748
|
+
request_latency: float,
|
|
749
|
+
request_latency_std: float,
|
|
750
|
+
ttft_ms: float,
|
|
751
|
+
ttft_ms_std: float,
|
|
752
|
+
itl_ms: float,
|
|
753
|
+
itl_ms_std: float,
|
|
754
|
+
output_tokens: int,
|
|
755
|
+
output_tokens_std: float,
|
|
756
|
+
):
|
|
757
|
+
config = MockServerConfig(
|
|
758
|
+
host=host,
|
|
759
|
+
port=port,
|
|
760
|
+
workers=workers,
|
|
761
|
+
model=model,
|
|
762
|
+
processor=processor,
|
|
763
|
+
request_latency=request_latency,
|
|
764
|
+
request_latency_std=request_latency_std,
|
|
765
|
+
ttft_ms=ttft_ms,
|
|
766
|
+
ttft_ms_std=ttft_ms_std,
|
|
767
|
+
itl_ms=itl_ms,
|
|
768
|
+
itl_ms_std=itl_ms_std,
|
|
769
|
+
output_tokens=output_tokens,
|
|
770
|
+
output_tokens_std=output_tokens_std,
|
|
771
|
+
)
|
|
772
|
+
|
|
773
|
+
server = MockServer(config)
|
|
774
|
+
console = Console()
|
|
775
|
+
console.print_update(
|
|
776
|
+
title="GuideLLM mock server starting...",
|
|
777
|
+
details=f"Listening on http://{host}:{port} for model {model}",
|
|
778
|
+
status="success",
|
|
779
|
+
)
|
|
780
|
+
server.run()
|
|
781
|
+
|
|
782
|
+
|
|
514
783
|
if __name__ == "__main__":
|
|
515
784
|
cli()
|