guidellm 0.4.0a21__py3-none-any.whl → 0.4.0a169__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of guidellm might be problematic. Click here for more details.
- guidellm/__init__.py +5 -2
- guidellm/__main__.py +452 -252
- guidellm/backends/__init__.py +33 -0
- guidellm/backends/backend.py +110 -0
- guidellm/backends/openai.py +355 -0
- guidellm/backends/response_handlers.py +455 -0
- guidellm/benchmark/__init__.py +53 -39
- guidellm/benchmark/benchmarker.py +150 -317
- guidellm/benchmark/entrypoints.py +467 -128
- guidellm/benchmark/output.py +519 -771
- guidellm/benchmark/profile.py +580 -280
- guidellm/benchmark/progress.py +568 -549
- guidellm/benchmark/scenarios/__init__.py +40 -0
- guidellm/benchmark/scenarios/chat.json +6 -0
- guidellm/benchmark/scenarios/rag.json +6 -0
- guidellm/benchmark/schemas.py +2086 -0
- guidellm/data/__init__.py +28 -4
- guidellm/data/collators.py +16 -0
- guidellm/data/deserializers/__init__.py +53 -0
- guidellm/data/deserializers/deserializer.py +144 -0
- guidellm/data/deserializers/file.py +222 -0
- guidellm/data/deserializers/huggingface.py +94 -0
- guidellm/data/deserializers/memory.py +194 -0
- guidellm/data/deserializers/synthetic.py +348 -0
- guidellm/data/loaders.py +149 -0
- guidellm/data/preprocessors/__init__.py +25 -0
- guidellm/data/preprocessors/formatters.py +404 -0
- guidellm/data/preprocessors/mappers.py +198 -0
- guidellm/data/preprocessors/preprocessor.py +31 -0
- guidellm/data/processor.py +31 -0
- guidellm/data/schemas.py +13 -0
- guidellm/data/utils/__init__.py +6 -0
- guidellm/data/utils/dataset.py +94 -0
- guidellm/extras/__init__.py +4 -0
- guidellm/extras/audio.py +215 -0
- guidellm/extras/vision.py +242 -0
- guidellm/logger.py +2 -2
- guidellm/mock_server/__init__.py +8 -0
- guidellm/mock_server/config.py +84 -0
- guidellm/mock_server/handlers/__init__.py +17 -0
- guidellm/mock_server/handlers/chat_completions.py +280 -0
- guidellm/mock_server/handlers/completions.py +280 -0
- guidellm/mock_server/handlers/tokenizer.py +142 -0
- guidellm/mock_server/models.py +510 -0
- guidellm/mock_server/server.py +168 -0
- guidellm/mock_server/utils.py +302 -0
- guidellm/preprocess/dataset.py +23 -26
- guidellm/presentation/builder.py +2 -2
- guidellm/presentation/data_models.py +25 -21
- guidellm/presentation/injector.py +2 -3
- guidellm/scheduler/__init__.py +65 -26
- guidellm/scheduler/constraints.py +1035 -0
- guidellm/scheduler/environments.py +252 -0
- guidellm/scheduler/scheduler.py +140 -368
- guidellm/scheduler/schemas.py +272 -0
- guidellm/scheduler/strategies.py +519 -0
- guidellm/scheduler/worker.py +391 -420
- guidellm/scheduler/worker_group.py +707 -0
- guidellm/schemas/__init__.py +31 -0
- guidellm/schemas/info.py +159 -0
- guidellm/schemas/request.py +226 -0
- guidellm/schemas/response.py +119 -0
- guidellm/schemas/stats.py +228 -0
- guidellm/{config.py → settings.py} +32 -21
- guidellm/utils/__init__.py +95 -8
- guidellm/utils/auto_importer.py +98 -0
- guidellm/utils/cli.py +71 -2
- guidellm/utils/console.py +183 -0
- guidellm/utils/encoding.py +778 -0
- guidellm/utils/functions.py +134 -0
- guidellm/utils/hf_datasets.py +1 -2
- guidellm/utils/hf_transformers.py +4 -4
- guidellm/utils/imports.py +9 -0
- guidellm/utils/messaging.py +1118 -0
- guidellm/utils/mixins.py +115 -0
- guidellm/utils/pydantic_utils.py +411 -0
- guidellm/utils/random.py +3 -4
- guidellm/utils/registry.py +220 -0
- guidellm/utils/singleton.py +133 -0
- guidellm/{objects → utils}/statistics.py +341 -247
- guidellm/utils/synchronous.py +159 -0
- guidellm/utils/text.py +163 -50
- guidellm/utils/typing.py +41 -0
- guidellm/version.py +1 -1
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/METADATA +33 -10
- guidellm-0.4.0a169.dist-info/RECORD +95 -0
- guidellm/backend/__init__.py +0 -23
- guidellm/backend/backend.py +0 -259
- guidellm/backend/openai.py +0 -705
- guidellm/backend/response.py +0 -136
- guidellm/benchmark/aggregator.py +0 -760
- guidellm/benchmark/benchmark.py +0 -837
- guidellm/benchmark/scenario.py +0 -104
- guidellm/data/prideandprejudice.txt.gz +0 -0
- guidellm/dataset/__init__.py +0 -22
- guidellm/dataset/creator.py +0 -213
- guidellm/dataset/entrypoints.py +0 -42
- guidellm/dataset/file.py +0 -92
- guidellm/dataset/hf_datasets.py +0 -62
- guidellm/dataset/in_memory.py +0 -132
- guidellm/dataset/synthetic.py +0 -287
- guidellm/objects/__init__.py +0 -18
- guidellm/objects/pydantic.py +0 -89
- guidellm/request/__init__.py +0 -18
- guidellm/request/loader.py +0 -284
- guidellm/request/request.py +0 -79
- guidellm/request/types.py +0 -10
- guidellm/scheduler/queues.py +0 -25
- guidellm/scheduler/result.py +0 -155
- guidellm/scheduler/strategy.py +0 -495
- guidellm-0.4.0a21.dist-info/RECORD +0 -62
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/WHEEL +0 -0
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/entry_points.txt +0 -0
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/top_level.txt +0 -0
guidellm/__main__.py
CHANGED
|
@@ -1,47 +1,117 @@
|
|
|
1
|
+
"""
|
|
2
|
+
GuideLLM command-line interface entry point.
|
|
3
|
+
|
|
4
|
+
Primary CLI application providing benchmark execution, dataset preprocessing, and
|
|
5
|
+
mock server functionality for language model evaluation. Organizes commands into
|
|
6
|
+
three main groups: benchmark operations for performance testing, preprocessing
|
|
7
|
+
utilities for data transformation, and mock server capabilities for development
|
|
8
|
+
and testing. Supports multiple backends, output formats, and flexible configuration
|
|
9
|
+
through CLI options and environment variables.
|
|
10
|
+
|
|
11
|
+
Example:
|
|
12
|
+
::
|
|
13
|
+
# Run a benchmark against a model
|
|
14
|
+
guidellm benchmark run --target http://localhost:8000 --data dataset.json \\
|
|
15
|
+
--profile sweep
|
|
16
|
+
|
|
17
|
+
# Preprocess a dataset
|
|
18
|
+
guidellm preprocess dataset input.json output.json --processor gpt2
|
|
19
|
+
|
|
20
|
+
# Start a mock server for testing
|
|
21
|
+
guidellm mock-server --host 0.0.0.0 --port 8080
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
1
26
|
import asyncio
|
|
2
27
|
import codecs
|
|
3
28
|
from pathlib import Path
|
|
4
|
-
from typing import get_args
|
|
5
29
|
|
|
6
30
|
import click
|
|
7
31
|
from pydantic import ValidationError
|
|
8
32
|
|
|
9
|
-
|
|
33
|
+
try:
|
|
34
|
+
import uvloop
|
|
35
|
+
except ImportError:
|
|
36
|
+
uvloop = None # type: ignore[assignment] # Optional dependency
|
|
37
|
+
|
|
38
|
+
from guidellm.backends import BackendType
|
|
10
39
|
from guidellm.benchmark import (
|
|
40
|
+
BenchmarkGenerativeTextArgs,
|
|
41
|
+
GenerativeConsoleBenchmarkerProgress,
|
|
11
42
|
ProfileType,
|
|
43
|
+
benchmark_generative_text,
|
|
44
|
+
get_builtin_scenarios,
|
|
12
45
|
reimport_benchmarks_report,
|
|
13
46
|
)
|
|
14
|
-
from guidellm.
|
|
15
|
-
from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios
|
|
16
|
-
from guidellm.config import print_config
|
|
47
|
+
from guidellm.mock_server import MockServer, MockServerConfig
|
|
17
48
|
from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset
|
|
18
49
|
from guidellm.scheduler import StrategyType
|
|
19
|
-
from guidellm.
|
|
50
|
+
from guidellm.schemas import GenerativeRequestType
|
|
51
|
+
from guidellm.settings import print_config
|
|
52
|
+
from guidellm.utils import Console, DefaultGroupHandler, get_literal_vals
|
|
20
53
|
from guidellm.utils import cli as cli_tools
|
|
21
54
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
55
|
+
__all__ = [
|
|
56
|
+
"STRATEGY_PROFILE_CHOICES",
|
|
57
|
+
"benchmark",
|
|
58
|
+
"cli",
|
|
59
|
+
"config",
|
|
60
|
+
"dataset",
|
|
61
|
+
"decode_escaped_str",
|
|
62
|
+
"from_file",
|
|
63
|
+
"mock_server",
|
|
64
|
+
"preprocess",
|
|
65
|
+
"run",
|
|
66
|
+
]
|
|
67
|
+
|
|
68
|
+
STRATEGY_PROFILE_CHOICES: list[str] = list(get_literal_vals(ProfileType | StrategyType))
|
|
69
|
+
"""Available strategy and profile type choices for benchmark execution."""
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def decode_escaped_str(_ctx, _param, value):
|
|
73
|
+
"""
|
|
74
|
+
Decode escape sequences in Click option values.
|
|
75
|
+
|
|
76
|
+
Click automatically escapes characters converting sequences like "\\n" to
|
|
77
|
+
"\\\\n". This function decodes these sequences to their intended characters.
|
|
78
|
+
|
|
79
|
+
:param _ctx: Click context (unused)
|
|
80
|
+
:param _param: Click parameter (unused)
|
|
81
|
+
:param value: String value to decode
|
|
82
|
+
:return: Decoded string with proper escape sequences, or None if input is None
|
|
83
|
+
:raises click.BadParameter: When escape sequence decoding fails
|
|
84
|
+
"""
|
|
85
|
+
if value is None:
|
|
86
|
+
return None
|
|
87
|
+
try:
|
|
88
|
+
return codecs.decode(value, "unicode_escape")
|
|
89
|
+
except Exception as e:
|
|
90
|
+
raise click.BadParameter(f"Could not decode escape sequences: {e}") from e
|
|
25
91
|
|
|
26
92
|
|
|
27
93
|
@click.group()
|
|
28
94
|
@click.version_option(package_name="guidellm", message="guidellm version: %(version)s")
|
|
29
95
|
def cli():
|
|
30
|
-
|
|
96
|
+
"""GuideLLM CLI for benchmarking, preprocessing, and testing language models."""
|
|
31
97
|
|
|
32
98
|
|
|
33
99
|
@cli.group(
|
|
34
|
-
help="
|
|
100
|
+
help="Run a benchmark or load a previously saved benchmark report.",
|
|
35
101
|
cls=DefaultGroupHandler,
|
|
36
102
|
default="run",
|
|
37
103
|
)
|
|
38
104
|
def benchmark():
|
|
39
|
-
|
|
105
|
+
"""Benchmark commands for performance testing generative models."""
|
|
40
106
|
|
|
41
107
|
|
|
42
108
|
@benchmark.command(
|
|
43
109
|
"run",
|
|
44
|
-
help=
|
|
110
|
+
help=(
|
|
111
|
+
"Run a benchmark against a generative model. "
|
|
112
|
+
"Supports multiple backends, data sources, strategies, and output formats. "
|
|
113
|
+
"Configuration can be loaded from a scenario file or specified via options."
|
|
114
|
+
),
|
|
45
115
|
context_settings={"auto_envvar_prefix": "GUIDELLM"},
|
|
46
116
|
)
|
|
47
117
|
@click.option(
|
|
@@ -54,276 +124,309 @@ def benchmark():
|
|
|
54
124
|
dir_okay=False,
|
|
55
125
|
path_type=Path,
|
|
56
126
|
),
|
|
57
|
-
click.Choice(get_builtin_scenarios()),
|
|
127
|
+
click.Choice(tuple(get_builtin_scenarios().keys())),
|
|
58
128
|
),
|
|
59
129
|
default=None,
|
|
60
130
|
help=(
|
|
61
|
-
"
|
|
62
|
-
"
|
|
63
|
-
"Options specified on the commandline will override the scenario."
|
|
131
|
+
"Builtin scenario name or path to config file. "
|
|
132
|
+
"CLI options override scenario settings."
|
|
64
133
|
),
|
|
65
134
|
)
|
|
66
135
|
@click.option(
|
|
67
136
|
"--target",
|
|
68
137
|
type=str,
|
|
69
|
-
help="
|
|
138
|
+
help="Target backend URL (e.g., http://localhost:8000).",
|
|
70
139
|
)
|
|
71
140
|
@click.option(
|
|
72
|
-
"--
|
|
73
|
-
type=
|
|
141
|
+
"--data",
|
|
142
|
+
type=str,
|
|
143
|
+
multiple=True,
|
|
74
144
|
help=(
|
|
75
|
-
"
|
|
76
|
-
|
|
145
|
+
"HuggingFace dataset ID, path to dataset, path to data file "
|
|
146
|
+
"(csv/json/jsonl/txt), or synthetic data config (json/key=value)."
|
|
77
147
|
),
|
|
78
|
-
default=GenerativeTextScenario.get_default("backend_type"),
|
|
79
148
|
)
|
|
80
149
|
@click.option(
|
|
81
|
-
"--
|
|
82
|
-
|
|
83
|
-
|
|
150
|
+
"--profile",
|
|
151
|
+
"--rate-type", # legacy alias
|
|
152
|
+
"profile",
|
|
153
|
+
default=BenchmarkGenerativeTextArgs.get_default("profile"),
|
|
154
|
+
type=click.Choice(STRATEGY_PROFILE_CHOICES),
|
|
155
|
+
help=f"Benchmark profile type. Options: {', '.join(STRATEGY_PROFILE_CHOICES)}.",
|
|
156
|
+
)
|
|
157
|
+
@click.option(
|
|
158
|
+
"--rate",
|
|
159
|
+
type=str,
|
|
160
|
+
callback=cli_tools.parse_list_floats,
|
|
161
|
+
multiple=False,
|
|
162
|
+
default=BenchmarkGenerativeTextArgs.get_default("rate"),
|
|
84
163
|
help=(
|
|
85
|
-
"
|
|
86
|
-
"
|
|
87
|
-
"
|
|
88
|
-
"""'{"headers": {"Authorization": null, "Custom-Header": "Custom-Value"}}'"""
|
|
164
|
+
"Benchmark rate(s) to test. Meaning depends on profile: "
|
|
165
|
+
"sweep=number of benchmarks, concurrent=concurrent requests, "
|
|
166
|
+
"async/constant/poisson=requests per second."
|
|
89
167
|
),
|
|
90
168
|
)
|
|
169
|
+
# Backend configuration
|
|
170
|
+
@click.option(
|
|
171
|
+
"--backend",
|
|
172
|
+
"--backend-type", # legacy alias
|
|
173
|
+
"backend",
|
|
174
|
+
type=click.Choice(list(get_literal_vals(BackendType))),
|
|
175
|
+
default=BenchmarkGenerativeTextArgs.get_default("backend"),
|
|
176
|
+
help=f"Backend type. Options: {', '.join(get_literal_vals(BackendType))}.",
|
|
177
|
+
)
|
|
178
|
+
@click.option(
|
|
179
|
+
"--backend-kwargs",
|
|
180
|
+
"--backend-args", # legacy alias
|
|
181
|
+
"backend_kwargs",
|
|
182
|
+
callback=cli_tools.parse_json,
|
|
183
|
+
default=BenchmarkGenerativeTextArgs.get_default("backend_kwargs"),
|
|
184
|
+
help="JSON string of arguments to pass to the backend.",
|
|
185
|
+
)
|
|
91
186
|
@click.option(
|
|
92
187
|
"--model",
|
|
93
|
-
default=
|
|
188
|
+
default=BenchmarkGenerativeTextArgs.get_default("model"),
|
|
94
189
|
type=str,
|
|
190
|
+
help="Model ID to benchmark. If not provided, uses first available model.",
|
|
191
|
+
)
|
|
192
|
+
# Data configuration
|
|
193
|
+
@click.option(
|
|
194
|
+
"--request-type",
|
|
195
|
+
default=BenchmarkGenerativeTextArgs.get_default("data_request_formatter"),
|
|
196
|
+
type=click.Choice(list(get_literal_vals(GenerativeRequestType))),
|
|
95
197
|
help=(
|
|
96
|
-
"
|
|
97
|
-
"
|
|
198
|
+
f"Request type to create for each data sample. "
|
|
199
|
+
f"Options: {', '.join(get_literal_vals(GenerativeRequestType))}."
|
|
98
200
|
),
|
|
99
201
|
)
|
|
202
|
+
@click.option(
|
|
203
|
+
"--request-formatter-kwargs",
|
|
204
|
+
default=None,
|
|
205
|
+
callback=cli_tools.parse_json,
|
|
206
|
+
help="JSON string of arguments to pass to the request formatter.",
|
|
207
|
+
)
|
|
100
208
|
@click.option(
|
|
101
209
|
"--processor",
|
|
102
|
-
default=
|
|
210
|
+
default=BenchmarkGenerativeTextArgs.get_default("processor"),
|
|
103
211
|
type=str,
|
|
104
212
|
help=(
|
|
105
|
-
"
|
|
106
|
-
"
|
|
107
|
-
"using the model arg, if needed."
|
|
213
|
+
"Processor or tokenizer for token count calculations. "
|
|
214
|
+
"If not provided, loads from model."
|
|
108
215
|
),
|
|
109
216
|
)
|
|
110
217
|
@click.option(
|
|
111
218
|
"--processor-args",
|
|
112
|
-
default=
|
|
219
|
+
default=BenchmarkGenerativeTextArgs.get_default("processor_args"),
|
|
113
220
|
callback=cli_tools.parse_json,
|
|
114
|
-
help=
|
|
115
|
-
"A JSON string containing any arguments to pass to the processor constructor "
|
|
116
|
-
"as a dict with **kwargs."
|
|
117
|
-
),
|
|
221
|
+
help="JSON string of arguments to pass to the processor constructor.",
|
|
118
222
|
)
|
|
119
223
|
@click.option(
|
|
120
|
-
"--data",
|
|
121
|
-
|
|
224
|
+
"--data-args",
|
|
225
|
+
multiple=True,
|
|
226
|
+
default=BenchmarkGenerativeTextArgs.get_default("data_args"),
|
|
227
|
+
callback=cli_tools.parse_json,
|
|
228
|
+
help="JSON string of arguments to pass to dataset creation.",
|
|
229
|
+
)
|
|
230
|
+
@click.option(
|
|
231
|
+
"--data-samples",
|
|
232
|
+
default=BenchmarkGenerativeTextArgs.get_default("data_samples"),
|
|
233
|
+
type=int,
|
|
122
234
|
help=(
|
|
123
|
-
"
|
|
124
|
-
"
|
|
125
|
-
"or a synthetic data config as a json or key=value string."
|
|
235
|
+
"Number of samples from dataset. -1 (default) uses all samples "
|
|
236
|
+
"and dynamically generates more."
|
|
126
237
|
),
|
|
127
238
|
)
|
|
128
239
|
@click.option(
|
|
129
|
-
"--data-
|
|
130
|
-
default=
|
|
240
|
+
"--data-column-mapper",
|
|
241
|
+
default=BenchmarkGenerativeTextArgs.get_default("data_column_mapper"),
|
|
131
242
|
callback=cli_tools.parse_json,
|
|
132
|
-
help=
|
|
133
|
-
"A JSON string containing any arguments to pass to the dataset creation "
|
|
134
|
-
"as a dict with **kwargs."
|
|
135
|
-
),
|
|
243
|
+
help="JSON string of column mappings to apply to the dataset.",
|
|
136
244
|
)
|
|
137
245
|
@click.option(
|
|
138
246
|
"--data-sampler",
|
|
139
|
-
default=
|
|
140
|
-
type=click.Choice(["
|
|
141
|
-
help=
|
|
142
|
-
"The data sampler type to use. 'random' will add a random shuffle on the data. "
|
|
143
|
-
"Defaults to None"
|
|
144
|
-
),
|
|
247
|
+
default=BenchmarkGenerativeTextArgs.get_default("data_sampler"),
|
|
248
|
+
type=click.Choice(["shuffle"]),
|
|
249
|
+
help="Data sampler type.",
|
|
145
250
|
)
|
|
146
251
|
@click.option(
|
|
147
|
-
"--
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
f"Supported types {', '.join(STRATEGY_PROFILE_CHOICES)}. "
|
|
152
|
-
),
|
|
252
|
+
"--data-num-workers",
|
|
253
|
+
default=BenchmarkGenerativeTextArgs.get_default("data_num_workers"),
|
|
254
|
+
type=int,
|
|
255
|
+
help="Number of worker processes for data loading.",
|
|
153
256
|
)
|
|
154
257
|
@click.option(
|
|
155
|
-
"--
|
|
156
|
-
default=
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
"Can be a single number or a comma-separated list of numbers. "
|
|
160
|
-
"For rate-type=sweep, this is the number of benchmarks it runs in the sweep. "
|
|
161
|
-
"For rate-type=concurrent, this is the number of concurrent requests. "
|
|
162
|
-
"For rate-type=async,constant,poisson, this is the rate requests per second. "
|
|
163
|
-
"For rate-type=synchronous,throughput, this must not be set."
|
|
164
|
-
),
|
|
258
|
+
"--dataloader_kwargs",
|
|
259
|
+
default=BenchmarkGenerativeTextArgs.get_default("dataloader_kwargs"),
|
|
260
|
+
callback=cli_tools.parse_json,
|
|
261
|
+
help="JSON string of arguments to pass to the dataloader constructor.",
|
|
165
262
|
)
|
|
166
263
|
@click.option(
|
|
167
|
-
"--
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
help=
|
|
171
|
-
"The maximum number of seconds each benchmark can run for. "
|
|
172
|
-
"If None, will run until max_requests or the data is exhausted."
|
|
173
|
-
),
|
|
264
|
+
"--random-seed",
|
|
265
|
+
default=BenchmarkGenerativeTextArgs.get_default("random_seed"),
|
|
266
|
+
type=int,
|
|
267
|
+
help="Random seed for reproducibility.",
|
|
174
268
|
)
|
|
269
|
+
# Output configuration
|
|
175
270
|
@click.option(
|
|
176
|
-
"--
|
|
177
|
-
type=
|
|
178
|
-
default=
|
|
271
|
+
"--output-path",
|
|
272
|
+
type=click.Path(),
|
|
273
|
+
default=BenchmarkGenerativeTextArgs.get_default("output_path"),
|
|
179
274
|
help=(
|
|
180
|
-
"
|
|
181
|
-
"If
|
|
275
|
+
"Path to save output files. Can be a directory or file. "
|
|
276
|
+
"If a file, saves that format; mismatched formats save to parent directory."
|
|
182
277
|
),
|
|
183
278
|
)
|
|
184
279
|
@click.option(
|
|
185
|
-
"--
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
"or lenth of dataset) to run as a warmup and not include in the final results. "
|
|
191
|
-
"Defaults to None."
|
|
192
|
-
),
|
|
280
|
+
"--output-formats",
|
|
281
|
+
multiple=True,
|
|
282
|
+
type=str,
|
|
283
|
+
default=BenchmarkGenerativeTextArgs.get_default("output_formats"),
|
|
284
|
+
help="Output formats for results (e.g., console, json, html, csv).",
|
|
193
285
|
)
|
|
194
286
|
@click.option(
|
|
195
|
-
"--
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
help=(
|
|
199
|
-
"The percent of the benchmark (based on max-seconds, max-requets, or lenth "
|
|
200
|
-
"of dataset) to run as a cooldown and not include in the final results. "
|
|
201
|
-
"Defaults to None."
|
|
202
|
-
),
|
|
287
|
+
"--disable-console-outputs",
|
|
288
|
+
is_flag=True,
|
|
289
|
+
help="Disable console output.",
|
|
203
290
|
)
|
|
291
|
+
# Updates configuration
|
|
204
292
|
@click.option(
|
|
205
293
|
"--disable-progress",
|
|
206
294
|
is_flag=True,
|
|
207
|
-
help="
|
|
295
|
+
help="Disable progress updates to the console.",
|
|
208
296
|
)
|
|
209
297
|
@click.option(
|
|
210
298
|
"--display-scheduler-stats",
|
|
211
299
|
is_flag=True,
|
|
212
|
-
help="
|
|
300
|
+
help="Display scheduler process statistics.",
|
|
213
301
|
)
|
|
302
|
+
# Aggregators configuration
|
|
214
303
|
@click.option(
|
|
215
|
-
"--
|
|
216
|
-
|
|
217
|
-
|
|
304
|
+
"--warmup",
|
|
305
|
+
"--warmup-percent", # legacy alias
|
|
306
|
+
"warmup",
|
|
307
|
+
type=float,
|
|
308
|
+
default=BenchmarkGenerativeTextArgs.get_default("warmup"),
|
|
309
|
+
help=(
|
|
310
|
+
"Warmup specification: if in (0,1) = percent, if >=1 = number of "
|
|
311
|
+
"requests/seconds (depends on active constraint)."
|
|
312
|
+
),
|
|
218
313
|
)
|
|
219
314
|
@click.option(
|
|
220
|
-
"--
|
|
221
|
-
|
|
222
|
-
|
|
315
|
+
"--cooldown",
|
|
316
|
+
"--cooldown-percent", # legacy alias
|
|
317
|
+
"cooldown",
|
|
318
|
+
type=float,
|
|
319
|
+
default=BenchmarkGenerativeTextArgs.get_default("cooldown"),
|
|
223
320
|
help=(
|
|
224
|
-
"
|
|
225
|
-
"
|
|
226
|
-
"Otherwise, json, yaml, csv, or html files are supported for output types "
|
|
227
|
-
"which will be read from the extension for the file path."
|
|
321
|
+
"Cooldown specification: if in (0,1) = percent, if >=1 = number of "
|
|
322
|
+
"requests/seconds (depends on active constraint)."
|
|
228
323
|
),
|
|
229
324
|
)
|
|
230
325
|
@click.option(
|
|
231
|
-
"--
|
|
232
|
-
|
|
233
|
-
|
|
326
|
+
"--sample-requests",
|
|
327
|
+
"--output-sampling", # legacy alias
|
|
328
|
+
"sample_requests",
|
|
329
|
+
type=int,
|
|
330
|
+
help=(
|
|
331
|
+
"Number of sample requests per status to save. "
|
|
332
|
+
"None (default) saves all, recommended: 20."
|
|
333
|
+
),
|
|
234
334
|
)
|
|
335
|
+
# Constraints configuration
|
|
235
336
|
@click.option(
|
|
236
|
-
"--
|
|
337
|
+
"--max-seconds",
|
|
338
|
+
type=float,
|
|
339
|
+
default=BenchmarkGenerativeTextArgs.get_default("max_seconds"),
|
|
340
|
+
help=(
|
|
341
|
+
"Maximum seconds per benchmark. "
|
|
342
|
+
"If None, runs until max_requests or data exhaustion."
|
|
343
|
+
),
|
|
344
|
+
)
|
|
345
|
+
@click.option(
|
|
346
|
+
"--max-requests",
|
|
237
347
|
type=int,
|
|
348
|
+
default=BenchmarkGenerativeTextArgs.get_default("max_requests"),
|
|
238
349
|
help=(
|
|
239
|
-
"
|
|
240
|
-
"If None
|
|
350
|
+
"Maximum requests per benchmark. "
|
|
351
|
+
"If None, runs until max_seconds or data exhaustion."
|
|
241
352
|
),
|
|
242
|
-
default=GenerativeTextScenario.get_default("output_sampling"),
|
|
243
353
|
)
|
|
244
354
|
@click.option(
|
|
245
|
-
"--
|
|
246
|
-
default=GenerativeTextScenario.get_default("random_seed"),
|
|
355
|
+
"--max-errors",
|
|
247
356
|
type=int,
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
random_seed,
|
|
274
|
-
):
|
|
275
|
-
click_ctx = click.get_current_context()
|
|
276
|
-
|
|
277
|
-
overrides = cli_tools.set_if_not_default(
|
|
278
|
-
click_ctx,
|
|
279
|
-
target=target,
|
|
280
|
-
backend_type=backend_type,
|
|
281
|
-
backend_args=backend_args,
|
|
282
|
-
model=model,
|
|
283
|
-
processor=processor,
|
|
284
|
-
processor_args=processor_args,
|
|
285
|
-
data=data,
|
|
286
|
-
data_args=data_args,
|
|
287
|
-
data_sampler=data_sampler,
|
|
288
|
-
rate_type=rate_type,
|
|
289
|
-
rate=rate,
|
|
290
|
-
max_seconds=max_seconds,
|
|
291
|
-
max_requests=max_requests,
|
|
292
|
-
warmup_percent=warmup_percent,
|
|
293
|
-
cooldown_percent=cooldown_percent,
|
|
294
|
-
output_sampling=output_sampling,
|
|
295
|
-
random_seed=random_seed,
|
|
357
|
+
default=BenchmarkGenerativeTextArgs.get_default("max_errors"),
|
|
358
|
+
help="Maximum errors before stopping the benchmark.",
|
|
359
|
+
)
|
|
360
|
+
@click.option(
|
|
361
|
+
"--max-error-rate",
|
|
362
|
+
type=float,
|
|
363
|
+
default=BenchmarkGenerativeTextArgs.get_default("max_error_rate"),
|
|
364
|
+
help="Maximum error rate before stopping the benchmark.",
|
|
365
|
+
)
|
|
366
|
+
@click.option(
|
|
367
|
+
"--max-global-error-rate",
|
|
368
|
+
type=float,
|
|
369
|
+
default=BenchmarkGenerativeTextArgs.get_default("max_global_error_rate"),
|
|
370
|
+
help="Maximum global error rate across all benchmarks.",
|
|
371
|
+
)
|
|
372
|
+
def run(**kwargs):
|
|
373
|
+
request_type = kwargs.pop("request_type", None)
|
|
374
|
+
request_formatter_kwargs = kwargs.pop("request_formatter_kwargs", None)
|
|
375
|
+
kwargs["data_request_formatter"] = (
|
|
376
|
+
request_type
|
|
377
|
+
if not request_formatter_kwargs
|
|
378
|
+
else {"request_type": request_type, **request_formatter_kwargs}
|
|
379
|
+
)
|
|
380
|
+
kwargs["data"] = cli_tools.format_list_arg(
|
|
381
|
+
kwargs.get("data"), default=[], simplify_single=False
|
|
296
382
|
)
|
|
383
|
+
kwargs["data_args"] = cli_tools.format_list_arg(
|
|
384
|
+
kwargs.get("data_args"), default=[], simplify_single=False
|
|
385
|
+
)
|
|
386
|
+
kwargs["rate"] = cli_tools.format_list_arg(
|
|
387
|
+
kwargs.get("rate"), default=None, simplify_single=False
|
|
388
|
+
)
|
|
389
|
+
|
|
390
|
+
disable_console_outputs = kwargs.pop("disable_console_outputs", False)
|
|
391
|
+
display_scheduler_stats = kwargs.pop("display_scheduler_stats", False)
|
|
392
|
+
disable_progress = kwargs.pop("disable_progress", False)
|
|
297
393
|
|
|
298
394
|
try:
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
_scenario = GenerativeTextScenario.from_file(scenario, overrides)
|
|
304
|
-
else: # Only builtins can make it here; click will catch anything else
|
|
305
|
-
_scenario = GenerativeTextScenario.from_builtin(scenario, overrides)
|
|
306
|
-
except ValidationError as e:
|
|
395
|
+
args = BenchmarkGenerativeTextArgs.create(
|
|
396
|
+
scenario=kwargs.pop("scenario", None), **kwargs
|
|
397
|
+
)
|
|
398
|
+
except ValidationError as err:
|
|
307
399
|
# Translate pydantic validation error to click argument error
|
|
308
|
-
errs =
|
|
400
|
+
errs = err.errors(include_url=False, include_context=True, include_input=True)
|
|
309
401
|
param_name = "--" + str(errs[0]["loc"][0]).replace("_", "-")
|
|
310
402
|
raise click.BadParameter(
|
|
311
|
-
errs[0]["msg"], ctx=
|
|
312
|
-
) from
|
|
403
|
+
errs[0]["msg"], ctx=click.get_current_context(), param_hint=param_name
|
|
404
|
+
) from err
|
|
313
405
|
|
|
406
|
+
if uvloop is not None:
|
|
407
|
+
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
|
|
314
408
|
asyncio.run(
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
409
|
+
benchmark_generative_text(
|
|
410
|
+
args=args,
|
|
411
|
+
progress=(
|
|
412
|
+
GenerativeConsoleBenchmarkerProgress(
|
|
413
|
+
display_scheduler_stats=display_scheduler_stats
|
|
414
|
+
)
|
|
415
|
+
if not disable_progress
|
|
416
|
+
else None
|
|
417
|
+
),
|
|
418
|
+
console=Console() if not disable_console_outputs else None,
|
|
322
419
|
)
|
|
323
420
|
)
|
|
324
421
|
|
|
325
422
|
|
|
326
|
-
@benchmark.command(
|
|
423
|
+
@benchmark.command(
|
|
424
|
+
"from-file",
|
|
425
|
+
help=(
|
|
426
|
+
"Load a saved benchmark report and optionally re-export to other formats. "
|
|
427
|
+
"PATH: Path to the saved benchmark report file (default: ./benchmarks.json)."
|
|
428
|
+
),
|
|
429
|
+
)
|
|
327
430
|
@click.argument(
|
|
328
431
|
"path",
|
|
329
432
|
type=click.Path(file_okay=True, dir_okay=False, exists=True),
|
|
@@ -331,61 +434,46 @@ def run(
|
|
|
331
434
|
)
|
|
332
435
|
@click.option(
|
|
333
436
|
"--output-path",
|
|
334
|
-
type=click.Path(
|
|
335
|
-
default=
|
|
336
|
-
is_flag=False,
|
|
337
|
-
flag_value=Path.cwd() / "benchmarks_reexported.json",
|
|
437
|
+
type=click.Path(),
|
|
438
|
+
default=Path.cwd(),
|
|
338
439
|
help=(
|
|
339
|
-
"
|
|
340
|
-
"
|
|
341
|
-
"
|
|
342
|
-
"Otherwise, json, yaml, or csv files are supported for output types "
|
|
343
|
-
"which will be read from the extension for the file path. "
|
|
344
|
-
"This input is optional. If the output path flag is not provided, "
|
|
345
|
-
"the benchmarks will not be reexported. If the flag is present but "
|
|
346
|
-
"no value is specified, it will default to the current directory "
|
|
347
|
-
"with the file name `benchmarks_reexported.json`."
|
|
440
|
+
"Directory or file path to save re-exported benchmark results. "
|
|
441
|
+
"If a directory, all output formats will be saved there. "
|
|
442
|
+
"If a file, the matching format will be saved to that file."
|
|
348
443
|
),
|
|
349
444
|
)
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
""
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
"""
|
|
360
|
-
if value is None:
|
|
361
|
-
return None
|
|
362
|
-
try:
|
|
363
|
-
return codecs.decode(value, "unicode_escape")
|
|
364
|
-
except Exception as e:
|
|
365
|
-
raise click.BadParameter(f"Could not decode escape sequences: {e}") from e
|
|
445
|
+
@click.option(
|
|
446
|
+
"--output-formats",
|
|
447
|
+
multiple=True,
|
|
448
|
+
type=str,
|
|
449
|
+
default=("console", "json"), # ("console", "json", "html", "csv")
|
|
450
|
+
help="Output formats for benchmark results (e.g., console, json, html, csv).",
|
|
451
|
+
)
|
|
452
|
+
def from_file(path, output_path, output_formats):
|
|
453
|
+
asyncio.run(reimport_benchmarks_report(path, output_path, output_formats))
|
|
366
454
|
|
|
367
455
|
|
|
368
456
|
@cli.command(
|
|
369
|
-
short_help="
|
|
370
|
-
help=
|
|
371
|
-
"Print out the available configuration settings that can be set "
|
|
372
|
-
"through environment variables."
|
|
373
|
-
),
|
|
457
|
+
short_help="Show configuration settings.",
|
|
458
|
+
help="Display environment variables for configuring GuideLLM behavior.",
|
|
374
459
|
)
|
|
375
460
|
def config():
|
|
376
461
|
print_config()
|
|
377
462
|
|
|
378
463
|
|
|
379
|
-
@cli.group(help="
|
|
464
|
+
@cli.group(help="Tools for preprocessing datasets for use in benchmarks.")
|
|
380
465
|
def preprocess():
|
|
381
|
-
|
|
466
|
+
"""Dataset preprocessing utilities."""
|
|
382
467
|
|
|
383
468
|
|
|
384
469
|
@preprocess.command(
|
|
470
|
+
"dataset",
|
|
385
471
|
help=(
|
|
386
|
-
"
|
|
387
|
-
"
|
|
388
|
-
"
|
|
472
|
+
"Process a dataset to have specific prompt and output token sizes. "
|
|
473
|
+
"Supports multiple strategies for handling prompts and optional "
|
|
474
|
+
"Hugging Face Hub upload.\n\n"
|
|
475
|
+
"DATA: Path to the input dataset or dataset ID.\n\n"
|
|
476
|
+
"OUTPUT_PATH: Path to save the processed dataset, including file suffix."
|
|
389
477
|
),
|
|
390
478
|
context_settings={"auto_envvar_prefix": "GUIDELLM"},
|
|
391
479
|
)
|
|
@@ -403,81 +491,70 @@ def preprocess():
|
|
|
403
491
|
"--processor",
|
|
404
492
|
type=str,
|
|
405
493
|
required=True,
|
|
406
|
-
help=
|
|
407
|
-
"The processor or tokenizer to use to calculate token counts for statistics "
|
|
408
|
-
"and synthetic data generation."
|
|
409
|
-
),
|
|
494
|
+
help="Processor or tokenizer name for calculating token counts.",
|
|
410
495
|
)
|
|
411
496
|
@click.option(
|
|
412
497
|
"--processor-args",
|
|
413
498
|
default=None,
|
|
414
499
|
callback=cli_tools.parse_json,
|
|
415
|
-
help=
|
|
416
|
-
"A JSON string containing any arguments to pass to the processor constructor "
|
|
417
|
-
"as a dict with **kwargs."
|
|
418
|
-
),
|
|
500
|
+
help="JSON string of arguments to pass to the processor constructor.",
|
|
419
501
|
)
|
|
420
502
|
@click.option(
|
|
421
503
|
"--data-args",
|
|
422
504
|
callback=cli_tools.parse_json,
|
|
423
|
-
help=
|
|
424
|
-
"A JSON string containing any arguments to pass to the dataset creation "
|
|
425
|
-
"as a dict with **kwargs."
|
|
426
|
-
),
|
|
505
|
+
help="JSON string of arguments to pass to dataset creation.",
|
|
427
506
|
)
|
|
428
507
|
@click.option(
|
|
429
508
|
"--short-prompt-strategy",
|
|
430
509
|
type=click.Choice([s.value for s in ShortPromptStrategy]),
|
|
431
510
|
default=ShortPromptStrategy.IGNORE.value,
|
|
432
511
|
show_default=True,
|
|
433
|
-
help="Strategy
|
|
512
|
+
help="Strategy for handling prompts shorter than target length.",
|
|
434
513
|
)
|
|
435
514
|
@click.option(
|
|
436
515
|
"--pad-char",
|
|
437
516
|
type=str,
|
|
438
517
|
default="",
|
|
439
518
|
callback=decode_escaped_str,
|
|
440
|
-
help="
|
|
519
|
+
help="Character to pad short prompts with when using 'pad' strategy.",
|
|
441
520
|
)
|
|
442
521
|
@click.option(
|
|
443
522
|
"--concat-delimiter",
|
|
444
523
|
type=str,
|
|
445
524
|
default="",
|
|
446
525
|
help=(
|
|
447
|
-
"
|
|
448
|
-
" Used when strategy is 'concatenate'."
|
|
526
|
+
"Delimiter for concatenating short prompts (used with 'concatenate' strategy)."
|
|
449
527
|
),
|
|
450
528
|
)
|
|
451
529
|
@click.option(
|
|
452
530
|
"--prompt-tokens",
|
|
453
531
|
type=str,
|
|
454
532
|
default=None,
|
|
455
|
-
help="Prompt tokens
|
|
533
|
+
help="Prompt tokens configuration (JSON, YAML file, or key=value string).",
|
|
456
534
|
)
|
|
457
535
|
@click.option(
|
|
458
536
|
"--output-tokens",
|
|
459
537
|
type=str,
|
|
460
538
|
default=None,
|
|
461
|
-
help="Output tokens
|
|
539
|
+
help="Output tokens configuration (JSON, YAML file, or key=value string).",
|
|
462
540
|
)
|
|
463
541
|
@click.option(
|
|
464
542
|
"--push-to-hub",
|
|
465
543
|
is_flag=True,
|
|
466
|
-
help="
|
|
544
|
+
help="Push the processed dataset to Hugging Face Hub.",
|
|
467
545
|
)
|
|
468
546
|
@click.option(
|
|
469
547
|
"--hub-dataset-id",
|
|
470
548
|
type=str,
|
|
471
549
|
default=None,
|
|
472
|
-
help="
|
|
473
|
-
"Required if --push-to-hub is used.",
|
|
550
|
+
help=("Hugging Face Hub dataset ID for upload (required if --push-to-hub is set)."),
|
|
474
551
|
)
|
|
475
552
|
@click.option(
|
|
476
553
|
"--random-seed",
|
|
477
554
|
type=int,
|
|
478
555
|
default=42,
|
|
479
556
|
show_default=True,
|
|
480
|
-
help="Random seed for
|
|
557
|
+
help="Random seed for reproducible token sampling.",
|
|
481
558
|
)
|
|
482
559
|
def dataset(
|
|
483
560
|
data,
|
|
@@ -511,5 +588,128 @@ def dataset(
|
|
|
511
588
|
)
|
|
512
589
|
|
|
513
590
|
|
|
591
|
+
@cli.command(
|
|
592
|
+
"mock-server",
|
|
593
|
+
help=(
|
|
594
|
+
"Start a mock OpenAI/vLLM-compatible server for testing. "
|
|
595
|
+
"Simulates model inference with configurable latency and token generation."
|
|
596
|
+
),
|
|
597
|
+
)
|
|
598
|
+
@click.option(
|
|
599
|
+
"--host",
|
|
600
|
+
default="127.0.0.1",
|
|
601
|
+
help="Host address to bind the server to.",
|
|
602
|
+
)
|
|
603
|
+
@click.option(
|
|
604
|
+
"--port",
|
|
605
|
+
default=8000,
|
|
606
|
+
type=int,
|
|
607
|
+
help="Port number to bind the server to.",
|
|
608
|
+
)
|
|
609
|
+
@click.option(
|
|
610
|
+
"--workers",
|
|
611
|
+
default=1,
|
|
612
|
+
type=int,
|
|
613
|
+
help="Number of worker processes.",
|
|
614
|
+
)
|
|
615
|
+
@click.option(
|
|
616
|
+
"--model",
|
|
617
|
+
default="llama-3.1-8b-instruct",
|
|
618
|
+
help="Name of the model to mock.",
|
|
619
|
+
)
|
|
620
|
+
@click.option(
|
|
621
|
+
"--processor",
|
|
622
|
+
default=None,
|
|
623
|
+
help="Processor or tokenizer to use for requests.",
|
|
624
|
+
)
|
|
625
|
+
@click.option(
|
|
626
|
+
"--request-latency",
|
|
627
|
+
default=3,
|
|
628
|
+
type=float,
|
|
629
|
+
help="Request latency in seconds for non-streaming requests.",
|
|
630
|
+
)
|
|
631
|
+
@click.option(
|
|
632
|
+
"--request-latency-std",
|
|
633
|
+
default=0,
|
|
634
|
+
type=float,
|
|
635
|
+
help="Request latency standard deviation in seconds (normal distribution).",
|
|
636
|
+
)
|
|
637
|
+
@click.option(
|
|
638
|
+
"--ttft-ms",
|
|
639
|
+
default=150,
|
|
640
|
+
type=float,
|
|
641
|
+
help="Time to first token in milliseconds for streaming requests.",
|
|
642
|
+
)
|
|
643
|
+
@click.option(
|
|
644
|
+
"--ttft-ms-std",
|
|
645
|
+
default=0,
|
|
646
|
+
type=float,
|
|
647
|
+
help="Time to first token standard deviation in milliseconds.",
|
|
648
|
+
)
|
|
649
|
+
@click.option(
|
|
650
|
+
"--itl-ms",
|
|
651
|
+
default=10,
|
|
652
|
+
type=float,
|
|
653
|
+
help="Inter-token latency in milliseconds for streaming requests.",
|
|
654
|
+
)
|
|
655
|
+
@click.option(
|
|
656
|
+
"--itl-ms-std",
|
|
657
|
+
default=0,
|
|
658
|
+
type=float,
|
|
659
|
+
help="Inter-token latency standard deviation in milliseconds.",
|
|
660
|
+
)
|
|
661
|
+
@click.option(
|
|
662
|
+
"--output-tokens",
|
|
663
|
+
default=128,
|
|
664
|
+
type=int,
|
|
665
|
+
help="Number of output tokens for streaming requests.",
|
|
666
|
+
)
|
|
667
|
+
@click.option(
|
|
668
|
+
"--output-tokens-std",
|
|
669
|
+
default=0,
|
|
670
|
+
type=float,
|
|
671
|
+
help="Output tokens standard deviation (normal distribution).",
|
|
672
|
+
)
|
|
673
|
+
def mock_server(
|
|
674
|
+
host: str,
|
|
675
|
+
port: int,
|
|
676
|
+
workers: int,
|
|
677
|
+
model: str,
|
|
678
|
+
processor: str | None,
|
|
679
|
+
request_latency: float,
|
|
680
|
+
request_latency_std: float,
|
|
681
|
+
ttft_ms: float,
|
|
682
|
+
ttft_ms_std: float,
|
|
683
|
+
itl_ms: float,
|
|
684
|
+
itl_ms_std: float,
|
|
685
|
+
output_tokens: int,
|
|
686
|
+
output_tokens_std: float,
|
|
687
|
+
):
|
|
688
|
+
config = MockServerConfig(
|
|
689
|
+
host=host,
|
|
690
|
+
port=port,
|
|
691
|
+
workers=workers,
|
|
692
|
+
model=model,
|
|
693
|
+
processor=processor,
|
|
694
|
+
request_latency=request_latency,
|
|
695
|
+
request_latency_std=request_latency_std,
|
|
696
|
+
ttft_ms=ttft_ms,
|
|
697
|
+
ttft_ms_std=ttft_ms_std,
|
|
698
|
+
itl_ms=itl_ms,
|
|
699
|
+
itl_ms_std=itl_ms_std,
|
|
700
|
+
output_tokens=output_tokens,
|
|
701
|
+
output_tokens_std=output_tokens_std,
|
|
702
|
+
)
|
|
703
|
+
|
|
704
|
+
server = MockServer(config)
|
|
705
|
+
console = Console()
|
|
706
|
+
console.print_update(
|
|
707
|
+
title="GuideLLM mock server starting...",
|
|
708
|
+
details=f"Listening on http://{host}:{port} for model {model}",
|
|
709
|
+
status="success",
|
|
710
|
+
)
|
|
711
|
+
server.run()
|
|
712
|
+
|
|
713
|
+
|
|
514
714
|
if __name__ == "__main__":
|
|
515
715
|
cli()
|