guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl

This diff compares publicly available package versions as published to their respective public registries. It is provided for informational purposes only.
Files changed (141)
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +524 -255
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +109 -0
  5. guidellm/backends/openai.py +340 -0
  6. guidellm/backends/response_handlers.py +428 -0
  7. guidellm/benchmark/__init__.py +69 -39
  8. guidellm/benchmark/benchmarker.py +160 -316
  9. guidellm/benchmark/entrypoints.py +560 -127
  10. guidellm/benchmark/outputs/__init__.py +24 -0
  11. guidellm/benchmark/outputs/console.py +633 -0
  12. guidellm/benchmark/outputs/csv.py +721 -0
  13. guidellm/benchmark/outputs/html.py +473 -0
  14. guidellm/benchmark/outputs/output.py +169 -0
  15. guidellm/benchmark/outputs/serialized.py +69 -0
  16. guidellm/benchmark/profiles.py +718 -0
  17. guidellm/benchmark/progress.py +553 -556
  18. guidellm/benchmark/scenarios/__init__.py +40 -0
  19. guidellm/benchmark/scenarios/chat.json +6 -0
  20. guidellm/benchmark/scenarios/rag.json +6 -0
  21. guidellm/benchmark/schemas/__init__.py +66 -0
  22. guidellm/benchmark/schemas/base.py +402 -0
  23. guidellm/benchmark/schemas/generative/__init__.py +55 -0
  24. guidellm/benchmark/schemas/generative/accumulator.py +841 -0
  25. guidellm/benchmark/schemas/generative/benchmark.py +163 -0
  26. guidellm/benchmark/schemas/generative/entrypoints.py +381 -0
  27. guidellm/benchmark/schemas/generative/metrics.py +927 -0
  28. guidellm/benchmark/schemas/generative/report.py +158 -0
  29. guidellm/data/__init__.py +34 -4
  30. guidellm/data/builders.py +541 -0
  31. guidellm/data/collators.py +16 -0
  32. guidellm/data/config.py +120 -0
  33. guidellm/data/deserializers/__init__.py +49 -0
  34. guidellm/data/deserializers/deserializer.py +141 -0
  35. guidellm/data/deserializers/file.py +223 -0
  36. guidellm/data/deserializers/huggingface.py +94 -0
  37. guidellm/data/deserializers/memory.py +194 -0
  38. guidellm/data/deserializers/synthetic.py +246 -0
  39. guidellm/data/entrypoints.py +52 -0
  40. guidellm/data/loaders.py +190 -0
  41. guidellm/data/preprocessors/__init__.py +27 -0
  42. guidellm/data/preprocessors/formatters.py +410 -0
  43. guidellm/data/preprocessors/mappers.py +196 -0
  44. guidellm/data/preprocessors/preprocessor.py +30 -0
  45. guidellm/data/processor.py +29 -0
  46. guidellm/data/schemas.py +175 -0
  47. guidellm/data/utils/__init__.py +6 -0
  48. guidellm/data/utils/dataset.py +94 -0
  49. guidellm/extras/__init__.py +4 -0
  50. guidellm/extras/audio.py +220 -0
  51. guidellm/extras/vision.py +242 -0
  52. guidellm/logger.py +2 -2
  53. guidellm/mock_server/__init__.py +8 -0
  54. guidellm/mock_server/config.py +84 -0
  55. guidellm/mock_server/handlers/__init__.py +17 -0
  56. guidellm/mock_server/handlers/chat_completions.py +280 -0
  57. guidellm/mock_server/handlers/completions.py +280 -0
  58. guidellm/mock_server/handlers/tokenizer.py +142 -0
  59. guidellm/mock_server/models.py +510 -0
  60. guidellm/mock_server/server.py +238 -0
  61. guidellm/mock_server/utils.py +302 -0
  62. guidellm/scheduler/__init__.py +69 -26
  63. guidellm/scheduler/constraints/__init__.py +49 -0
  64. guidellm/scheduler/constraints/constraint.py +325 -0
  65. guidellm/scheduler/constraints/error.py +411 -0
  66. guidellm/scheduler/constraints/factory.py +182 -0
  67. guidellm/scheduler/constraints/request.py +312 -0
  68. guidellm/scheduler/constraints/saturation.py +722 -0
  69. guidellm/scheduler/environments.py +252 -0
  70. guidellm/scheduler/scheduler.py +137 -368
  71. guidellm/scheduler/schemas.py +358 -0
  72. guidellm/scheduler/strategies.py +617 -0
  73. guidellm/scheduler/worker.py +413 -419
  74. guidellm/scheduler/worker_group.py +712 -0
  75. guidellm/schemas/__init__.py +65 -0
  76. guidellm/schemas/base.py +417 -0
  77. guidellm/schemas/info.py +188 -0
  78. guidellm/schemas/request.py +235 -0
  79. guidellm/schemas/request_stats.py +349 -0
  80. guidellm/schemas/response.py +124 -0
  81. guidellm/schemas/statistics.py +1018 -0
  82. guidellm/{config.py → settings.py} +31 -24
  83. guidellm/utils/__init__.py +71 -8
  84. guidellm/utils/auto_importer.py +98 -0
  85. guidellm/utils/cli.py +132 -5
  86. guidellm/utils/console.py +566 -0
  87. guidellm/utils/encoding.py +778 -0
  88. guidellm/utils/functions.py +159 -0
  89. guidellm/utils/hf_datasets.py +1 -2
  90. guidellm/utils/hf_transformers.py +4 -4
  91. guidellm/utils/imports.py +9 -0
  92. guidellm/utils/messaging.py +1118 -0
  93. guidellm/utils/mixins.py +115 -0
  94. guidellm/utils/random.py +3 -4
  95. guidellm/utils/registry.py +220 -0
  96. guidellm/utils/singleton.py +133 -0
  97. guidellm/utils/synchronous.py +159 -0
  98. guidellm/utils/text.py +163 -50
  99. guidellm/utils/typing.py +41 -0
  100. guidellm/version.py +2 -2
  101. guidellm-0.6.0a5.dist-info/METADATA +364 -0
  102. guidellm-0.6.0a5.dist-info/RECORD +109 -0
  103. guidellm/backend/__init__.py +0 -23
  104. guidellm/backend/backend.py +0 -259
  105. guidellm/backend/openai.py +0 -708
  106. guidellm/backend/response.py +0 -136
  107. guidellm/benchmark/aggregator.py +0 -760
  108. guidellm/benchmark/benchmark.py +0 -837
  109. guidellm/benchmark/output.py +0 -997
  110. guidellm/benchmark/profile.py +0 -409
  111. guidellm/benchmark/scenario.py +0 -104
  112. guidellm/data/prideandprejudice.txt.gz +0 -0
  113. guidellm/dataset/__init__.py +0 -22
  114. guidellm/dataset/creator.py +0 -213
  115. guidellm/dataset/entrypoints.py +0 -42
  116. guidellm/dataset/file.py +0 -92
  117. guidellm/dataset/hf_datasets.py +0 -62
  118. guidellm/dataset/in_memory.py +0 -132
  119. guidellm/dataset/synthetic.py +0 -287
  120. guidellm/objects/__init__.py +0 -18
  121. guidellm/objects/pydantic.py +0 -89
  122. guidellm/objects/statistics.py +0 -953
  123. guidellm/preprocess/__init__.py +0 -3
  124. guidellm/preprocess/dataset.py +0 -374
  125. guidellm/presentation/__init__.py +0 -28
  126. guidellm/presentation/builder.py +0 -27
  127. guidellm/presentation/data_models.py +0 -232
  128. guidellm/presentation/injector.py +0 -66
  129. guidellm/request/__init__.py +0 -18
  130. guidellm/request/loader.py +0 -284
  131. guidellm/request/request.py +0 -79
  132. guidellm/request/types.py +0 -10
  133. guidellm/scheduler/queues.py +0 -25
  134. guidellm/scheduler/result.py +0 -155
  135. guidellm/scheduler/strategy.py +0 -495
  136. guidellm-0.3.1.dist-info/METADATA +0 -329
  137. guidellm-0.3.1.dist-info/RECORD +0 -62
  138. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/WHEEL +0 -0
  139. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/entry_points.txt +0 -0
  140. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/licenses/LICENSE +0 -0
  141. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/top_level.txt +0 -0
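
For library users, the module renames in this list imply an import-path migration, not just new CLI names. A minimal before/after sketch, using only moves confirmed by the `guidellm/__main__.py` diff below:

    # guidellm 0.3.1
    # from guidellm.backend import BackendType
    # from guidellm.config import print_config
    # from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset

    # guidellm 0.6.0a5
    from guidellm.backends import BackendType
    from guidellm.settings import print_config
    from guidellm.data import ShortPromptStrategy, process_dataset
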
guidellm/__main__.py CHANGED
@@ -1,51 +1,110 @@
+ """
+ GuideLLM command-line interface entry point.
+
+ Primary CLI application providing benchmark execution, dataset preprocessing, and
+ mock server functionality for language model evaluation. Organizes commands into
+ three main groups: benchmark operations for performance testing, preprocessing
+ utilities for data transformation, and mock server capabilities for development
+ and testing. Supports multiple backends, output formats, and flexible configuration
+ through CLI options and environment variables.
+
+ Example:
+ ::
+     # Run a benchmark against a model
+     guidellm benchmark run --target http://localhost:8000 --data dataset.json \\
+         --profile sweep
+
+     # Preprocess a dataset
+     guidellm preprocess dataset input.json output.json --processor gpt2
+
+     # Start a mock server for testing
+     guidellm mock-server --host 0.0.0.0 --port 8080
+ """
+
+ from __future__ import annotations
+
  import asyncio
  import codecs
  from pathlib import Path
- from typing import get_args

  import click
  from pydantic import ValidationError

- from guidellm.backend import BackendType
+ from guidellm.data import ShortPromptStrategy, process_dataset
+
+ try:
+     import uvloop
+ except ImportError:
+     uvloop = None  # type: ignore[assignment] # Optional dependency
+
+ from guidellm.backends import BackendType
  from guidellm.benchmark import (
+     BenchmarkGenerativeTextArgs,
+     GenerativeConsoleBenchmarkerProgress,
      ProfileType,
+     benchmark_generative_text,
+     get_builtin_scenarios,
      reimport_benchmarks_report,
  )
- from guidellm.benchmark.entrypoints import benchmark_with_scenario
- from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios
- from guidellm.config import print_config
- from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset
+ from guidellm.mock_server import MockServer, MockServerConfig
  from guidellm.scheduler import StrategyType
- from guidellm.utils import DefaultGroupHandler
+ from guidellm.schemas import GenerativeRequestType
+ from guidellm.settings import print_config
+ from guidellm.utils import Console, DefaultGroupHandler, get_literal_vals
  from guidellm.utils import cli as cli_tools

- STRATEGY_PROFILE_CHOICES = list(
-     set(list(get_args(ProfileType)) + list(get_args(StrategyType)))
- )
+ STRATEGY_PROFILE_CHOICES: list[str] = list(get_literal_vals(ProfileType | StrategyType))
+ """Available strategy and profile type choices for benchmark execution."""
+
+
+ def decode_escaped_str(_ctx, _param, value):
+     """
+     Decode escape sequences in Click option values.
+
+     Click automatically escapes characters converting sequences like "\\n" to
+     "\\\\n". This function decodes these sequences to their intended characters.
+
+     :param _ctx: Click context (unused)
+     :param _param: Click parameter (unused)
+     :param value: String value to decode
+     :return: Decoded string with proper escape sequences, or None if input is None
+     :raises click.BadParameter: When escape sequence decoding fails
+     """
+     if value is None:
+         return None
+     try:
+         return codecs.decode(value, "unicode_escape")
+     except Exception as e:
+         raise click.BadParameter(f"Could not decode escape sequences: {e}") from e


  @click.group()
  @click.version_option(package_name="guidellm", message="guidellm version: %(version)s")
  def cli():
-     pass
+     """GuideLLM CLI for benchmarking, preprocessing, and testing language models."""


  @cli.group(
-     help="Commands to run a new benchmark or load a prior one.",
+     help="Run a benchmark or load a previously saved benchmark report.",
      cls=DefaultGroupHandler,
      default="run",
  )
  def benchmark():
-     pass
+     """Benchmark commands for performance testing generative models."""


  @benchmark.command(
      "run",
-     help="Run a benchmark against a generative model using the specified arguments.",
+     help=(
+         "Run a benchmark against a generative model. "
+         "Supports multiple backends, data sources, strategies, and output formats. "
+         "Configuration can be loaded from a scenario file or specified via options."
+     ),
      context_settings={"auto_envvar_prefix": "GUIDELLM"},
  )
  @click.option(
      "--scenario",
+     "-c",
      type=cli_tools.Union(
          click.Path(
              exists=True,
@@ -54,276 +113,375 @@ def benchmark():
              dir_okay=False,
              path_type=Path,
          ),
-         click.Choice(get_builtin_scenarios()),
+         click.Choice(tuple(get_builtin_scenarios().keys())),
      ),
      default=None,
      help=(
-         "The name of a builtin scenario or path to a config file. "
-         "Missing values from the config will use defaults. "
-         "Options specified on the commandline will override the scenario."
+         "Builtin scenario name or path to config file. "
+         "CLI options override scenario settings."
      ),
  )
  @click.option(
      "--target",
      type=str,
-     help="The target path for the backend to run benchmarks against. For example, http://localhost:8000",
+     help="Target backend URL (e.g., http://localhost:8000).",
  )
  @click.option(
-     "--backend-type",
-     type=click.Choice(list(get_args(BackendType))),
+     "--data",
+     type=str,
+     multiple=True,
      help=(
-         "The type of backend to use to run requests against. Defaults to 'openai_http'."
-         f" Supported types: {', '.join(get_args(BackendType))}"
+         "HuggingFace dataset ID, path to dataset, path to data file "
+         "(csv/json/jsonl/txt), or synthetic data config (json/key=value)."
      ),
-     default=GenerativeTextScenario.get_default("backend_type"),
  )
  @click.option(
-     "--backend-args",
-     callback=cli_tools.parse_json,
-     default=GenerativeTextScenario.get_default("backend_args"),
+     "--profile",
+     "--rate-type",  # legacy alias
+     "profile",
+     default=BenchmarkGenerativeTextArgs.get_default("profile"),
+     type=click.Choice(STRATEGY_PROFILE_CHOICES),
+     help=f"Benchmark profile type. Options: {', '.join(STRATEGY_PROFILE_CHOICES)}.",
+ )
+ @click.option(
+     "--rate",
+     callback=cli_tools.parse_list_floats,
+     multiple=True,
+     default=BenchmarkGenerativeTextArgs.get_default("rate"),
      help=(
-         "A JSON string containing any arguments to pass to the backend as a "
-         "dict with **kwargs. Headers can be removed by setting their value to "
-         "null. For example: "
-         """'{"headers": {"Authorization": null, "Custom-Header": "Custom-Value"}}'"""
+         "Benchmark rate(s) to test. Meaning depends on profile: "
+         "sweep=number of benchmarks, concurrent=concurrent requests, "
+         "async/constant/poisson=requests per second."
      ),
  )
+ # Backend configuration
+ @click.option(
+     "--backend",
+     "--backend-type",  # legacy alias
+     "backend",
+     type=click.Choice(list(get_literal_vals(BackendType))),
+     default=BenchmarkGenerativeTextArgs.get_default("backend"),
+     help=f"Backend type. Options: {', '.join(get_literal_vals(BackendType))}.",
+ )
+ @click.option(
+     "--backend-kwargs",
+     "--backend-args",  # legacy alias
+     "backend_kwargs",
+     callback=cli_tools.parse_json,
+     default=BenchmarkGenerativeTextArgs.get_default("backend_kwargs"),
+     help="JSON string of arguments to pass to the backend.",
+ )
  @click.option(
      "--model",
-     default=GenerativeTextScenario.get_default("model"),
+     default=BenchmarkGenerativeTextArgs.get_default("model"),
      type=str,
+     help="Model ID to benchmark. If not provided, uses first available model.",
+ )
+ # Data configuration
+ @click.option(
+     "--request-type",
+     default=BenchmarkGenerativeTextArgs.get_default("data_request_formatter"),
+     type=click.Choice(list(get_literal_vals(GenerativeRequestType))),
      help=(
-         "The ID of the model to benchmark within the backend. "
-         "If None provided (default), then it will use the first model available."
+         f"Request type to create for each data sample. "
+         f"Options: {', '.join(get_literal_vals(GenerativeRequestType))}."
      ),
  )
+ @click.option(
+     "--request-formatter-kwargs",
+     default=None,
+     callback=cli_tools.parse_json,
+     help="JSON string of arguments to pass to the request formatter.",
+ )
  @click.option(
      "--processor",
-     default=GenerativeTextScenario.get_default("processor"),
+     default=BenchmarkGenerativeTextArgs.get_default("processor"),
      type=str,
      help=(
-         "The processor or tokenizer to use to calculate token counts for statistics "
-         "and synthetic data generation. If None provided (default), will load "
-         "using the model arg, if needed."
+         "Processor or tokenizer for token count calculations. "
+         "If not provided, loads from model."
      ),
  )
  @click.option(
      "--processor-args",
-     default=GenerativeTextScenario.get_default("processor_args"),
+     default=BenchmarkGenerativeTextArgs.get_default("processor_args"),
      callback=cli_tools.parse_json,
+     help="JSON string of arguments to pass to the processor constructor.",
+ )
+ @click.option(
+     "--data-args",
+     multiple=True,
+     default=BenchmarkGenerativeTextArgs.get_default("data_args"),
+     callback=cli_tools.parse_json,
+     help="JSON string of arguments to pass to dataset creation.",
+ )
+ @click.option(
+     "--data-samples",
+     default=BenchmarkGenerativeTextArgs.get_default("data_samples"),
+     type=int,
      help=(
-         "A JSON string containing any arguments to pass to the processor constructor "
-         "as a dict with **kwargs."
+         "Number of samples from dataset. -1 (default) uses all samples "
+         "and dynamically generates more."
      ),
  )
  @click.option(
-     "--data",
-     type=str,
+     "--data-column-mapper",
+     default=BenchmarkGenerativeTextArgs.get_default("data_column_mapper"),
+     callback=cli_tools.parse_json,
+     help="JSON string of column mappings to apply to the dataset.",
+ )
+ @click.option(
+     "--data-sampler",
+     default=BenchmarkGenerativeTextArgs.get_default("data_sampler"),
+     type=click.Choice(["shuffle"]),
+     help="Data sampler type.",
+ )
+ @click.option(
+     "--data-num-workers",
+     default=BenchmarkGenerativeTextArgs.get_default("data_num_workers"),
+     type=int,
+     help="Number of worker processes for data loading.",
+ )
+ @click.option(
+     "--dataloader-kwargs",
+     default=BenchmarkGenerativeTextArgs.get_default("dataloader_kwargs"),
+     callback=cli_tools.parse_json,
+     help="JSON string of arguments to pass to the dataloader constructor.",
+ )
+ @click.option(
+     "--random-seed",
+     default=BenchmarkGenerativeTextArgs.get_default("random_seed"),
+     type=int,
+     help="Random seed for reproducibility.",
+ )
+ # Output configuration
+ @click.option(
+     "--output-dir",
+     type=click.Path(file_okay=False, dir_okay=True, path_type=Path),
+     default=BenchmarkGenerativeTextArgs.get_default("output_dir"),
+     help="The directory path to save file output types in.",
+ )
+ @click.option(
+     "--outputs",
+     callback=cli_tools.parse_list,
+     multiple=True,
+     default=BenchmarkGenerativeTextArgs.get_default("outputs"),
      help=(
-         "The HuggingFace dataset ID, a path to a HuggingFace dataset, "
-         "a path to a data file csv, json, jsonl, or txt, "
-         "or a synthetic data config as a json or key=value string."
+         "The filename.ext for each of the outputs to create or the "
+         "aliases (json, csv, html) for the output files to create with "
+         "their default file names (benchmark.[EXT])"
      ),
  )
  @click.option(
-     "--data-args",
-     default=GenerativeTextScenario.get_default("data_args"),
-     callback=cli_tools.parse_json,
+     "--output-path",
+     type=click.Path(),
+     default=None,
      help=(
-         "A JSON string containing any arguments to pass to the dataset creation "
-         "as a dict with **kwargs."
+         "Legacy parameter for the output path to save the output result to. "
+         "Resolves to fill in output-dir and outputs based on input path."
      ),
  )
  @click.option(
-     "--data-sampler",
-     default=GenerativeTextScenario.get_default("data_sampler"),
-     type=click.Choice(["random"]),
+     "--disable-console",
+     "--disable-console-outputs",  # legacy alias
+     "disable_console",
+     is_flag=True,
      help=(
-         "The data sampler type to use. 'random' will add a random shuffle on the data. "
-         "Defaults to None"
+         "Disable all outputs to the console (updates, interactive progress, results)."
      ),
  )
  @click.option(
-     "--rate-type",
-     type=click.Choice(STRATEGY_PROFILE_CHOICES),
+     "--disable-console-interactive",
+     "--disable-progress",  # legacy alias
+     "disable_console_interactive",
+     is_flag=True,
+     help="Disable interactive console progress updates.",
+ )
+ # Aggregators configuration
+ @click.option(
+     "--warmup",
+     "--warmup-percent",  # legacy alias
+     "warmup",
+     default=BenchmarkGenerativeTextArgs.get_default("warmup"),
+     callback=cli_tools.parse_json,
      help=(
-         "The type of benchmark to run. "
-         f"Supported types {', '.join(STRATEGY_PROFILE_CHOICES)}. "
+         "Warmup specification: int, float, or dict as string "
+         "(json or key=value). "
+         "Controls time or requests before measurement starts. "
+         "Numeric in (0, 1): percent of duration or request count. "
+         "Numeric >=1: duration in seconds or request count. "
+         "Advanced config: see TransientPhaseConfig schema."
      ),
  )
  @click.option(
-     "--rate",
-     default=GenerativeTextScenario.get_default("rate"),
+     "--cooldown",
+     "--cooldown-percent",  # legacy alias
+     "cooldown",
+     default=BenchmarkGenerativeTextArgs.get_default("cooldown"),
+     callback=cli_tools.parse_json,
      help=(
-         "The rates to run the benchmark at. "
-         "Can be a single number or a comma-separated list of numbers. "
-         "For rate-type=sweep, this is the number of benchmarks it runs in the sweep. "
-         "For rate-type=concurrent, this is the number of concurrent requests. "
-         "For rate-type=async,constant,poisson, this is the rate requests per second. "
-         "For rate-type=synchronous,throughput, this must not be set."
+         "Cooldown specification: int, float, or dict as string "
+         "(json or key=value). "
+         "Controls time or requests after measurement ends. "
+         "Numeric in (0, 1): percent of duration or request count. "
+         "Numeric >=1: duration in seconds or request count. "
+         "Advanced config: see TransientPhaseConfig schema."
      ),
  )
  @click.option(
-     "--max-seconds",
+     "--rampup",
      type=float,
-     default=GenerativeTextScenario.get_default("max_seconds"),
+     default=BenchmarkGenerativeTextArgs.get_default("rampup"),
      help=(
-         "The maximum number of seconds each benchmark can run for. "
-         "If None, will run until max_requests or the data is exhausted."
+         "The time, in seconds, to ramp up the request rate over. "
+         "Only applicable for Throughput/Concurrent strategies."
      ),
  )
  @click.option(
-     "--max-requests",
+     "--sample-requests",
+     "--output-sampling",  # legacy alias
+     "sample_requests",
      type=int,
-     default=GenerativeTextScenario.get_default("max_requests"),
      help=(
-         "The maximum number of requests each benchmark can run for. "
-         "If None, will run until max_seconds or the data is exhausted."
+         "Number of sample requests per status to save. "
+         "None (default) saves all, recommended: 20."
      ),
  )
+ # Constraints configuration
  @click.option(
-     "--warmup-percent",
+     "--max-seconds",
      type=float,
-     default=GenerativeTextScenario.get_default("warmup_percent"),
+     default=BenchmarkGenerativeTextArgs.get_default("max_seconds"),
      help=(
-         "The percent of the benchmark (based on max-seconds, max-requets, "
-         "or lenth of dataset) to run as a warmup and not include in the final results. "
-         "Defaults to None."
+         "Maximum seconds per benchmark. "
+         "If None, runs until max_requests or data exhaustion."
      ),
  )
  @click.option(
-     "--cooldown-percent",
-     type=float,
-     default=GenerativeTextScenario.get_default("cooldown_percent"),
+     "--max-requests",
+     type=int,
+     default=BenchmarkGenerativeTextArgs.get_default("max_requests"),
      help=(
-         "The percent of the benchmark (based on max-seconds, max-requets, or lenth "
-         "of dataset) to run as a cooldown and not include in the final results. "
-         "Defaults to None."
+         "Maximum requests per benchmark. "
+         "If None, runs until max_seconds or data exhaustion."
      ),
  )
  @click.option(
-     "--disable-progress",
-     is_flag=True,
-     help="Set this flag to disable progress updates to the console",
+     "--max-errors",
+     type=int,
+     default=BenchmarkGenerativeTextArgs.get_default("max_errors"),
+     help="Maximum errors before stopping the benchmark.",
  )
  @click.option(
-     "--display-scheduler-stats",
-     is_flag=True,
-     help="Set this flag to display stats for the processes running the benchmarks",
+     "--max-error-rate",
+     type=float,
+     default=BenchmarkGenerativeTextArgs.get_default("max_error_rate"),
+     help="Maximum error rate before stopping the benchmark.",
  )
  @click.option(
-     "--disable-console-outputs",
-     is_flag=True,
-     help="Set this flag to disable console output",
+     "--max-global-error-rate",
+     type=float,
+     default=BenchmarkGenerativeTextArgs.get_default("max_global_error_rate"),
+     help="Maximum global error rate across all benchmarks.",
  )
  @click.option(
-     "--output-path",
-     type=click.Path(),
-     default=Path.cwd() / "benchmarks.json",
+     "--over-saturation",
+     "over_saturation",
+     callback=cli_tools.parse_json,
+     default=None,
      help=(
-         "The path to save the output to. If it is a directory, "
-         "it will save benchmarks.json under it. "
-         "Otherwise, json, yaml, csv, or html files are supported for output types "
-         "which will be read from the extension for the file path."
+         "Enable over-saturation detection. "
+         "Pass a JSON dict with configuration "
+         '(e.g., \'{"enabled": true, "min_seconds": 30}\'). '
+         "Defaults to None (disabled)."
      ),
  )
  @click.option(
-     "--output-extras",
+     "--detect-saturation",
+     "--default-over-saturation",
+     "over_saturation",
      callback=cli_tools.parse_json,
-     help="A JSON string of extra data to save with the output benchmarks",
- )
- @click.option(
-     "--output-sampling",
-     type=int,
-     help=(
-         "The number of samples to save in the output file. "
-         "If None (default), will save all samples."
-     ),
-     default=GenerativeTextScenario.get_default("output_sampling"),
+     flag_value='{"enabled": true}',
+     help="Enable over-saturation detection with default settings.",
  )
- @click.option(
-     "--random-seed",
-     default=GenerativeTextScenario.get_default("random_seed"),
-     type=int,
-     help="The random seed to use for benchmarking to ensure reproducibility.",
- )
- def run(
-     scenario,
-     target,
-     backend_type,
-     backend_args,
-     model,
-     processor,
-     processor_args,
-     data,
-     data_args,
-     data_sampler,
-     rate_type,
-     rate,
-     max_seconds,
-     max_requests,
-     warmup_percent,
-     cooldown_percent,
-     disable_progress,
-     display_scheduler_stats,
-     disable_console_outputs,
-     output_path,
-     output_extras,
-     output_sampling,
-     random_seed,
- ):
-     click_ctx = click.get_current_context()
+ def run(**kwargs):  # noqa: C901
+     # Only set CLI args that differ from click defaults
+     kwargs = cli_tools.set_if_not_default(click.get_current_context(), **kwargs)

-     overrides = cli_tools.set_if_not_default(
-         click_ctx,
-         target=target,
-         backend_type=backend_type,
-         backend_args=backend_args,
-         model=model,
-         processor=processor,
-         processor_args=processor_args,
-         data=data,
-         data_args=data_args,
-         data_sampler=data_sampler,
-         rate_type=rate_type,
-         rate=rate,
-         max_seconds=max_seconds,
-         max_requests=max_requests,
-         warmup_percent=warmup_percent,
-         cooldown_percent=cooldown_percent,
-         output_sampling=output_sampling,
-         random_seed=random_seed,
+     # Handle remapping for request params
+     request_type = kwargs.pop("request_type", None)
+     request_formatter_kwargs = kwargs.pop("request_formatter_kwargs", None)
+     if request_type is not None:
+         kwargs["data_request_formatter"] = (
+             request_type
+             if not request_formatter_kwargs
+             else {"request_type": request_type, **request_formatter_kwargs}
+         )
+     elif request_formatter_kwargs is not None:
+         kwargs["data_request_formatter"] = request_formatter_kwargs
+
+     # Handle output path remapping
+     if (output_path := kwargs.pop("output_path", None)) is not None:
+         if kwargs.get("output_dir", None) is not None:
+             raise click.BadParameter("Cannot use --output-path with --output-dir.")
+         path = Path(output_path)
+         if path.is_dir():
+             kwargs["output_dir"] = path
+         else:
+             kwargs["output_dir"] = path.parent
+             kwargs["outputs"] = (path.name,)
+
+     # Handle console options
+     disable_console = kwargs.pop("disable_console", False)
+     disable_console_interactive = (
+         kwargs.pop("disable_console_interactive", False) or disable_console
      )
+     console = Console() if not disable_console else None
+     envs = cli_tools.list_set_env()
+     if console and envs:
+         console.print_update(
+             title=(
+                 "Note: the following environment variables "
+                 "are set and **may** affect configuration"
+             ),
+             details=", ".join(envs),
+             status="warning",
+         )

      try:
-         # If a scenario file was specified read from it
-         if scenario is None:
-             _scenario = GenerativeTextScenario.model_validate(overrides)
-         elif isinstance(scenario, Path):
-             _scenario = GenerativeTextScenario.from_file(scenario, overrides)
-         else:  # Only builtins can make it here; click will catch anything else
-             _scenario = GenerativeTextScenario.from_builtin(scenario, overrides)
-     except ValidationError as e:
+         args = BenchmarkGenerativeTextArgs.create(
+             scenario=kwargs.pop("scenario", None), **kwargs
+         )
+     except ValidationError as err:
          # Translate pydantic validation error to click argument error
-         errs = e.errors(include_url=False, include_context=True, include_input=True)
+         errs = err.errors(include_url=False, include_context=True, include_input=True)
          param_name = "--" + str(errs[0]["loc"][0]).replace("_", "-")
          raise click.BadParameter(
-             errs[0]["msg"], ctx=click_ctx, param_hint=param_name
-         ) from e
+             errs[0]["msg"], ctx=click.get_current_context(), param_hint=param_name
+         ) from err

+     if uvloop is not None:
+         asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
      asyncio.run(
-         benchmark_with_scenario(
-             scenario=_scenario,
-             show_progress=not disable_progress,
-             show_progress_scheduler_stats=display_scheduler_stats,
-             output_console=not disable_console_outputs,
-             output_path=output_path,
-             output_extras=output_extras,
+         benchmark_generative_text(
+             args=args,
+             progress=(
+                 GenerativeConsoleBenchmarkerProgress()
+                 if not disable_console_interactive
+                 else None
+             ),
+             console=console,
          )
      )


- @benchmark.command("from-file", help="Load a saved benchmark report.")
+ @benchmark.command(
+     "from-file",
+     help=(
+         "Load a saved benchmark report and optionally re-export to other formats. "
+         "PATH: Path to the saved benchmark report file (default: ./benchmarks.json)."
+     ),
+ )
  @click.argument(
      "path",
      type=click.Path(file_okay=True, dir_okay=False, exists=True),
@@ -331,61 +489,46 @@ def run(
  )
  @click.option(
      "--output-path",
-     type=click.Path(file_okay=True, dir_okay=True, exists=False),
-     default=None,
-     is_flag=False,
-     flag_value=Path.cwd() / "benchmarks_reexported.json",
+     type=click.Path(),
+     default=Path.cwd(),
      help=(
-         "Allows re-exporting the benchmarks to another format. "
-         "The path to save the output to. If it is a directory, "
-         "it will save benchmarks.json under it. "
-         "Otherwise, json, yaml, or csv files are supported for output types "
-         "which will be read from the extension for the file path. "
-         "This input is optional. If the output path flag is not provided, "
-         "the benchmarks will not be reexported. If the flag is present but "
-         "no value is specified, it will default to the current directory "
-         "with the file name `benchmarks_reexported.json`."
+         "Directory or file path to save re-exported benchmark results. "
+         "If a directory, all output formats will be saved there. "
+         "If a file, the matching format will be saved to that file."
      ),
  )
- def from_file(path, output_path):
-     reimport_benchmarks_report(path, output_path)
-
-
- def decode_escaped_str(_ctx, _param, value):
-     """
-     Click auto adds characters. For example, when using --pad-char "\n",
-     it parses it as "\\n". This method decodes the string to handle escape
-     sequences correctly.
-     """
-     if value is None:
-         return None
-     try:
-         return codecs.decode(value, "unicode_escape")
-     except Exception as e:
-         raise click.BadParameter(f"Could not decode escape sequences: {e}") from e
+ @click.option(
+     "--output-formats",
+     multiple=True,
+     type=str,
+     default=("console", "json"),  # ("console", "json", "html", "csv")
+     help="Output formats for benchmark results (e.g., console, json, html, csv).",
+ )
+ def from_file(path, output_path, output_formats):
+     asyncio.run(reimport_benchmarks_report(path, output_path, output_formats))


  @cli.command(
-     short_help="Prints environment variable settings.",
-     help=(
-         "Print out the available configuration settings that can be set "
-         "through environment variables."
-     ),
+     short_help="Show configuration settings.",
+     help="Display environment variables for configuring GuideLLM behavior.",
  )
  def config():
      print_config()


- @cli.group(help="General preprocessing tools and utilities.")
+ @cli.group(help="Tools for preprocessing datasets for use in benchmarks.")
  def preprocess():
-     pass
+     """Dataset preprocessing utilities."""


  @preprocess.command(
+     "dataset",
      help=(
-         "Convert a dataset to have specific prompt and output token sizes.\n"
-         "DATA: Path to the input dataset or dataset ID.\n"
-         "OUTPUT_PATH: Path to save the converted dataset, including file suffix."
+         "Process a dataset to have specific prompt and output token sizes. "
+         "Supports multiple strategies for handling prompts and optional "
+         "Hugging Face Hub upload.\n\n"
+         "DATA: Path to the input dataset or dataset ID.\n\n"
+         "OUTPUT_PATH: Path to save the processed dataset, including file suffix."
      ),
      context_settings={"auto_envvar_prefix": "GUIDELLM"},
  )
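
The `--pad-char` option in the next hunk still routes through `decode_escaped_str`, which the first hunk moved to module level and documented. A minimal stdlib sketch of the decoding it performs (standard `codecs` behavior, not guidellm-specific):

    import codecs

    # A user typing --pad-char "\n" reaches Click as backslash + "n";
    # unicode_escape folds it back into a real newline character.
    assert codecs.decode("\\n", "unicode_escape") == "\n"
    assert codecs.decode("\\t", "unicode_escape") == "\t"
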
@@ -403,93 +546,95 @@ def preprocess():
      "--processor",
      type=str,
      required=True,
+     help="Processor or tokenizer name for calculating token counts.",
+ )
+ @click.option(
+     "--config",
+     type=str,
+     required=True,
      help=(
-         "The processor or tokenizer to use to calculate token counts for statistics "
-         "and synthetic data generation."
+         "PreprocessDatasetConfig as JSON string, key=value pairs, "
+         "or file path (.json, .yaml, .yml, .config). "
+         "Example: 'prompt_tokens=100,output_tokens=50,prefix_tokens_max=10'"
+         ' or \'{"prompt_tokens": 100, "output_tokens": 50, '
+         '"prefix_tokens_max": 10}\''
      ),
  )
  @click.option(
      "--processor-args",
      default=None,
      callback=cli_tools.parse_json,
-     help=(
-         "A JSON string containing any arguments to pass to the processor constructor "
-         "as a dict with **kwargs."
-     ),
+     help="JSON string of arguments to pass to the processor constructor.",
  )
  @click.option(
      "--data-args",
      callback=cli_tools.parse_json,
-     help=(
-         "A JSON string containing any arguments to pass to the dataset creation "
-         "as a dict with **kwargs."
-     ),
+     help="JSON string of arguments to pass to dataset creation.",
+ )
+ @click.option(
+     "--data-column-mapper",
+     default=None,
+     callback=cli_tools.parse_json,
+     help="JSON string of column mappings to apply to the dataset.",
  )
  @click.option(
      "--short-prompt-strategy",
      type=click.Choice([s.value for s in ShortPromptStrategy]),
      default=ShortPromptStrategy.IGNORE.value,
      show_default=True,
-     help="Strategy to handle prompts shorter than the target length. ",
+     help="Strategy for handling prompts shorter than target length.",
  )
  @click.option(
      "--pad-char",
      type=str,
      default="",
      callback=decode_escaped_str,
-     help="The token to pad short prompts with when using the 'pad' strategy.",
+     help="Character to pad short prompts with when using 'pad' strategy.",
  )
  @click.option(
      "--concat-delimiter",
      type=str,
      default="",
      help=(
-         "The delimiter to use when concatenating prompts that are too short."
-         " Used when strategy is 'concatenate'."
+         "Delimiter for concatenating short prompts (used with 'concatenate' strategy)."
      ),
  )
  @click.option(
-     "--prompt-tokens",
-     type=str,
-     default=None,
-     help="Prompt tokens config (JSON, YAML file or key=value string)",
- )
- @click.option(
-     "--output-tokens",
-     type=str,
-     default=None,
-     help="Output tokens config (JSON, YAML file or key=value string)",
+     "--include-prefix-in-token-count",
+     is_flag=True,
+     default=False,
+     help="Include prefix tokens in prompt token count calculation.",
  )
  @click.option(
      "--push-to-hub",
      is_flag=True,
-     help="Set this flag to push the converted dataset to the Hugging Face Hub.",
+     help="Push the processed dataset to Hugging Face Hub.",
  )
  @click.option(
      "--hub-dataset-id",
      type=str,
      default=None,
-     help="The Hugging Face Hub dataset ID to push to. "
-     "Required if --push-to-hub is used.",
+     help=("Hugging Face Hub dataset ID for upload (required if --push-to-hub is set)."),
  )
  @click.option(
      "--random-seed",
      type=int,
      default=42,
      show_default=True,
-     help="Random seed for prompt token sampling and output tokens sampling.",
+     help="Random seed for reproducible token sampling.",
  )
  def dataset(
      data,
      output_path,
      processor,
+     config,
      processor_args,
      data_args,
+     data_column_mapper,
      short_prompt_strategy,
      pad_char,
      concat_delimiter,
-     prompt_tokens,
-     output_tokens,
+     include_prefix_in_token_count,
      push_to_hub,
      hub_dataset_id,
      random_seed,
@@ -498,18 +643,142 @@ def dataset(
          data=data,
          output_path=output_path,
          processor=processor,
-         prompt_tokens=prompt_tokens,
-         output_tokens=output_tokens,
+         config=config,
          processor_args=processor_args,
          data_args=data_args,
+         data_column_mapper=data_column_mapper,
          short_prompt_strategy=short_prompt_strategy,
          pad_char=pad_char,
          concat_delimiter=concat_delimiter,
+         include_prefix_in_token_count=include_prefix_in_token_count,
          push_to_hub=push_to_hub,
          hub_dataset_id=hub_dataset_id,
          random_seed=random_seed,
      )


+ @cli.command(
+     "mock-server",
+     help=(
+         "Start a mock OpenAI/vLLM-compatible server for testing. "
+         "Simulates model inference with configurable latency and token generation."
+     ),
+ )
+ @click.option(
+     "--host",
+     default="127.0.0.1",
+     help="Host address to bind the server to.",
+ )
+ @click.option(
+     "--port",
+     default=8000,
+     type=int,
+     help="Port number to bind the server to.",
+ )
+ @click.option(
+     "--workers",
+     default=1,
+     type=int,
+     help="Number of worker processes.",
+ )
+ @click.option(
+     "--model",
+     default="llama-3.1-8b-instruct",
+     help="Name of the model to mock.",
+ )
+ @click.option(
+     "--processor",
+     default=None,
+     help="Processor or tokenizer to use for requests.",
+ )
+ @click.option(
+     "--request-latency",
+     default=3,
+     type=float,
+     help="Request latency in seconds for non-streaming requests.",
+ )
+ @click.option(
+     "--request-latency-std",
+     default=0,
+     type=float,
+     help="Request latency standard deviation in seconds (normal distribution).",
+ )
+ @click.option(
+     "--ttft-ms",
+     default=150,
+     type=float,
+     help="Time to first token in milliseconds for streaming requests.",
+ )
+ @click.option(
+     "--ttft-ms-std",
+     default=0,
+     type=float,
+     help="Time to first token standard deviation in milliseconds.",
+ )
+ @click.option(
+     "--itl-ms",
+     default=10,
+     type=float,
+     help="Inter-token latency in milliseconds for streaming requests.",
+ )
+ @click.option(
+     "--itl-ms-std",
+     default=0,
+     type=float,
+     help="Inter-token latency standard deviation in milliseconds.",
+ )
+ @click.option(
+     "--output-tokens",
+     default=128,
+     type=int,
+     help="Number of output tokens for streaming requests.",
+ )
+ @click.option(
+     "--output-tokens-std",
+     default=0,
+     type=float,
+     help="Output tokens standard deviation (normal distribution).",
+ )
+ def mock_server(
+     host: str,
+     port: int,
+     workers: int,
+     model: str,
+     processor: str | None,
+     request_latency: float,
+     request_latency_std: float,
+     ttft_ms: float,
+     ttft_ms_std: float,
+     itl_ms: float,
+     itl_ms_std: float,
+     output_tokens: int,
+     output_tokens_std: float,
+ ):
+     config = MockServerConfig(
+         host=host,
+         port=port,
+         workers=workers,
+         model=model,
+         processor=processor,
+         request_latency=request_latency,
+         request_latency_std=request_latency_std,
+         ttft_ms=ttft_ms,
+         ttft_ms_std=ttft_ms_std,
+         itl_ms=itl_ms,
+         itl_ms_std=itl_ms_std,
+         output_tokens=output_tokens,
+         output_tokens_std=output_tokens_std,
+     )
+
+     server = MockServer(config)
+     console = Console()
+     console.print_update(
+         title="GuideLLM mock server starting...",
+         details=f"Listening on http://{host}:{port} for model {model}",
+         status="success",
+     )
+     server.run()
+
+
  if __name__ == "__main__":
      cli()
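
For reference, the rewritten `run` command above boils down to roughly this programmatic flow; a hedged sketch built only from names visible in this diff (the exact keyword set accepted by `BenchmarkGenerativeTextArgs.create` beyond `scenario` is an assumption inferred from the CLI options):

    import asyncio

    from guidellm.benchmark import (
        BenchmarkGenerativeTextArgs,
        GenerativeConsoleBenchmarkerProgress,
        benchmark_generative_text,
    )

    # target/data/profile mirror the CLI options above; treat them as
    # illustrative kwargs rather than a verified API surface.
    args = BenchmarkGenerativeTextArgs.create(
        scenario=None,
        target="http://localhost:8000",
        data=["dataset.json"],
        profile="sweep",
    )
    asyncio.run(
        benchmark_generative_text(
            args=args,
            progress=GenerativeConsoleBenchmarkerProgress(),
            console=None,
        )
    )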