guidellm 0.4.0a18__py3-none-any.whl → 0.4.0a155__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this version of guidellm has been marked as a potentially problematic release.

Files changed (116)
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +451 -252
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +110 -0
  5. guidellm/backends/openai.py +355 -0
  6. guidellm/backends/response_handlers.py +455 -0
  7. guidellm/benchmark/__init__.py +53 -39
  8. guidellm/benchmark/benchmarker.py +148 -317
  9. guidellm/benchmark/entrypoints.py +466 -128
  10. guidellm/benchmark/output.py +517 -771
  11. guidellm/benchmark/profile.py +580 -280
  12. guidellm/benchmark/progress.py +568 -549
  13. guidellm/benchmark/scenarios/__init__.py +40 -0
  14. guidellm/benchmark/scenarios/chat.json +6 -0
  15. guidellm/benchmark/scenarios/rag.json +6 -0
  16. guidellm/benchmark/schemas.py +2085 -0
  17. guidellm/data/__init__.py +28 -4
  18. guidellm/data/collators.py +16 -0
  19. guidellm/data/deserializers/__init__.py +53 -0
  20. guidellm/data/deserializers/deserializer.py +109 -0
  21. guidellm/data/deserializers/file.py +222 -0
  22. guidellm/data/deserializers/huggingface.py +94 -0
  23. guidellm/data/deserializers/memory.py +192 -0
  24. guidellm/data/deserializers/synthetic.py +346 -0
  25. guidellm/data/loaders.py +145 -0
  26. guidellm/data/preprocessors/__init__.py +25 -0
  27. guidellm/data/preprocessors/formatters.py +412 -0
  28. guidellm/data/preprocessors/mappers.py +198 -0
  29. guidellm/data/preprocessors/preprocessor.py +29 -0
  30. guidellm/data/processor.py +30 -0
  31. guidellm/data/schemas.py +13 -0
  32. guidellm/data/utils/__init__.py +10 -0
  33. guidellm/data/utils/dataset.py +94 -0
  34. guidellm/data/utils/functions.py +18 -0
  35. guidellm/extras/__init__.py +4 -0
  36. guidellm/extras/audio.py +215 -0
  37. guidellm/extras/vision.py +242 -0
  38. guidellm/logger.py +2 -2
  39. guidellm/mock_server/__init__.py +8 -0
  40. guidellm/mock_server/config.py +84 -0
  41. guidellm/mock_server/handlers/__init__.py +17 -0
  42. guidellm/mock_server/handlers/chat_completions.py +280 -0
  43. guidellm/mock_server/handlers/completions.py +280 -0
  44. guidellm/mock_server/handlers/tokenizer.py +142 -0
  45. guidellm/mock_server/models.py +510 -0
  46. guidellm/mock_server/server.py +168 -0
  47. guidellm/mock_server/utils.py +302 -0
  48. guidellm/preprocess/dataset.py +23 -26
  49. guidellm/presentation/builder.py +2 -2
  50. guidellm/presentation/data_models.py +25 -21
  51. guidellm/presentation/injector.py +2 -3
  52. guidellm/scheduler/__init__.py +65 -26
  53. guidellm/scheduler/constraints.py +1035 -0
  54. guidellm/scheduler/environments.py +252 -0
  55. guidellm/scheduler/scheduler.py +140 -368
  56. guidellm/scheduler/schemas.py +272 -0
  57. guidellm/scheduler/strategies.py +519 -0
  58. guidellm/scheduler/worker.py +391 -420
  59. guidellm/scheduler/worker_group.py +707 -0
  60. guidellm/schemas/__init__.py +31 -0
  61. guidellm/schemas/info.py +159 -0
  62. guidellm/schemas/request.py +216 -0
  63. guidellm/schemas/response.py +119 -0
  64. guidellm/schemas/stats.py +228 -0
  65. guidellm/{config.py → settings.py} +32 -21
  66. guidellm/utils/__init__.py +95 -8
  67. guidellm/utils/auto_importer.py +98 -0
  68. guidellm/utils/cli.py +46 -2
  69. guidellm/utils/console.py +183 -0
  70. guidellm/utils/encoding.py +778 -0
  71. guidellm/utils/functions.py +134 -0
  72. guidellm/utils/hf_datasets.py +1 -2
  73. guidellm/utils/hf_transformers.py +4 -4
  74. guidellm/utils/imports.py +9 -0
  75. guidellm/utils/messaging.py +1118 -0
  76. guidellm/utils/mixins.py +115 -0
  77. guidellm/utils/pydantic_utils.py +411 -0
  78. guidellm/utils/random.py +3 -4
  79. guidellm/utils/registry.py +220 -0
  80. guidellm/utils/singleton.py +133 -0
  81. guidellm/{objects → utils}/statistics.py +341 -247
  82. guidellm/utils/synchronous.py +159 -0
  83. guidellm/utils/text.py +163 -50
  84. guidellm/utils/typing.py +41 -0
  85. guidellm/version.py +1 -1
  86. {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/METADATA +33 -10
  87. guidellm-0.4.0a155.dist-info/RECORD +96 -0
  88. guidellm/backend/__init__.py +0 -23
  89. guidellm/backend/backend.py +0 -259
  90. guidellm/backend/openai.py +0 -705
  91. guidellm/backend/response.py +0 -136
  92. guidellm/benchmark/aggregator.py +0 -760
  93. guidellm/benchmark/benchmark.py +0 -837
  94. guidellm/benchmark/scenario.py +0 -104
  95. guidellm/data/prideandprejudice.txt.gz +0 -0
  96. guidellm/dataset/__init__.py +0 -22
  97. guidellm/dataset/creator.py +0 -213
  98. guidellm/dataset/entrypoints.py +0 -42
  99. guidellm/dataset/file.py +0 -92
  100. guidellm/dataset/hf_datasets.py +0 -62
  101. guidellm/dataset/in_memory.py +0 -132
  102. guidellm/dataset/synthetic.py +0 -287
  103. guidellm/objects/__init__.py +0 -18
  104. guidellm/objects/pydantic.py +0 -89
  105. guidellm/request/__init__.py +0 -18
  106. guidellm/request/loader.py +0 -284
  107. guidellm/request/request.py +0 -79
  108. guidellm/request/types.py +0 -10
  109. guidellm/scheduler/queues.py +0 -25
  110. guidellm/scheduler/result.py +0 -155
  111. guidellm/scheduler/strategy.py +0 -495
  112. guidellm-0.4.0a18.dist-info/RECORD +0 -62
  113. {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/WHEEL +0 -0
  114. {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/entry_points.txt +0 -0
  115. {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/licenses/LICENSE +0 -0
  116. {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/top_level.txt +0 -0
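
The file list reflects a package-level restructure: guidellm/backend/ is replaced by guidellm/backends/, guidellm/config.py becomes guidellm/settings.py, and the old dataset/, objects/, and request/ packages give way to data/ and schemas/. A minimal import-migration sketch for downstream code, using only the moves confirmed in the guidellm/__main__.py diff below:

    # Before (0.4.0a18):
    #   from guidellm.backend import BackendType
    #   from guidellm.config import print_config
    # After (0.4.0a155), matching the import changes in guidellm/__main__.py:
    from guidellm.backends import BackendType
    from guidellm.settings import print_config

Renamed CLI flags keep their old spellings as aliases (for example --rate-type for --profile, --backend-type for --backend, --warmup-percent for --warmup), so existing invocations continue to work; the diff marks each with a "# legacy alias" comment.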
guidellm/__main__.py CHANGED
@@ -1,47 +1,117 @@
+"""
+GuideLLM command-line interface entry point.
+
+Primary CLI application providing benchmark execution, dataset preprocessing, and
+mock server functionality for language model evaluation. Organizes commands into
+three main groups: benchmark operations for performance testing, preprocessing
+utilities for data transformation, and mock server capabilities for development
+and testing. Supports multiple backends, output formats, and flexible configuration
+through CLI options and environment variables.
+
+Example:
+::
+    # Run a benchmark against a model
+    guidellm benchmark run --target http://localhost:8000 --data dataset.json \\
+        --profile sweep
+
+    # Preprocess a dataset
+    guidellm preprocess dataset input.json output.json --processor gpt2
+
+    # Start a mock server for testing
+    guidellm mock-server --host 0.0.0.0 --port 8080
+"""
+
+from __future__ import annotations
+
 import asyncio
 import codecs
 from pathlib import Path
-from typing import get_args
 
 import click
 from pydantic import ValidationError
 
-from guidellm.backend import BackendType
+try:
+    import uvloop
+except ImportError:
+    uvloop = None  # type: ignore[assignment]  # Optional dependency
+
+from guidellm.backends import BackendType
 from guidellm.benchmark import (
+    BenchmarkGenerativeTextArgs,
+    GenerativeConsoleBenchmarkerProgress,
     ProfileType,
+    benchmark_generative_text,
+    get_builtin_scenarios,
     reimport_benchmarks_report,
 )
-from guidellm.benchmark.entrypoints import benchmark_with_scenario
-from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios
-from guidellm.config import print_config
+from guidellm.mock_server import MockServer, MockServerConfig
 from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset
 from guidellm.scheduler import StrategyType
-from guidellm.utils import DefaultGroupHandler
+from guidellm.schemas import GenerativeRequestType
+from guidellm.settings import print_config
+from guidellm.utils import Console, DefaultGroupHandler, get_literal_vals
 from guidellm.utils import cli as cli_tools
 
-STRATEGY_PROFILE_CHOICES = list(
-    set(list(get_args(ProfileType)) + list(get_args(StrategyType)))
-)
+__all__ = [
+    "STRATEGY_PROFILE_CHOICES",
+    "benchmark",
+    "cli",
+    "config",
+    "dataset",
+    "decode_escaped_str",
+    "from_file",
+    "mock_server",
+    "preprocess",
+    "run",
+]
+
+STRATEGY_PROFILE_CHOICES: list[str] = list(get_literal_vals(ProfileType | StrategyType))
+"""Available strategy and profile type choices for benchmark execution."""
+
+
+def decode_escaped_str(_ctx, _param, value):
+    """
+    Decode escape sequences in Click option values.
+
+    Click automatically escapes characters converting sequences like "\\n" to
+    "\\\\n". This function decodes these sequences to their intended characters.
+
+    :param _ctx: Click context (unused)
+    :param _param: Click parameter (unused)
+    :param value: String value to decode
+    :return: Decoded string with proper escape sequences, or None if input is None
+    :raises click.BadParameter: When escape sequence decoding fails
+    """
+    if value is None:
+        return None
+    try:
+        return codecs.decode(value, "unicode_escape")
+    except Exception as e:
+        raise click.BadParameter(f"Could not decode escape sequences: {e}") from e
 
 
 @click.group()
 @click.version_option(package_name="guidellm", message="guidellm version: %(version)s")
 def cli():
-    pass
+    """GuideLLM CLI for benchmarking, preprocessing, and testing language models."""
 
 
 @cli.group(
-    help="Commands to run a new benchmark or load a prior one.",
+    help="Run a benchmark or load a previously saved benchmark report.",
     cls=DefaultGroupHandler,
     default="run",
 )
 def benchmark():
-    pass
+    """Benchmark commands for performance testing generative models."""
 
 
 @benchmark.command(
     "run",
-    help="Run a benchmark against a generative model using the specified arguments.",
+    help=(
+        "Run a benchmark against a generative model. "
+        "Supports multiple backends, data sources, strategies, and output formats. "
+        "Configuration can be loaded from a scenario file or specified via options."
+    ),
     context_settings={"auto_envvar_prefix": "GUIDELLM"},
 )
 @click.option(
@@ -54,276 +124,308 @@ def benchmark():
             dir_okay=False,
             path_type=Path,
         ),
-        click.Choice(get_builtin_scenarios()),
+        click.Choice(tuple(get_builtin_scenarios().keys())),
     ),
     default=None,
     help=(
-        "The name of a builtin scenario or path to a config file. "
-        "Missing values from the config will use defaults. "
-        "Options specified on the commandline will override the scenario."
+        "Builtin scenario name or path to config file. "
+        "CLI options override scenario settings."
     ),
 )
 @click.option(
     "--target",
     type=str,
-    help="The target path for the backend to run benchmarks against. For example, http://localhost:8000",
+    help="Target backend URL (e.g., http://localhost:8000).",
 )
 @click.option(
-    "--backend-type",
-    type=click.Choice(list(get_args(BackendType))),
+    "--data",
+    type=str,
+    multiple=True,
     help=(
-        "The type of backend to use to run requests against. Defaults to 'openai_http'."
-        f" Supported types: {', '.join(get_args(BackendType))}"
+        "HuggingFace dataset ID, path to dataset, path to data file "
+        "(csv/json/jsonl/txt), or synthetic data config (json/key=value)."
     ),
-    default=GenerativeTextScenario.get_default("backend_type"),
 )
 @click.option(
-    "--backend-args",
-    callback=cli_tools.parse_json,
-    default=GenerativeTextScenario.get_default("backend_args"),
+    "--profile",
+    "--rate-type",  # legacy alias
+    "profile",
+    default=BenchmarkGenerativeTextArgs.get_default("profile"),
+    type=click.Choice(STRATEGY_PROFILE_CHOICES),
+    help=f"Benchmark profile type. Options: {', '.join(STRATEGY_PROFILE_CHOICES)}.",
+)
+@click.option(
+    "--rate",
+    type=float,
+    multiple=True,
+    default=BenchmarkGenerativeTextArgs.get_default("rate"),
     help=(
-        "A JSON string containing any arguments to pass to the backend as a "
-        "dict with **kwargs. Headers can be removed by setting their value to "
-        "null. For example: "
-        """'{"headers": {"Authorization": null, "Custom-Header": "Custom-Value"}}'"""
+        "Benchmark rate(s) to test. Meaning depends on profile: "
+        "sweep=number of benchmarks, concurrent=concurrent requests, "
+        "async/constant/poisson=requests per second."
     ),
 )
+# Backend configuration
+@click.option(
+    "--backend",
+    "--backend-type",  # legacy alias
+    "backend",
+    type=click.Choice(list(get_literal_vals(BackendType))),
+    default=BenchmarkGenerativeTextArgs.get_default("backend"),
+    help=f"Backend type. Options: {', '.join(get_literal_vals(BackendType))}.",
+)
+@click.option(
+    "--backend-kwargs",
+    "--backend-args",  # legacy alias
+    "backend_kwargs",
+    callback=cli_tools.parse_json,
+    default=BenchmarkGenerativeTextArgs.get_default("backend_kwargs"),
+    help="JSON string of arguments to pass to the backend.",
+)
 @click.option(
     "--model",
-    default=GenerativeTextScenario.get_default("model"),
+    default=BenchmarkGenerativeTextArgs.get_default("model"),
     type=str,
+    help="Model ID to benchmark. If not provided, uses first available model.",
+)
+# Data configuration
+@click.option(
+    "--request-type",
+    default=BenchmarkGenerativeTextArgs.get_default("data_request_formatter"),
+    type=click.Choice(list(get_literal_vals(GenerativeRequestType))),
     help=(
-        "The ID of the model to benchmark within the backend. "
-        "If None provided (default), then it will use the first model available."
+        f"Request type to create for each data sample. "
+        f"Options: {', '.join(get_literal_vals(GenerativeRequestType))}."
     ),
 )
+@click.option(
+    "--request-formatter-kwargs",
+    default=None,
+    callback=cli_tools.parse_json,
+    help="JSON string of arguments to pass to the request formatter.",
+)
 @click.option(
     "--processor",
-    default=GenerativeTextScenario.get_default("processor"),
+    default=BenchmarkGenerativeTextArgs.get_default("processor"),
     type=str,
     help=(
-        "The processor or tokenizer to use to calculate token counts for statistics "
-        "and synthetic data generation. If None provided (default), will load "
-        "using the model arg, if needed."
+        "Processor or tokenizer for token count calculations. "
+        "If not provided, loads from model."
     ),
 )
 @click.option(
     "--processor-args",
-    default=GenerativeTextScenario.get_default("processor_args"),
+    default=BenchmarkGenerativeTextArgs.get_default("processor_args"),
     callback=cli_tools.parse_json,
-    help=(
-        "A JSON string containing any arguments to pass to the processor constructor "
-        "as a dict with **kwargs."
-    ),
+    help="JSON string of arguments to pass to the processor constructor.",
 )
 @click.option(
-    "--data",
-    type=str,
+    "--data-args",
+    multiple=True,
+    default=BenchmarkGenerativeTextArgs.get_default("data_args"),
+    callback=cli_tools.parse_json,
+    help="JSON string of arguments to pass to dataset creation.",
+)
+@click.option(
+    "--data-samples",
+    default=BenchmarkGenerativeTextArgs.get_default("data_samples"),
+    type=int,
     help=(
-        "The HuggingFace dataset ID, a path to a HuggingFace dataset, "
-        "a path to a data file csv, json, jsonl, or txt, "
-        "or a synthetic data config as a json or key=value string."
+        "Number of samples from dataset. -1 (default) uses all samples "
+        "and dynamically generates more."
    ),
 )
 @click.option(
-    "--data-args",
-    default=GenerativeTextScenario.get_default("data_args"),
+    "--data-column-mapper",
+    default=BenchmarkGenerativeTextArgs.get_default("data_column_mapper"),
     callback=cli_tools.parse_json,
-    help=(
-        "A JSON string containing any arguments to pass to the dataset creation "
-        "as a dict with **kwargs."
-    ),
+    help="JSON string of column mappings to apply to the dataset.",
 )
 @click.option(
     "--data-sampler",
-    default=GenerativeTextScenario.get_default("data_sampler"),
-    type=click.Choice(["random"]),
-    help=(
-        "The data sampler type to use. 'random' will add a random shuffle on the data. "
-        "Defaults to None"
-    ),
+    default=BenchmarkGenerativeTextArgs.get_default("data_sampler"),
+    type=click.Choice(["shuffle"]),
+    help="Data sampler type.",
 )
 @click.option(
-    "--rate-type",
-    type=click.Choice(STRATEGY_PROFILE_CHOICES),
-    help=(
-        "The type of benchmark to run. "
-        f"Supported types {', '.join(STRATEGY_PROFILE_CHOICES)}. "
-    ),
+    "--data-num-workers",
+    default=BenchmarkGenerativeTextArgs.get_default("data_num_workers"),
+    type=int,
+    help="Number of worker processes for data loading.",
 )
 @click.option(
-    "--rate",
-    default=GenerativeTextScenario.get_default("rate"),
-    help=(
-        "The rates to run the benchmark at. "
-        "Can be a single number or a comma-separated list of numbers. "
-        "For rate-type=sweep, this is the number of benchmarks it runs in the sweep. "
-        "For rate-type=concurrent, this is the number of concurrent requests. "
-        "For rate-type=async,constant,poisson, this is the rate requests per second. "
-        "For rate-type=synchronous,throughput, this must not be set."
-    ),
+    "--dataloader_kwargs",
+    default=BenchmarkGenerativeTextArgs.get_default("dataloader_kwargs"),
+    callback=cli_tools.parse_json,
+    help="JSON string of arguments to pass to the dataloader constructor.",
 )
 @click.option(
-    "--max-seconds",
-    type=float,
-    default=GenerativeTextScenario.get_default("max_seconds"),
-    help=(
-        "The maximum number of seconds each benchmark can run for. "
-        "If None, will run until max_requests or the data is exhausted."
-    ),
+    "--random-seed",
+    default=BenchmarkGenerativeTextArgs.get_default("random_seed"),
+    type=int,
+    help="Random seed for reproducibility.",
 )
+# Output configuration
 @click.option(
-    "--max-requests",
-    type=int,
-    default=GenerativeTextScenario.get_default("max_requests"),
+    "--output-path",
+    type=click.Path(),
+    default=BenchmarkGenerativeTextArgs.get_default("output_path"),
     help=(
-        "The maximum number of requests each benchmark can run for. "
-        "If None, will run until max_seconds or the data is exhausted."
+        "Path to save output files. Can be a directory or file. "
+        "If a file, saves that format; mismatched formats save to parent directory."
     ),
 )
 @click.option(
-    "--warmup-percent",
-    type=float,
-    default=GenerativeTextScenario.get_default("warmup_percent"),
-    help=(
-        "The percent of the benchmark (based on max-seconds, max-requets, "
-        "or lenth of dataset) to run as a warmup and not include in the final results. "
-        "Defaults to None."
-    ),
+    "--output-formats",
+    multiple=True,
+    type=str,
+    default=BenchmarkGenerativeTextArgs.get_default("output_formats"),
+    help="Output formats for results (e.g., console, json, html, csv).",
 )
 @click.option(
-    "--cooldown-percent",
-    type=float,
-    default=GenerativeTextScenario.get_default("cooldown_percent"),
-    help=(
-        "The percent of the benchmark (based on max-seconds, max-requets, or lenth "
-        "of dataset) to run as a cooldown and not include in the final results. "
-        "Defaults to None."
-    ),
+    "--disable-console-outputs",
+    is_flag=True,
+    help="Disable console output.",
 )
+# Updates configuration
 @click.option(
     "--disable-progress",
     is_flag=True,
-    help="Set this flag to disable progress updates to the console",
+    help="Disable progress updates to the console.",
 )
 @click.option(
     "--display-scheduler-stats",
     is_flag=True,
-    help="Set this flag to display stats for the processes running the benchmarks",
+    help="Display scheduler process statistics.",
 )
+# Aggregators configuration
 @click.option(
-    "--disable-console-outputs",
-    is_flag=True,
-    help="Set this flag to disable console output",
+    "--warmup",
+    "--warmup-percent",  # legacy alias
+    "warmup",
+    type=float,
+    default=BenchmarkGenerativeTextArgs.get_default("warmup"),
+    help=(
+        "Warmup specification: if in (0,1) = percent, if >=1 = number of "
+        "requests/seconds (depends on active constraint)."
+    ),
 )
 @click.option(
-    "--output-path",
-    type=click.Path(),
-    default=Path.cwd() / "benchmarks.json",
+    "--cooldown",
+    "--cooldown-percent",  # legacy alias
+    "cooldown",
+    type=float,
+    default=BenchmarkGenerativeTextArgs.get_default("cooldown"),
     help=(
-        "The path to save the output to. If it is a directory, "
-        "it will save benchmarks.json under it. "
-        "Otherwise, json, yaml, csv, or html files are supported for output types "
-        "which will be read from the extension for the file path."
+        "Cooldown specification: if in (0,1) = percent, if >=1 = number of "
+        "requests/seconds (depends on active constraint)."
     ),
 )
 @click.option(
-    "--output-extras",
-    callback=cli_tools.parse_json,
-    help="A JSON string of extra data to save with the output benchmarks",
+    "--sample-requests",
+    "--output-sampling",  # legacy alias
+    "sample_requests",
+    type=int,
+    help=(
+        "Number of sample requests per status to save. "
+        "None (default) saves all, recommended: 20."
+    ),
 )
+# Constraints configuration
 @click.option(
-    "--output-sampling",
+    "--max-seconds",
+    type=float,
+    default=BenchmarkGenerativeTextArgs.get_default("max_seconds"),
+    help=(
+        "Maximum seconds per benchmark. "
+        "If None, runs until max_requests or data exhaustion."
+    ),
+)
+@click.option(
+    "--max-requests",
     type=int,
+    default=BenchmarkGenerativeTextArgs.get_default("max_requests"),
     help=(
-        "The number of samples to save in the output file. "
-        "If None (default), will save all samples."
+        "Maximum requests per benchmark. "
+        "If None, runs until max_seconds or data exhaustion."
     ),
-    default=GenerativeTextScenario.get_default("output_sampling"),
 )
 @click.option(
-    "--random-seed",
-    default=GenerativeTextScenario.get_default("random_seed"),
+    "--max-errors",
     type=int,
-    help="The random seed to use for benchmarking to ensure reproducibility.",
-)
-def run(
-    scenario,
-    target,
-    backend_type,
-    backend_args,
-    model,
-    processor,
-    processor_args,
-    data,
-    data_args,
-    data_sampler,
-    rate_type,
-    rate,
-    max_seconds,
-    max_requests,
-    warmup_percent,
-    cooldown_percent,
-    disable_progress,
-    display_scheduler_stats,
-    disable_console_outputs,
-    output_path,
-    output_extras,
-    output_sampling,
-    random_seed,
-):
-    click_ctx = click.get_current_context()
-
-    overrides = cli_tools.set_if_not_default(
-        click_ctx,
-        target=target,
-        backend_type=backend_type,
-        backend_args=backend_args,
-        model=model,
-        processor=processor,
-        processor_args=processor_args,
-        data=data,
-        data_args=data_args,
-        data_sampler=data_sampler,
-        rate_type=rate_type,
-        rate=rate,
-        max_seconds=max_seconds,
-        max_requests=max_requests,
-        warmup_percent=warmup_percent,
-        cooldown_percent=cooldown_percent,
-        output_sampling=output_sampling,
-        random_seed=random_seed,
+    default=BenchmarkGenerativeTextArgs.get_default("max_errors"),
+    help="Maximum errors before stopping the benchmark.",
+)
+@click.option(
+    "--max-error-rate",
+    type=float,
+    default=BenchmarkGenerativeTextArgs.get_default("max_error_rate"),
+    help="Maximum error rate before stopping the benchmark.",
+)
+@click.option(
+    "--max-global-error-rate",
+    type=float,
+    default=BenchmarkGenerativeTextArgs.get_default("max_global_error_rate"),
+    help="Maximum global error rate across all benchmarks.",
+)
+def run(**kwargs):
+    request_type = kwargs.pop("request_type", None)
+    request_formatter_kwargs = kwargs.pop("request_formatter_kwargs", None)
+    kwargs["data_request_formatter"] = (
+        request_type
+        if not request_formatter_kwargs
+        else {"request_type": request_type, **request_formatter_kwargs}
+    )
+    kwargs["data"] = cli_tools.format_list_arg(
+        kwargs.get("data"), default=[], simplify_single=False
     )
+    kwargs["data_args"] = cli_tools.format_list_arg(
+        kwargs.get("data_args"), default=[], simplify_single=False
+    )
+    kwargs["rate"] = cli_tools.format_list_arg(
+        kwargs.get("rate"), default=None, simplify_single=True
+    )
+
+    disable_console_outputs = kwargs.pop("disable_console_outputs", False)
+    display_scheduler_stats = kwargs.pop("display_scheduler_stats", False)
+    disable_progress = kwargs.pop("disable_progress", False)
 
     try:
-        # If a scenario file was specified read from it
-        if scenario is None:
-            _scenario = GenerativeTextScenario.model_validate(overrides)
-        elif isinstance(scenario, Path):
-            _scenario = GenerativeTextScenario.from_file(scenario, overrides)
-        else:  # Only builtins can make it here; click will catch anything else
-            _scenario = GenerativeTextScenario.from_builtin(scenario, overrides)
-    except ValidationError as e:
+        args = BenchmarkGenerativeTextArgs.create(
+            scenario=kwargs.pop("scenario", None), **kwargs
+        )
+    except ValidationError as err:
         # Translate pydantic valdation error to click argument error
-        errs = e.errors(include_url=False, include_context=True, include_input=True)
+        errs = err.errors(include_url=False, include_context=True, include_input=True)
         param_name = "--" + str(errs[0]["loc"][0]).replace("_", "-")
         raise click.BadParameter(
-            errs[0]["msg"], ctx=click_ctx, param_hint=param_name
-        ) from e
+            errs[0]["msg"], ctx=click.get_current_context(), param_hint=param_name
+        ) from err
 
+    if uvloop is not None:
+        asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
     asyncio.run(
-        benchmark_with_scenario(
-            scenario=_scenario,
-            show_progress=not disable_progress,
-            show_progress_scheduler_stats=display_scheduler_stats,
-            output_console=not disable_console_outputs,
-            output_path=output_path,
-            output_extras=output_extras,
+        benchmark_generative_text(
+            args=args,
+            progress=(
+                GenerativeConsoleBenchmarkerProgress(
+                    display_scheduler_stats=display_scheduler_stats
+                )
+                if not disable_progress
+                else None
+            ),
+            console=Console() if not disable_console_outputs else None,
         )
     )
 
 
-@benchmark.command("from-file", help="Load a saved benchmark report.")
+@benchmark.command(
+    "from-file",
+    help=(
+        "Load a saved benchmark report and optionally re-export to other formats. "
+        "PATH: Path to the saved benchmark report file (default: ./benchmarks.json)."
+    ),
+)
 @click.argument(
     "path",
     type=click.Path(file_okay=True, dir_okay=False, exists=True),
@@ -331,61 +433,46 @@ def run(
 )
 @click.option(
     "--output-path",
-    type=click.Path(file_okay=True, dir_okay=True, exists=False),
-    default=None,
-    is_flag=False,
-    flag_value=Path.cwd() / "benchmarks_reexported.json",
+    type=click.Path(),
+    default=Path.cwd(),
     help=(
-        "Allows re-exporting the benchmarks to another format. "
-        "The path to save the output to. If it is a directory, "
-        "it will save benchmarks.json under it. "
-        "Otherwise, json, yaml, or csv files are supported for output types "
-        "which will be read from the extension for the file path. "
-        "This input is optional. If the output path flag is not provided, "
-        "the benchmarks will not be reexported. If the flag is present but "
-        "no value is specified, it will default to the current directory "
-        "with the file name `benchmarks_reexported.json`."
+        "Directory or file path to save re-exported benchmark results. "
+        "If a directory, all output formats will be saved there. "
+        "If a file, the matching format will be saved to that file."
     ),
 )
-def from_file(path, output_path):
-    reimport_benchmarks_report(path, output_path)
-
-
-def decode_escaped_str(_ctx, _param, value):
-    """
-    Click auto adds characters. For example, when using --pad-char "\n",
-    it parses it as "\\n". This method decodes the string to handle escape
-    sequences correctly.
-    """
-    if value is None:
-        return None
-    try:
-        return codecs.decode(value, "unicode_escape")
-    except Exception as e:
-        raise click.BadParameter(f"Could not decode escape sequences: {e}") from e
+@click.option(
+    "--output-formats",
+    multiple=True,
+    type=str,
+    default=("console", "json"),  # ("console", "json", "html", "csv")
+    help="Output formats for benchmark results (e.g., console, json, html, csv).",
+)
+def from_file(path, output_path, output_formats):
+    asyncio.run(reimport_benchmarks_report(path, output_path, output_formats))
 
 
 @cli.command(
-    short_help="Prints environment variable settings.",
-    help=(
-        "Print out the available configuration settings that can be set "
-        "through environment variables."
-    ),
+    short_help="Show configuration settings.",
+    help="Display environment variables for configuring GuideLLM behavior.",
 )
 def config():
     print_config()
 
 
-@cli.group(help="General preprocessing tools and utilities.")
+@cli.group(help="Tools for preprocessing datasets for use in benchmarks.")
 def preprocess():
-    pass
+    """Dataset preprocessing utilities."""
 
 
 @preprocess.command(
+    "dataset",
     help=(
-        "Convert a dataset to have specific prompt and output token sizes.\n"
-        "DATA: Path to the input dataset or dataset ID.\n"
-        "OUTPUT_PATH: Path to save the converted dataset, including file suffix."
+        "Process a dataset to have specific prompt and output token sizes. "
+        "Supports multiple strategies for handling prompts and optional "
+        "Hugging Face Hub upload.\n\n"
+        "DATA: Path to the input dataset or dataset ID.\n\n"
+        "OUTPUT_PATH: Path to save the processed dataset, including file suffix."
    ),
     context_settings={"auto_envvar_prefix": "GUIDELLM"},
 )
@@ -403,81 +490,70 @@ def preprocess():
     "--processor",
     type=str,
     required=True,
-    help=(
-        "The processor or tokenizer to use to calculate token counts for statistics "
-        "and synthetic data generation."
-    ),
+    help="Processor or tokenizer name for calculating token counts.",
 )
 @click.option(
     "--processor-args",
     default=None,
     callback=cli_tools.parse_json,
-    help=(
-        "A JSON string containing any arguments to pass to the processor constructor "
-        "as a dict with **kwargs."
-    ),
+    help="JSON string of arguments to pass to the processor constructor.",
 )
 @click.option(
     "--data-args",
     callback=cli_tools.parse_json,
-    help=(
-        "A JSON string containing any arguments to pass to the dataset creation "
-        "as a dict with **kwargs."
-    ),
+    help="JSON string of arguments to pass to dataset creation.",
 )
 @click.option(
     "--short-prompt-strategy",
     type=click.Choice([s.value for s in ShortPromptStrategy]),
     default=ShortPromptStrategy.IGNORE.value,
     show_default=True,
-    help="Strategy to handle prompts shorter than the target length. ",
+    help="Strategy for handling prompts shorter than target length.",
 )
 @click.option(
     "--pad-char",
     type=str,
     default="",
     callback=decode_escaped_str,
-    help="The token to pad short prompts with when using the 'pad' strategy.",
+    help="Character to pad short prompts with when using 'pad' strategy.",
 )
 @click.option(
     "--concat-delimiter",
     type=str,
     default="",
     help=(
-        "The delimiter to use when concatenating prompts that are too short."
-        " Used when strategy is 'concatenate'."
+        "Delimiter for concatenating short prompts (used with 'concatenate' strategy)."
     ),
 )
 @click.option(
     "--prompt-tokens",
     type=str,
     default=None,
-    help="Prompt tokens config (JSON, YAML file or key=value string)",
+    help="Prompt tokens configuration (JSON, YAML file, or key=value string).",
 )
 @click.option(
     "--output-tokens",
     type=str,
     default=None,
-    help="Output tokens config (JSON, YAML file or key=value string)",
+    help="Output tokens configuration (JSON, YAML file, or key=value string).",
 )
 @click.option(
     "--push-to-hub",
     is_flag=True,
-    help="Set this flag to push the converted dataset to the Hugging Face Hub.",
+    help="Push the processed dataset to Hugging Face Hub.",
 )
 @click.option(
     "--hub-dataset-id",
     type=str,
     default=None,
-    help="The Hugging Face Hub dataset ID to push to. "
-    "Required if --push-to-hub is used.",
+    help=("Hugging Face Hub dataset ID for upload (required if --push-to-hub is set)."),
 )
 @click.option(
     "--random-seed",
     type=int,
     default=42,
     show_default=True,
-    help="Random seed for prompt token sampling and output tokens sampling.",
+    help="Random seed for reproducible token sampling.",
 )
 def dataset(
     data,
@@ -511,5 +587,128 @@ def dataset(
     )
 
 
+@cli.command(
+    "mock-server",
+    help=(
+        "Start a mock OpenAI/vLLM-compatible server for testing. "
+        "Simulates model inference with configurable latency and token generation."
+    ),
+)
+@click.option(
+    "--host",
+    default="127.0.0.1",
+    help="Host address to bind the server to.",
+)
+@click.option(
+    "--port",
+    default=8000,
+    type=int,
+    help="Port number to bind the server to.",
+)
+@click.option(
+    "--workers",
+    default=1,
+    type=int,
+    help="Number of worker processes.",
+)
+@click.option(
+    "--model",
+    default="llama-3.1-8b-instruct",
+    help="Name of the model to mock.",
+)
+@click.option(
+    "--processor",
+    default=None,
+    help="Processor or tokenizer to use for requests.",
+)
+@click.option(
+    "--request-latency",
+    default=3,
+    type=float,
+    help="Request latency in seconds for non-streaming requests.",
+)
+@click.option(
+    "--request-latency-std",
+    default=0,
+    type=float,
+    help="Request latency standard deviation in seconds (normal distribution).",
+)
+@click.option(
+    "--ttft-ms",
+    default=150,
+    type=float,
+    help="Time to first token in milliseconds for streaming requests.",
+)
+@click.option(
+    "--ttft-ms-std",
+    default=0,
+    type=float,
+    help="Time to first token standard deviation in milliseconds.",
+)
+@click.option(
+    "--itl-ms",
+    default=10,
+    type=float,
+    help="Inter-token latency in milliseconds for streaming requests.",
+)
+@click.option(
+    "--itl-ms-std",
+    default=0,
+    type=float,
+    help="Inter-token latency standard deviation in milliseconds.",
+)
+@click.option(
+    "--output-tokens",
+    default=128,
+    type=int,
+    help="Number of output tokens for streaming requests.",
+)
+@click.option(
+    "--output-tokens-std",
+    default=0,
+    type=float,
+    help="Output tokens standard deviation (normal distribution).",
+)
+def mock_server(
+    host: str,
+    port: int,
+    workers: int,
+    model: str,
+    processor: str | None,
+    request_latency: float,
+    request_latency_std: float,
+    ttft_ms: float,
+    ttft_ms_std: float,
+    itl_ms: float,
+    itl_ms_std: float,
+    output_tokens: int,
+    output_tokens_std: float,
+):
+    config = MockServerConfig(
+        host=host,
+        port=port,
+        workers=workers,
+        model=model,
+        processor=processor,
+        request_latency=request_latency,
+        request_latency_std=request_latency_std,
+        ttft_ms=ttft_ms,
+        ttft_ms_std=ttft_ms_std,
+        itl_ms=itl_ms,
+        itl_ms_std=itl_ms_std,
+        output_tokens=output_tokens,
+        output_tokens_std=output_tokens_std,
+    )
+
+    server = MockServer(config)
+    console = Console()
+    console.print_update(
+        title="GuideLLM mock server starting...",
+        details=f"Listening on http://{host}:{port} for model {model}",
+        status="success",
+    )
+    server.run()
+
+
 if __name__ == "__main__":
     cli()
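
Two of the new pieces above are easy to sanity-check in isolation. First, the decode_escaped_str callback; the body below is copied from the diff, with a usage assertion added (the input is what a shell would hand Click for --pad-char "\n"):

    import codecs

    import click


    def decode_escaped_str(_ctx, _param, value):
        # Same body as the helper added in guidellm/__main__.py above.
        if value is None:
            return None
        try:
            return codecs.decode(value, "unicode_escape")
        except Exception as e:
            raise click.BadParameter(f"Could not decode escape sequences: {e}") from e


    # Click receives the literal two characters backslash + "n";
    # unicode_escape folds them back into a real newline.
    assert decode_escaped_str(None, None, "\\n") == "\n"

Second, the way run(**kwargs) folds --request-type and --request-formatter-kwargs into a single data_request_formatter value; the conditional is copied from the diff, with hypothetical option values for illustration:

    request_type = "chat_completions"  # hypothetical --request-type value
    request_formatter_kwargs = {"stream": True}  # hypothetical formatter kwargs

    data_request_formatter = (
        request_type
        if not request_formatter_kwargs
        else {"request_type": request_type, **request_formatter_kwargs}
    )
    assert data_request_formatter == {
        "request_type": "chat_completions",
        "stream": True,
    }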