guidellm 0.3.0rc20250507__py3-none-any.whl → 0.4.0a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of guidellm might be problematic. Click here for more details.

Files changed (55)
  1. guidellm/__init__.py +8 -13
  2. guidellm/__main__.py +290 -69
  3. guidellm/backend/__init__.py +6 -6
  4. guidellm/backend/backend.py +25 -4
  5. guidellm/backend/openai.py +147 -27
  6. guidellm/backend/response.py +6 -2
  7. guidellm/benchmark/__init__.py +16 -22
  8. guidellm/benchmark/aggregator.py +3 -3
  9. guidellm/benchmark/benchmark.py +11 -12
  10. guidellm/benchmark/benchmarker.py +2 -2
  11. guidellm/benchmark/entrypoints.py +34 -10
  12. guidellm/benchmark/output.py +57 -5
  13. guidellm/benchmark/profile.py +4 -4
  14. guidellm/benchmark/progress.py +2 -2
  15. guidellm/benchmark/scenario.py +104 -0
  16. guidellm/benchmark/scenarios/__init__.py +0 -0
  17. guidellm/config.py +28 -7
  18. guidellm/dataset/__init__.py +4 -4
  19. guidellm/dataset/creator.py +1 -1
  20. guidellm/dataset/synthetic.py +36 -11
  21. guidellm/logger.py +8 -4
  22. guidellm/objects/__init__.py +2 -2
  23. guidellm/objects/pydantic.py +30 -1
  24. guidellm/objects/statistics.py +20 -14
  25. guidellm/preprocess/__init__.py +3 -0
  26. guidellm/preprocess/dataset.py +374 -0
  27. guidellm/presentation/__init__.py +28 -0
  28. guidellm/presentation/builder.py +27 -0
  29. guidellm/presentation/data_models.py +232 -0
  30. guidellm/presentation/injector.py +66 -0
  31. guidellm/request/__init__.py +6 -3
  32. guidellm/request/loader.py +5 -5
  33. guidellm/{scheduler → request}/types.py +4 -1
  34. guidellm/scheduler/__init__.py +10 -15
  35. guidellm/scheduler/queues.py +25 -0
  36. guidellm/scheduler/result.py +21 -3
  37. guidellm/scheduler/scheduler.py +68 -60
  38. guidellm/scheduler/strategy.py +26 -24
  39. guidellm/scheduler/worker.py +64 -103
  40. guidellm/utils/__init__.py +17 -5
  41. guidellm/utils/cli.py +62 -0
  42. guidellm/utils/default_group.py +105 -0
  43. guidellm/utils/dict.py +23 -0
  44. guidellm/utils/hf_datasets.py +36 -0
  45. guidellm/utils/random.py +1 -1
  46. guidellm/utils/text.py +12 -5
  47. guidellm/version.py +6 -0
  48. guidellm-0.4.0a2.dist-info/METADATA +317 -0
  49. guidellm-0.4.0a2.dist-info/RECORD +62 -0
  50. {guidellm-0.3.0rc20250507.dist-info → guidellm-0.4.0a2.dist-info}/WHEEL +1 -1
  51. guidellm-0.3.0rc20250507.dist-info/METADATA +0 -451
  52. guidellm-0.3.0rc20250507.dist-info/RECORD +0 -48
  53. {guidellm-0.3.0rc20250507.dist-info → guidellm-0.4.0a2.dist-info}/entry_points.txt +0 -0
  54. {guidellm-0.3.0rc20250507.dist-info → guidellm-0.4.0a2.dist-info}/licenses/LICENSE +0 -0
  55. {guidellm-0.3.0rc20250507.dist-info → guidellm-0.4.0a2.dist-info}/top_level.txt +0 -0
guidellm/__init__.py CHANGED
@@ -3,15 +3,12 @@ Guidellm is a package that provides an easy and intuitive interface for
3
3
  evaluating and benchmarking large language models (LLMs).
4
4
  """
5
5
 
6
- # flake8: noqa
7
-
8
- import os
9
- import logging
10
6
  import contextlib
11
-
7
+ import logging
8
+ import os
12
9
 
13
10
  with (
14
- open(os.devnull, "w") as devnull,
11
+ open(os.devnull, "w") as devnull, # noqa: PTH123
15
12
  contextlib.redirect_stderr(devnull),
16
13
  contextlib.redirect_stdout(devnull),
17
14
  ):
@@ -24,28 +21,26 @@ with (
24
21
  logging.getLogger("transformers").setLevel(logging.ERROR)
25
22
 
26
23
  from .config import (
27
- settings,
28
24
  DatasetSettings,
29
25
  Environment,
30
26
  LoggingSettings,
31
27
  OpenAISettings,
32
- print_config,
33
28
  Settings,
29
+ print_config,
34
30
  reload_settings,
31
+ settings,
35
32
  )
36
33
  from .logger import configure_logger, logger
37
34
 
38
35
  __all__ = [
39
- # Config
40
36
  "DatasetSettings",
41
37
  "Environment",
42
38
  "LoggingSettings",
43
39
  "OpenAISettings",
44
- "print_config",
45
40
  "Settings",
41
+ "configure_logger",
42
+ "logger",
43
+ "print_config",
46
44
  "reload_settings",
47
45
  "settings",
48
- # Logger
49
- "logger",
50
- "configure_logger",
51
46
  ]
guidellm/__main__.py CHANGED
@@ -1,54 +1,70 @@
1
1
  import asyncio
2
- import json
2
+ import codecs
3
3
  from pathlib import Path
4
4
  from typing import get_args
5
5
 
6
6
  import click
7
+ from pydantic import ValidationError
7
8
 
8
9
  from guidellm.backend import BackendType
9
- from guidellm.benchmark import ProfileType, benchmark_generative_text
10
+ from guidellm.benchmark import (
11
+ ProfileType,
12
+ reimport_benchmarks_report,
13
+ )
14
+ from guidellm.benchmark.entrypoints import benchmark_with_scenario
15
+ from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios
10
16
  from guidellm.config import print_config
17
+ from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset
11
18
  from guidellm.scheduler import StrategyType
19
+ from guidellm.utils import DefaultGroupHandler
20
+ from guidellm.utils import cli as cli_tools
12
21
 
13
- STRATEGY_PROFILE_CHOICES = set(
14
- list(get_args(ProfileType)) + list(get_args(StrategyType))
22
+ STRATEGY_PROFILE_CHOICES = list(
23
+ set(list(get_args(ProfileType)) + list(get_args(StrategyType)))
15
24
  )
16
25
 
17
26
 
18
- def parse_json(ctx, param, value): # noqa: ARG001
19
- if value is None:
20
- return None
21
- try:
22
- return json.loads(value)
23
- except json.JSONDecodeError as err:
24
- raise click.BadParameter(f"{param.name} must be a valid JSON string.") from err
25
-
26
-
27
- def parse_number_str(ctx, param, value): # noqa: ARG001
28
- if value is None:
29
- return None
30
-
31
- values = value.split(",") if "," in value else [value]
32
-
33
- try:
34
- return [int(val) if val.isdigit() else float(val) for val in values]
35
- except ValueError as err:
36
- raise click.BadParameter(
37
- f"{param.name} must be a number or comma-separated list of numbers."
38
- ) from err
39
-
40
-
41
27
  @click.group()
28
+ @click.version_option(package_name="guidellm", message="guidellm version: %(version)s")
42
29
  def cli():
43
30
  pass
44
31
 
45
32
 
46
- @cli.command(
47
- help="Run a benchmark against a generative model using the specified arguments."
33
+ @cli.group(
34
+ help="Commands to run a new benchmark or load a prior one.",
35
+ cls=DefaultGroupHandler,
36
+ default="run",
37
+ )
38
+ def benchmark():
39
+ pass
40
+
41
+
42
+ @benchmark.command(
43
+ "run",
44
+ help="Run a benchmark against a generative model using the specified arguments.",
45
+ context_settings={"auto_envvar_prefix": "GUIDELLM"},
46
+ )
47
+ @click.option(
48
+ "--scenario",
49
+ type=cli_tools.Union(
50
+ click.Path(
51
+ exists=True,
52
+ readable=True,
53
+ file_okay=True,
54
+ dir_okay=False,
55
+ path_type=Path,
56
+ ),
57
+ click.Choice(get_builtin_scenarios()),
58
+ ),
59
+ default=None,
60
+ help=(
61
+ "The name of a builtin scenario or path to a config file. "
62
+ "Missing values from the config will use defaults. "
63
+ "Options specified on the commandline will override the scenario."
64
+ ),
48
65
  )
49
66
  @click.option(
50
67
  "--target",
51
- required=True,
52
68
  type=str,
53
69
  help="The target path for the backend to run benchmarks against. For example, http://localhost:8000",
54
70
  )
@@ -59,20 +75,22 @@ def cli():
59
75
  "The type of backend to use to run requests against. Defaults to 'openai_http'."
60
76
  f" Supported types: {', '.join(get_args(BackendType))}"
61
77
  ),
62
- default="openai_http",
78
+ default=GenerativeTextScenario.get_default("backend_type"),
63
79
  )
64
80
  @click.option(
65
81
  "--backend-args",
66
- callback=parse_json,
67
- default=None,
82
+ callback=cli_tools.parse_json,
83
+ default=GenerativeTextScenario.get_default("backend_args"),
68
84
  help=(
69
85
  "A JSON string containing any arguments to pass to the backend as a "
70
- "dict with **kwargs."
86
+ "dict with **kwargs. Headers can be removed by setting their value to "
87
+ "null. For example: "
88
+ """'{"headers": {"Authorization": null, "Custom-Header": "Custom-Value"}}'"""
71
89
  ),
72
90
  )
73
91
  @click.option(
74
92
  "--model",
75
- default=None,
93
+ default=GenerativeTextScenario.get_default("model"),
76
94
  type=str,
77
95
  help=(
78
96
  "The ID of the model to benchmark within the backend. "
@@ -81,7 +99,7 @@ def cli():
81
99
  )
82
100
  @click.option(
83
101
  "--processor",
84
- default=None,
102
+ default=GenerativeTextScenario.get_default("processor"),
85
103
  type=str,
86
104
  help=(
87
105
  "The processor or tokenizer to use to calculate token counts for statistics "
@@ -91,8 +109,8 @@ def cli():
91
109
  )
92
110
  @click.option(
93
111
  "--processor-args",
94
- default=None,
95
- callback=parse_json,
112
+ default=GenerativeTextScenario.get_default("processor_args"),
113
+ callback=cli_tools.parse_json,
96
114
  help=(
97
115
  "A JSON string containing any arguments to pass to the processor constructor "
98
116
  "as a dict with **kwargs."
@@ -100,7 +118,6 @@ def cli():
100
118
  )
101
119
  @click.option(
102
120
  "--data",
103
- required=True,
104
121
  type=str,
105
122
  help=(
106
123
  "The HuggingFace dataset ID, a path to a HuggingFace dataset, "
@@ -110,7 +127,8 @@ def cli():
110
127
  )
111
128
  @click.option(
112
129
  "--data-args",
113
- callback=parse_json,
130
+ default=GenerativeTextScenario.get_default("data_args"),
131
+ callback=cli_tools.parse_json,
114
132
  help=(
115
133
  "A JSON string containing any arguments to pass to the dataset creation "
116
134
  "as a dict with **kwargs."
@@ -118,7 +136,7 @@ def cli():
118
136
  )
119
137
  @click.option(
120
138
  "--data-sampler",
121
- default=None,
139
+ default=GenerativeTextScenario.get_default("data_sampler"),
122
140
  type=click.Choice(["random"]),
123
141
  help=(
124
142
  "The data sampler type to use. 'random' will add a random shuffle on the data. "
@@ -127,7 +145,6 @@ def cli():
127
145
  )
128
146
  @click.option(
129
147
  "--rate-type",
130
- required=True,
131
148
  type=click.Choice(STRATEGY_PROFILE_CHOICES),
132
149
  help=(
133
150
  "The type of benchmark to run. "
@@ -136,8 +153,7 @@ def cli():
136
153
  )
137
154
  @click.option(
138
155
  "--rate",
139
- default=None,
140
- callback=parse_number_str,
156
+ default=GenerativeTextScenario.get_default("rate"),
141
157
  help=(
142
158
  "The rates to run the benchmark at. "
143
159
  "Can be a single number or a comma-separated list of numbers. "
@@ -150,6 +166,7 @@ def cli():
150
166
  @click.option(
151
167
  "--max-seconds",
152
168
  type=float,
169
+ default=GenerativeTextScenario.get_default("max_seconds"),
153
170
  help=(
154
171
  "The maximum number of seconds each benchmark can run for. "
155
172
  "If None, will run until max_requests or the data is exhausted."
@@ -158,6 +175,7 @@ def cli():
158
175
  @click.option(
159
176
  "--max-requests",
160
177
  type=int,
178
+ default=GenerativeTextScenario.get_default("max_requests"),
161
179
  help=(
162
180
  "The maximum number of requests each benchmark can run for. "
163
181
  "If None, will run until max_seconds or the data is exhausted."
@@ -166,7 +184,7 @@ def cli():
166
184
  @click.option(
167
185
  "--warmup-percent",
168
186
  type=float,
169
- default=None,
187
+ default=GenerativeTextScenario.get_default("warmup_percent"),
170
188
  help=(
171
189
  "The percent of the benchmark (based on max-seconds, max-requets, "
172
190
  "or lenth of dataset) to run as a warmup and not include in the final results. "
@@ -176,6 +194,7 @@ def cli():
176
194
  @click.option(
177
195
  "--cooldown-percent",
178
196
  type=float,
197
+ default=GenerativeTextScenario.get_default("cooldown_percent"),
179
198
  help=(
180
199
  "The percent of the benchmark (based on max-seconds, max-requets, or lenth "
181
200
  "of dataset) to run as a cooldown and not include in the final results. "
@@ -204,13 +223,13 @@ def cli():
204
223
  help=(
205
224
  "The path to save the output to. If it is a directory, "
206
225
  "it will save benchmarks.json under it. "
207
- "Otherwise, json, yaml, or csv files are supported for output types "
226
+ "Otherwise, json, yaml, csv, or html files are supported for output types "
208
227
  "which will be read from the extension for the file path."
209
228
  ),
210
229
  )
211
230
  @click.option(
212
231
  "--output-extras",
213
- callback=parse_json,
232
+ callback=cli_tools.parse_json,
214
233
  help="A JSON string of extra data to save with the output benchmarks",
215
234
  )
216
235
  @click.option(
@@ -220,15 +239,16 @@ def cli():
220
239
  "The number of samples to save in the output file. "
221
240
  "If None (default), will save all samples."
222
241
  ),
223
- default=None,
242
+ default=GenerativeTextScenario.get_default("output_sampling"),
224
243
  )
225
244
  @click.option(
226
245
  "--random-seed",
227
- default=42,
246
+ default=GenerativeTextScenario.get_default("random_seed"),
228
247
  type=int,
229
248
  help="The random seed to use for benchmarking to ensure reproducibility.",
230
249
  )
231
- def benchmark(
250
+ def run(
251
+ scenario,
232
252
  target,
233
253
  backend_type,
234
254
  backend_args,
@@ -252,43 +272,244 @@ def benchmark(
252
272
  output_sampling,
253
273
  random_seed,
254
274
  ):
275
+ click_ctx = click.get_current_context()
276
+
277
+ overrides = cli_tools.set_if_not_default(
278
+ click_ctx,
279
+ target=target,
280
+ backend_type=backend_type,
281
+ backend_args=backend_args,
282
+ model=model,
283
+ processor=processor,
284
+ processor_args=processor_args,
285
+ data=data,
286
+ data_args=data_args,
287
+ data_sampler=data_sampler,
288
+ rate_type=rate_type,
289
+ rate=rate,
290
+ max_seconds=max_seconds,
291
+ max_requests=max_requests,
292
+ warmup_percent=warmup_percent,
293
+ cooldown_percent=cooldown_percent,
294
+ output_sampling=output_sampling,
295
+ random_seed=random_seed,
296
+ )
297
+
298
+ try:
299
+ # If a scenario file was specified read from it
300
+ if scenario is None:
301
+ _scenario = GenerativeTextScenario.model_validate(overrides)
302
+ elif isinstance(scenario, Path):
303
+ _scenario = GenerativeTextScenario.from_file(scenario, overrides)
304
+ else: # Only builtins can make it here; click will catch anything else
305
+ _scenario = GenerativeTextScenario.from_builtin(scenario, overrides)
306
+ except ValidationError as e:
307
+ # Translate pydantic valdation error to click argument error
308
+ errs = e.errors(include_url=False, include_context=True, include_input=True)
309
+ param_name = "--" + str(errs[0]["loc"][0]).replace("_", "-")
310
+ raise click.BadParameter(
311
+ errs[0]["msg"], ctx=click_ctx, param_hint=param_name
312
+ ) from e
313
+
255
314
  asyncio.run(
256
- benchmark_generative_text(
257
- target=target,
258
- backend_type=backend_type,
259
- backend_args=backend_args,
260
- model=model,
261
- processor=processor,
262
- processor_args=processor_args,
263
- data=data,
264
- data_args=data_args,
265
- data_sampler=data_sampler,
266
- rate_type=rate_type,
267
- rate=rate,
268
- max_seconds=max_seconds,
269
- max_requests=max_requests,
270
- warmup_percent=warmup_percent,
271
- cooldown_percent=cooldown_percent,
315
+ benchmark_with_scenario(
316
+ scenario=_scenario,
272
317
  show_progress=not disable_progress,
273
318
  show_progress_scheduler_stats=display_scheduler_stats,
274
319
  output_console=not disable_console_outputs,
275
320
  output_path=output_path,
276
321
  output_extras=output_extras,
277
- output_sampling=output_sampling,
278
- random_seed=random_seed,
279
322
  )
280
323
  )
281
324
 
282
325
 
326
+ @benchmark.command("from-file", help="Load a saved benchmark report.")
327
+ @click.argument(
328
+ "path",
329
+ type=click.Path(file_okay=True, dir_okay=False, exists=True),
330
+ default=Path.cwd() / "benchmarks.json",
331
+ )
332
+ @click.option(
333
+ "--output-path",
334
+ type=click.Path(file_okay=True, dir_okay=True, exists=False),
335
+ default=None,
336
+ is_flag=False,
337
+ flag_value=Path.cwd() / "benchmarks_reexported.json",
338
+ help=(
339
+ "Allows re-exporting the benchmarks to another format. "
340
+ "The path to save the output to. If it is a directory, "
341
+ "it will save benchmarks.json under it. "
342
+ "Otherwise, json, yaml, or csv files are supported for output types "
343
+ "which will be read from the extension for the file path. "
344
+ "This input is optional. If the output path flag is not provided, "
345
+ "the benchmarks will not be reexported. If the flag is present but "
346
+ "no value is specified, it will default to the current directory "
347
+ "with the file name `benchmarks_reexported.json`."
348
+ ),
349
+ )
350
+ def from_file(path, output_path):
351
+ reimport_benchmarks_report(path, output_path)
352
+
353
+
354
+ def decode_escaped_str(_ctx, _param, value):
355
+ """
356
+ Click auto adds characters. For example, when using --pad-char "\n",
357
+ it parses it as "\\n". This method decodes the string to handle escape
358
+ sequences correctly.
359
+ """
360
+ if value is None:
361
+ return None
362
+ try:
363
+ return codecs.decode(value, "unicode_escape")
364
+ except Exception as e:
365
+ raise click.BadParameter(f"Could not decode escape sequences: {e}") from e
366
+
367
+
283
368
  @cli.command(
369
+ short_help="Prints environment variable settings.",
284
370
  help=(
285
371
  "Print out the available configuration settings that can be set "
286
372
  "through environment variables."
287
- )
373
+ ),
288
374
  )
289
375
  def config():
290
376
  print_config()
291
377
 
292
378
 
379
+ @cli.group(help="General preprocessing tools and utilities.")
380
+ def preprocess():
381
+ pass
382
+
383
+
384
+ @preprocess.command(
385
+ help=(
386
+ "Convert a dataset to have specific prompt and output token sizes.\n"
387
+ "DATA: Path to the input dataset or dataset ID.\n"
388
+ "OUTPUT_PATH: Path to save the converted dataset, including file suffix."
389
+ ),
390
+ context_settings={"auto_envvar_prefix": "GUIDELLM"},
391
+ )
392
+ @click.argument(
393
+ "data",
394
+ type=str,
395
+ required=True,
396
+ )
397
+ @click.argument(
398
+ "output_path",
399
+ type=click.Path(file_okay=True, dir_okay=False, writable=True, resolve_path=True),
400
+ required=True,
401
+ )
402
+ @click.option(
403
+ "--processor",
404
+ type=str,
405
+ required=True,
406
+ help=(
407
+ "The processor or tokenizer to use to calculate token counts for statistics "
408
+ "and synthetic data generation."
409
+ ),
410
+ )
411
+ @click.option(
412
+ "--processor-args",
413
+ default=None,
414
+ callback=cli_tools.parse_json,
415
+ help=(
416
+ "A JSON string containing any arguments to pass to the processor constructor "
417
+ "as a dict with **kwargs."
418
+ ),
419
+ )
420
+ @click.option(
421
+ "--data-args",
422
+ callback=cli_tools.parse_json,
423
+ help=(
424
+ "A JSON string containing any arguments to pass to the dataset creation "
425
+ "as a dict with **kwargs."
426
+ ),
427
+ )
428
+ @click.option(
429
+ "--short-prompt-strategy",
430
+ type=click.Choice([s.value for s in ShortPromptStrategy]),
431
+ default=ShortPromptStrategy.IGNORE.value,
432
+ show_default=True,
433
+ help="Strategy to handle prompts shorter than the target length. ",
434
+ )
435
+ @click.option(
436
+ "--pad-char",
437
+ type=str,
438
+ default="",
439
+ callback=decode_escaped_str,
440
+ help="The token to pad short prompts with when using the 'pad' strategy.",
441
+ )
442
+ @click.option(
443
+ "--concat-delimiter",
444
+ type=str,
445
+ default="",
446
+ help=(
447
+ "The delimiter to use when concatenating prompts that are too short."
448
+ " Used when strategy is 'concatenate'."
449
+ ),
450
+ )
451
+ @click.option(
452
+ "--prompt-tokens",
453
+ type=str,
454
+ default=None,
455
+ help="Prompt tokens config (JSON, YAML file or key=value string)",
456
+ )
457
+ @click.option(
458
+ "--output-tokens",
459
+ type=str,
460
+ default=None,
461
+ help="Output tokens config (JSON, YAML file or key=value string)",
462
+ )
463
+ @click.option(
464
+ "--push-to-hub",
465
+ is_flag=True,
466
+ help="Set this flag to push the converted dataset to the Hugging Face Hub.",
467
+ )
468
+ @click.option(
469
+ "--hub-dataset-id",
470
+ type=str,
471
+ default=None,
472
+ help="The Hugging Face Hub dataset ID to push to. "
473
+ "Required if --push-to-hub is used.",
474
+ )
475
+ @click.option(
476
+ "--random-seed",
477
+ type=int,
478
+ default=42,
479
+ show_default=True,
480
+ help="Random seed for prompt token sampling and output tokens sampling.",
481
+ )
482
+ def dataset(
483
+ data,
484
+ output_path,
485
+ processor,
486
+ processor_args,
487
+ data_args,
488
+ short_prompt_strategy,
489
+ pad_char,
490
+ concat_delimiter,
491
+ prompt_tokens,
492
+ output_tokens,
493
+ push_to_hub,
494
+ hub_dataset_id,
495
+ random_seed,
496
+ ):
497
+ process_dataset(
498
+ data=data,
499
+ output_path=output_path,
500
+ processor=processor,
501
+ prompt_tokens=prompt_tokens,
502
+ output_tokens=output_tokens,
503
+ processor_args=processor_args,
504
+ data_args=data_args,
505
+ short_prompt_strategy=short_prompt_strategy,
506
+ pad_char=pad_char,
507
+ concat_delimiter=concat_delimiter,
508
+ push_to_hub=push_to_hub,
509
+ hub_dataset_id=hub_dataset_id,
510
+ random_seed=random_seed,
511
+ )
512
+
513
+
293
514
  if __name__ == "__main__":
294
515
  cli()
@@ -11,13 +11,13 @@ from .response import (
11
11
  )
12
12
 
13
13
  __all__ = [
14
- "StreamingResponseType",
15
- "StreamingTextResponse",
16
- "RequestArgs",
17
- "ResponseSummary",
14
+ "CHAT_COMPLETIONS_PATH",
15
+ "TEXT_COMPLETIONS_PATH",
18
16
  "Backend",
19
17
  "BackendType",
20
18
  "OpenAIHTTPBackend",
21
- "TEXT_COMPLETIONS_PATH",
22
- "CHAT_COMPLETIONS_PATH",
19
+ "RequestArgs",
20
+ "ResponseSummary",
21
+ "StreamingResponseType",
22
+ "StreamingTextResponse",
23
23
  ]
@@ -7,6 +7,7 @@ from loguru import logger
7
7
  from PIL import Image
8
8
 
9
9
  from guidellm.backend.response import ResponseSummary, StreamingTextResponse
10
+ from guidellm.config import settings
10
11
 
11
12
  __all__ = [
12
13
  "Backend",
@@ -110,6 +111,14 @@ class Backend(ABC):
110
111
  """
111
112
  ...
112
113
 
114
+ @abstractmethod
115
+ async def reset(self) -> None:
116
+ """
117
+ Reset the connection object. This is useful for backends that
118
+ reuse connections or have state that needs to be cleared.
119
+ """
120
+ ...
121
+
113
122
  async def validate(self):
114
123
  """
115
124
  Handle final setup and validate the backend is ready for use.
@@ -121,10 +130,22 @@ class Backend(ABC):
121
130
  if not models:
122
131
  raise ValueError("No models available for the backend")
123
132
 
124
- async for _ in self.text_completions(
125
- prompt="Test connection", output_token_count=1
126
- ): # type: ignore[attr-defined]
127
- pass
133
+ # Use the preferred route defined in the global settings when performing the
134
+ # validation request. This avoids calling an unavailable endpoint (ie
135
+ # /v1/completions) when the deployment only supports the chat completions
136
+ # endpoint.
137
+ if settings.preferred_route == "chat_completions":
138
+ async for _ in self.chat_completions( # type: ignore[attr-defined]
139
+ content="Test connection", output_token_count=1
140
+ ):
141
+ pass
142
+ else:
143
+ async for _ in self.text_completions( # type: ignore[attr-defined]
144
+ prompt="Test connection", output_token_count=1
145
+ ):
146
+ pass
147
+
148
+ await self.reset()
128
149
 
129
150
  @abstractmethod
130
151
  async def check_setup(self):