PyPI - guidellm - Versions diffs - 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl - Mend

guidellm 0.3.1-py3-none-any.whl → 0.6.0a5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (141) hide show

guidellm/__init__.py +5 -2
guidellm/__main__.py +524 -255
guidellm/backends/__init__.py +33 -0
guidellm/backends/backend.py +109 -0
guidellm/backends/openai.py +340 -0
guidellm/backends/response_handlers.py +428 -0
guidellm/benchmark/__init__.py +69 -39
guidellm/benchmark/benchmarker.py +160 -316
guidellm/benchmark/entrypoints.py +560 -127
guidellm/benchmark/outputs/__init__.py +24 -0
guidellm/benchmark/outputs/console.py +633 -0
guidellm/benchmark/outputs/csv.py +721 -0
guidellm/benchmark/outputs/html.py +473 -0
guidellm/benchmark/outputs/output.py +169 -0
guidellm/benchmark/outputs/serialized.py +69 -0
guidellm/benchmark/profiles.py +718 -0
guidellm/benchmark/progress.py +553 -556
guidellm/benchmark/scenarios/__init__.py +40 -0
guidellm/benchmark/scenarios/chat.json +6 -0
guidellm/benchmark/scenarios/rag.json +6 -0
guidellm/benchmark/schemas/__init__.py +66 -0
guidellm/benchmark/schemas/base.py +402 -0
guidellm/benchmark/schemas/generative/__init__.py +55 -0
guidellm/benchmark/schemas/generative/accumulator.py +841 -0
guidellm/benchmark/schemas/generative/benchmark.py +163 -0
guidellm/benchmark/schemas/generative/entrypoints.py +381 -0
guidellm/benchmark/schemas/generative/metrics.py +927 -0
guidellm/benchmark/schemas/generative/report.py +158 -0
guidellm/data/__init__.py +34 -4
guidellm/data/builders.py +541 -0
guidellm/data/collators.py +16 -0
guidellm/data/config.py +120 -0
guidellm/data/deserializers/__init__.py +49 -0
guidellm/data/deserializers/deserializer.py +141 -0
guidellm/data/deserializers/file.py +223 -0
guidellm/data/deserializers/huggingface.py +94 -0
guidellm/data/deserializers/memory.py +194 -0
guidellm/data/deserializers/synthetic.py +246 -0
guidellm/data/entrypoints.py +52 -0
guidellm/data/loaders.py +190 -0
guidellm/data/preprocessors/__init__.py +27 -0
guidellm/data/preprocessors/formatters.py +410 -0
guidellm/data/preprocessors/mappers.py +196 -0
guidellm/data/preprocessors/preprocessor.py +30 -0
guidellm/data/processor.py +29 -0
guidellm/data/schemas.py +175 -0
guidellm/data/utils/__init__.py +6 -0
guidellm/data/utils/dataset.py +94 -0
guidellm/extras/__init__.py +4 -0
guidellm/extras/audio.py +220 -0
guidellm/extras/vision.py +242 -0
guidellm/logger.py +2 -2
guidellm/mock_server/__init__.py +8 -0
guidellm/mock_server/config.py +84 -0
guidellm/mock_server/handlers/__init__.py +17 -0
guidellm/mock_server/handlers/chat_completions.py +280 -0
guidellm/mock_server/handlers/completions.py +280 -0
guidellm/mock_server/handlers/tokenizer.py +142 -0
guidellm/mock_server/models.py +510 -0
guidellm/mock_server/server.py +238 -0
guidellm/mock_server/utils.py +302 -0
guidellm/scheduler/__init__.py +69 -26
guidellm/scheduler/constraints/__init__.py +49 -0
guidellm/scheduler/constraints/constraint.py +325 -0
guidellm/scheduler/constraints/error.py +411 -0
guidellm/scheduler/constraints/factory.py +182 -0
guidellm/scheduler/constraints/request.py +312 -0
guidellm/scheduler/constraints/saturation.py +722 -0
guidellm/scheduler/environments.py +252 -0
guidellm/scheduler/scheduler.py +137 -368
guidellm/scheduler/schemas.py +358 -0
guidellm/scheduler/strategies.py +617 -0
guidellm/scheduler/worker.py +413 -419
guidellm/scheduler/worker_group.py +712 -0
guidellm/schemas/__init__.py +65 -0
guidellm/schemas/base.py +417 -0
guidellm/schemas/info.py +188 -0
guidellm/schemas/request.py +235 -0
guidellm/schemas/request_stats.py +349 -0
guidellm/schemas/response.py +124 -0
guidellm/schemas/statistics.py +1018 -0
guidellm/{config.py → settings.py} +31 -24
guidellm/utils/__init__.py +71 -8
guidellm/utils/auto_importer.py +98 -0
guidellm/utils/cli.py +132 -5
guidellm/utils/console.py +566 -0
guidellm/utils/encoding.py +778 -0
guidellm/utils/functions.py +159 -0
guidellm/utils/hf_datasets.py +1 -2
guidellm/utils/hf_transformers.py +4 -4
guidellm/utils/imports.py +9 -0
guidellm/utils/messaging.py +1118 -0
guidellm/utils/mixins.py +115 -0
guidellm/utils/random.py +3 -4
guidellm/utils/registry.py +220 -0
guidellm/utils/singleton.py +133 -0
guidellm/utils/synchronous.py +159 -0
guidellm/utils/text.py +163 -50
guidellm/utils/typing.py +41 -0
guidellm/version.py +2 -2
guidellm-0.6.0a5.dist-info/METADATA +364 -0
guidellm-0.6.0a5.dist-info/RECORD +109 -0
guidellm/backend/__init__.py +0 -23
guidellm/backend/backend.py +0 -259
guidellm/backend/openai.py +0 -708
guidellm/backend/response.py +0 -136
guidellm/benchmark/aggregator.py +0 -760
guidellm/benchmark/benchmark.py +0 -837
guidellm/benchmark/output.py +0 -997
guidellm/benchmark/profile.py +0 -409
guidellm/benchmark/scenario.py +0 -104
guidellm/data/prideandprejudice.txt.gz +0 -0
guidellm/dataset/__init__.py +0 -22
guidellm/dataset/creator.py +0 -213
guidellm/dataset/entrypoints.py +0 -42
guidellm/dataset/file.py +0 -92
guidellm/dataset/hf_datasets.py +0 -62
guidellm/dataset/in_memory.py +0 -132
guidellm/dataset/synthetic.py +0 -287
guidellm/objects/__init__.py +0 -18
guidellm/objects/pydantic.py +0 -89
guidellm/objects/statistics.py +0 -953
guidellm/preprocess/__init__.py +0 -3
guidellm/preprocess/dataset.py +0 -374
guidellm/presentation/__init__.py +0 -28
guidellm/presentation/builder.py +0 -27
guidellm/presentation/data_models.py +0 -232
guidellm/presentation/injector.py +0 -66
guidellm/request/__init__.py +0 -18
guidellm/request/loader.py +0 -284
guidellm/request/request.py +0 -79
guidellm/request/types.py +0 -10
guidellm/scheduler/queues.py +0 -25
guidellm/scheduler/result.py +0 -155
guidellm/scheduler/strategy.py +0 -495
guidellm-0.3.1.dist-info/METADATA +0 -329
guidellm-0.3.1.dist-info/RECORD +0 -62
{guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/WHEEL +0 -0
{guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/entry_points.txt +0 -0
{guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/licenses/LICENSE +0 -0
{guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/top_level.txt +0 -0

guidellm/__main__.py CHANGED Viewed

@@ -1,51 +1,110 @@
+"""
+GuideLLM command-line interface entry point.
+Primary CLI application providing benchmark execution, dataset preprocessing, and
+mock server functionality for language model evaluation. Organizes commands into
+three main groups: benchmark operations for performance testing, preprocessing
+utilities for data transformation, and mock server capabilities for development
+and testing. Supports multiple backends, output formats, and flexible configuration
+through CLI options and environment variables.
+Example:
+::
+    # Run a benchmark against a model
+    guidellm benchmark run --target http://localhost:8000 --data dataset.json \\
+        --profile sweep
+    # Preprocess a dataset
+    guidellm preprocess dataset input.json output.json --processor gpt2
+    # Start a mock server for testing
+    guidellm mock-server --host 0.0.0.0 --port 8080
+"""
+from __future__ import annotations
 import asyncio
 import codecs
 from pathlib import Path
-from typing import get_args
 import click
 from pydantic import ValidationError
-from guidellm.backend import BackendType
+from guidellm.data import ShortPromptStrategy, process_dataset
+try:
+    import uvloop
+except ImportError:
+    uvloop = None  # type: ignore[assignment] # Optional dependency
+from guidellm.backends import BackendType
 from guidellm.benchmark import (
+    BenchmarkGenerativeTextArgs,
+    GenerativeConsoleBenchmarkerProgress,
     ProfileType,
+    benchmark_generative_text,
+    get_builtin_scenarios,
     reimport_benchmarks_report,
 )
-from guidellm.benchmark.entrypoints import benchmark_with_scenario
-from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios
-from guidellm.config import print_config
-from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset
+from guidellm.mock_server import MockServer, MockServerConfig
 from guidellm.scheduler import StrategyType
-from guidellm.utils import DefaultGroupHandler
+from guidellm.schemas import GenerativeRequestType
+from guidellm.settings import print_config
+from guidellm.utils import Console, DefaultGroupHandler, get_literal_vals
 from guidellm.utils import cli as cli_tools
-STRATEGY_PROFILE_CHOICES = list(
-    set(list(get_args(ProfileType)) + list(get_args(StrategyType)))
-)
+STRATEGY_PROFILE_CHOICES: list[str] = list(get_literal_vals(ProfileType | StrategyType))
+"""Available strategy and profile type choices for benchmark execution."""
+def decode_escaped_str(_ctx, _param, value):
+    """
+    Decode escape sequences in Click option values.
+    Click automatically escapes characters converting sequences like "\\n" to
+    "\\\\n". This function decodes these sequences to their intended characters.
+    :param _ctx: Click context (unused)
+    :param _param: Click parameter (unused)
+    :param value: String value to decode
+    :return: Decoded string with proper escape sequences, or None if input is None
+    :raises click.BadParameter: When escape sequence decoding fails
+    """
+    if value is None:
+        return None
+    try:
+        return codecs.decode(value, "unicode_escape")
+    except Exception as e:
+        raise click.BadParameter(f"Could not decode escape sequences: {e}") from e
 @click.group()
 @click.version_option(package_name="guidellm", message="guidellm version: %(version)s")
 def cli():
-    pass
+    """GuideLLM CLI for benchmarking, preprocessing, and testing language models."""
 @cli.group(
-    help="Commands to run a new benchmark or load a prior one.",
+    help="Run a benchmark or load a previously saved benchmark report.",
     cls=DefaultGroupHandler,
     default="run",
 )
 def benchmark():
-    pass
+    """Benchmark commands for performance testing generative models."""
 @benchmark.command(
     "run",
-    help="Run a benchmark against a generative model using the specified arguments.",
+    help=(
+        "Run a benchmark against a generative model. "
+        "Supports multiple backends, data sources, strategies, and output formats. "
+        "Configuration can be loaded from a scenario file or specified via options."
+    ),
     context_settings={"auto_envvar_prefix": "GUIDELLM"},
 )
 @click.option(
     "--scenario",
+    "-c",
     type=cli_tools.Union(
         click.Path(
             exists=True,
@@ -54,276 +113,375 @@ def benchmark():
             dir_okay=False,
             path_type=Path,
         ),
-        click.Choice(get_builtin_scenarios()),
+        click.Choice(tuple(get_builtin_scenarios().keys())),
     ),
     default=None,
     help=(
-        "The name of a builtin scenario or path to a config file. "
-        "Missing values from the config will use defaults. "
-        "Options specified on the commandline will override the scenario."
+        "Builtin scenario name or path to config file. "
+        "CLI options override scenario settings."
     ),
 )
 @click.option(
     "--target",
     type=str,
-    help="The target path for the backend to run benchmarks against. For example, http://localhost:8000",
+    help="Target backend URL (e.g., http://localhost:8000).",
 )
 @click.option(
-    "--backend-type",
-    type=click.Choice(list(get_args(BackendType))),
+    "--data",
+    type=str,
+    multiple=True,
     help=(
-        "The type of backend to use to run requests against. Defaults to 'openai_http'."
-        f" Supported types: {', '.join(get_args(BackendType))}"
+        "HuggingFace dataset ID, path to dataset, path to data file "
+        "(csv/json/jsonl/txt), or synthetic data config (json/key=value)."
     ),
-    default=GenerativeTextScenario.get_default("backend_type"),
 )
 @click.option(
-    "--backend-args",
-    callback=cli_tools.parse_json,
-    default=GenerativeTextScenario.get_default("backend_args"),
+    "--profile",
+    "--rate-type",  # legacy alias
+    "profile",
+    default=BenchmarkGenerativeTextArgs.get_default("profile"),
+    type=click.Choice(STRATEGY_PROFILE_CHOICES),
+    help=f"Benchmark profile type. Options: {', '.join(STRATEGY_PROFILE_CHOICES)}.",
+)
+@click.option(
+    "--rate",
+    callback=cli_tools.parse_list_floats,
+    multiple=True,
+    default=BenchmarkGenerativeTextArgs.get_default("rate"),
     help=(
-        "A JSON string containing any arguments to pass to the backend as a "
-        "dict with **kwargs. Headers can be removed by setting their value to "
-        "null. For example: "
-        """'{"headers": {"Authorization": null, "Custom-Header": "Custom-Value"}}'"""
+        "Benchmark rate(s) to test. Meaning depends on profile: "
+        "sweep=number of benchmarks, concurrent=concurrent requests, "
+        "async/constant/poisson=requests per second."
     ),
 )
+# Backend configuration
+@click.option(
+    "--backend",
+    "--backend-type",  # legacy alias
+    "backend",
+    type=click.Choice(list(get_literal_vals(BackendType))),
+    default=BenchmarkGenerativeTextArgs.get_default("backend"),
+    help=f"Backend type. Options: {', '.join(get_literal_vals(BackendType))}.",
+)
+@click.option(
+    "--backend-kwargs",
+    "--backend-args",  # legacy alias
+    "backend_kwargs",
+    callback=cli_tools.parse_json,
+    default=BenchmarkGenerativeTextArgs.get_default("backend_kwargs"),
+    help="JSON string of arguments to pass to the backend.",
+)
 @click.option(
     "--model",
-    default=GenerativeTextScenario.get_default("model"),
+    default=BenchmarkGenerativeTextArgs.get_default("model"),
     type=str,
+    help="Model ID to benchmark. If not provided, uses first available model.",
+)
+# Data configuration
+@click.option(
+    "--request-type",
+    default=BenchmarkGenerativeTextArgs.get_default("data_request_formatter"),
+    type=click.Choice(list(get_literal_vals(GenerativeRequestType))),
     help=(
-        "The ID of the model to benchmark within the backend. "
-        "If None provided (default), then it will use the first model available."
+        f"Request type to create for each data sample. "
+        f"Options: {', '.join(get_literal_vals(GenerativeRequestType))}."
     ),
 )
+@click.option(
+    "--request-formatter-kwargs",
+    default=None,
+    callback=cli_tools.parse_json,
+    help="JSON string of arguments to pass to the request formatter.",
+)
 @click.option(
     "--processor",
-    default=GenerativeTextScenario.get_default("processor"),
+    default=BenchmarkGenerativeTextArgs.get_default("processor"),
     type=str,
     help=(
-        "The processor or tokenizer to use to calculate token counts for statistics "
-        "and synthetic data generation. If None provided (default), will load "
-        "using the model arg, if needed."
+        "Processor or tokenizer for token count calculations. "
+        "If not provided, loads from model."
     ),
 )
 @click.option(
     "--processor-args",
-    default=GenerativeTextScenario.get_default("processor_args"),
+    default=BenchmarkGenerativeTextArgs.get_default("processor_args"),
     callback=cli_tools.parse_json,
+    help="JSON string of arguments to pass to the processor constructor.",
+)
+@click.option(
+    "--data-args",
+    multiple=True,
+    default=BenchmarkGenerativeTextArgs.get_default("data_args"),
+    callback=cli_tools.parse_json,
+    help="JSON string of arguments to pass to dataset creation.",
+)
+@click.option(
+    "--data-samples",
+    default=BenchmarkGenerativeTextArgs.get_default("data_samples"),
+    type=int,
     help=(
-        "A JSON string containing any arguments to pass to the processor constructor "
-        "as a dict with **kwargs."
+        "Number of samples from dataset. -1 (default) uses all samples "
+        "and dynamically generates more."
     ),
 )
 @click.option(
-    "--data",
-    type=str,
+    "--data-column-mapper",
+    default=BenchmarkGenerativeTextArgs.get_default("data_column_mapper"),
+    callback=cli_tools.parse_json,
+    help="JSON string of column mappings to apply to the dataset.",
+)
+@click.option(
+    "--data-sampler",
+    default=BenchmarkGenerativeTextArgs.get_default("data_sampler"),
+    type=click.Choice(["shuffle"]),
+    help="Data sampler type.",
+)
+@click.option(
+    "--data-num-workers",
+    default=BenchmarkGenerativeTextArgs.get_default("data_num_workers"),
+    type=int,
+    help="Number of worker processes for data loading.",
+)
+@click.option(
+    "--dataloader-kwargs",
+    default=BenchmarkGenerativeTextArgs.get_default("dataloader_kwargs"),
+    callback=cli_tools.parse_json,
+    help="JSON string of arguments to pass to the dataloader constructor.",
+)
+@click.option(
+    "--random-seed",
+    default=BenchmarkGenerativeTextArgs.get_default("random_seed"),
+    type=int,
+    help="Random seed for reproducibility.",
+)
+# Output configuration
+@click.option(
+    "--output-dir",
+    type=click.Path(file_okay=False, dir_okay=True, path_type=Path),
+    default=BenchmarkGenerativeTextArgs.get_default("output_dir"),
+    help="The directory path to save file output types in",
+)
+@click.option(
+    "--outputs",
+    callback=cli_tools.parse_list,
+    multiple=True,
+    default=BenchmarkGenerativeTextArgs.get_default("outputs"),
     help=(
-        "The HuggingFace dataset ID, a path to a HuggingFace dataset, "
-        "a path to a data file csv, json, jsonl, or txt, "
-        "or a synthetic data config as a json or key=value string."
+        "The filename.ext for each of the outputs to create or the "
+        "alises (json, csv, html) for the output files to create with "
+        "their default file names (benchmark.[EXT])"
     ),
 )
 @click.option(
-    "--data-args",
-    default=GenerativeTextScenario.get_default("data_args"),
-    callback=cli_tools.parse_json,
+    "--output-path",
+    type=click.Path(),
+    default=None,
     help=(
-        "A JSON string containing any arguments to pass to the dataset creation "
-        "as a dict with **kwargs."
+        "Legacy parameter for the output path to save the output result to. "
+        "Resolves to fill in output-dir and outputs based on input path."
     ),
 )
 @click.option(
-    "--data-sampler",
-    default=GenerativeTextScenario.get_default("data_sampler"),
-    type=click.Choice(["random"]),
+    "--disable-console",
+    "--disable-console-outputs",  # legacy alias
+    "disable_console",
+    is_flag=True,
     help=(
-        "The data sampler type to use. 'random' will add a random shuffle on the data. "
-        "Defaults to None"
+        "Disable all outputs to the console (updates, interactive progress, results)."
     ),
 )
 @click.option(
-    "--rate-type",
-    type=click.Choice(STRATEGY_PROFILE_CHOICES),
+    "--disable-console-interactive",
+    "--disable-progress",  # legacy alias
+    "disable_console_interactive",
+    is_flag=True,
+    help="Disable interactive console progress updates.",
+)
+# Aggregators configuration
+@click.option(
+    "--warmup",
+    "--warmup-percent",  # legacy alias
+    "warmup",
+    default=BenchmarkGenerativeTextArgs.get_default("warmup"),
+    callback=cli_tools.parse_json,
     help=(
-        "The type of benchmark to run. "
-        f"Supported types {', '.join(STRATEGY_PROFILE_CHOICES)}. "
+        "Warmup specification: int, float, or dict as string "
+        "(json or key=value). "
+        "Controls time or requests before measurement starts. "
+        "Numeric in (0, 1): percent of duration or request count. "
+        "Numeric >=1: duration in seconds or request count. "
+        "Advanced config: see TransientPhaseConfig schema."
     ),
 )
 @click.option(
-    "--rate",
-    default=GenerativeTextScenario.get_default("rate"),
+    "--cooldown",
+    "--cooldown-percent",  # legacy alias
+    "cooldown",
+    default=BenchmarkGenerativeTextArgs.get_default("cooldown"),
+    callback=cli_tools.parse_json,
     help=(
-        "The rates to run the benchmark at. "
-        "Can be a single number or a comma-separated list of numbers. "
-        "For rate-type=sweep, this is the number of benchmarks it runs in the sweep. "
-        "For rate-type=concurrent, this is the number of concurrent requests. "
-        "For rate-type=async,constant,poisson, this is the rate requests per second. "
-        "For rate-type=synchronous,throughput, this must not be set."
+        "Cooldown specification: int, float, or dict as string "
+        "(json or key=value). "
+        "Controls time or requests after measurement ends. "
+        "Numeric in (0, 1): percent of duration or request count. "
+        "Numeric >=1: duration in seconds or request count. "
+        "Advanced config: see TransientPhaseConfig schema."
     ),
 )
 @click.option(
-    "--max-seconds",
+    "--rampup",
     type=float,
-    default=GenerativeTextScenario.get_default("max_seconds"),
+    default=BenchmarkGenerativeTextArgs.get_default("rampup"),
     help=(
-        "The maximum number of seconds each benchmark can run for. "
-        "If None, will run until max_requests or the data is exhausted."
+        "The time, in seconds, to ramp up the request rate over. "
+        "Only applicable for Throughput/Concurrent strategies"
     ),
 )
 @click.option(
-    "--max-requests",
+    "--sample-requests",
+    "--output-sampling",  # legacy alias
+    "sample_requests",
     type=int,
-    default=GenerativeTextScenario.get_default("max_requests"),
     help=(
-        "The maximum number of requests each benchmark can run for. "
-        "If None, will run until max_seconds or the data is exhausted."
+        "Number of sample requests per status to save. "
+        "None (default) saves all, recommended: 20."
     ),
 )
+# Constraints configuration
 @click.option(
-    "--warmup-percent",
+    "--max-seconds",
     type=float,
-    default=GenerativeTextScenario.get_default("warmup_percent"),
+    default=BenchmarkGenerativeTextArgs.get_default("max_seconds"),
     help=(
-        "The percent of the benchmark (based on max-seconds, max-requets, "
-        "or lenth of dataset) to run as a warmup and not include in the final results. "
-        "Defaults to None."
+        "Maximum seconds per benchmark. "
+        "If None, runs until max_requests or data exhaustion."
     ),
 )
 @click.option(
-    "--cooldown-percent",
-    type=float,
-    default=GenerativeTextScenario.get_default("cooldown_percent"),
+    "--max-requests",
+    type=int,
+    default=BenchmarkGenerativeTextArgs.get_default("max_requests"),
     help=(
-        "The percent of the benchmark (based on max-seconds, max-requets, or lenth "
-        "of dataset) to run as a cooldown and not include in the final results. "
-        "Defaults to None."
+        "Maximum requests per benchmark. "
+        "If None, runs until max_seconds or data exhaustion."
     ),
 )
 @click.option(
-    "--disable-progress",
-    is_flag=True,
-    help="Set this flag to disable progress updates to the console",
+    "--max-errors",
+    type=int,
+    default=BenchmarkGenerativeTextArgs.get_default("max_errors"),
+    help="Maximum errors before stopping the benchmark.",
 )
 @click.option(
-    "--display-scheduler-stats",
-    is_flag=True,
-    help="Set this flag to display stats for the processes running the benchmarks",
+    "--max-error-rate",
+    type=float,
+    default=BenchmarkGenerativeTextArgs.get_default("max_error_rate"),
+    help="Maximum error rate before stopping the benchmark.",
 )
 @click.option(
-    "--disable-console-outputs",
-    is_flag=True,
-    help="Set this flag to disable console output",
+    "--max-global-error-rate",
+    type=float,
+    default=BenchmarkGenerativeTextArgs.get_default("max_global_error_rate"),
+    help="Maximum global error rate across all benchmarks.",
 )
 @click.option(
-    "--output-path",
-    type=click.Path(),
-    default=Path.cwd() / "benchmarks.json",
+    "--over-saturation",
+    "over_saturation",
+    callback=cli_tools.parse_json,
+    default=None,
     help=(
-        "The path to save the output to. If it is a directory, "
-        "it will save benchmarks.json under it. "
-        "Otherwise, json, yaml, csv, or html files are supported for output types "
-        "which will be read from the extension for the file path."
+        "Enable over-saturation detection. "
+        "Pass a JSON dict with configuration "
+        '(e.g., \'{"enabled": true, "min_seconds": 30}\'). '
+        "Defaults to None (disabled)."
     ),
 )
 @click.option(
-    "--output-extras",
+    "--detect-saturation",
+    "--default-over-saturation",
+    "over_saturation",
     callback=cli_tools.parse_json,
-    help="A JSON string of extra data to save with the output benchmarks",
-)
-@click.option(
-    "--output-sampling",
-    type=int,
-    help=(
-        "The number of samples to save in the output file. "
-        "If None (default), will save all samples."
-    ),
-    default=GenerativeTextScenario.get_default("output_sampling"),
+    flag_value='{"enabled": true}',
+    help="Enable over-saturation detection with default settings.",
 )
-@click.option(
-    "--random-seed",
-    default=GenerativeTextScenario.get_default("random_seed"),
-    type=int,
-    help="The random seed to use for benchmarking to ensure reproducibility.",
-)
-def run(
-    scenario,
-    target,
-    backend_type,
-    backend_args,
-    model,
-    processor,
-    processor_args,
-    data,
-    data_args,
-    data_sampler,
-    rate_type,
-    rate,
-    max_seconds,
-    max_requests,
-    warmup_percent,
-    cooldown_percent,
-    disable_progress,
-    display_scheduler_stats,
-    disable_console_outputs,
-    output_path,
-    output_extras,
-    output_sampling,
-    random_seed,
-):
-    click_ctx = click.get_current_context()
+def run(**kwargs):  # noqa: C901
+    # Only set CLI args that differ from click defaults
+    kwargs = cli_tools.set_if_not_default(click.get_current_context(), **kwargs)
-    overrides = cli_tools.set_if_not_default(
-        click_ctx,
-        target=target,
-        backend_type=backend_type,
-        backend_args=backend_args,
-        model=model,
-        processor=processor,
-        processor_args=processor_args,
-        data=data,
-        data_args=data_args,
-        data_sampler=data_sampler,
-        rate_type=rate_type,
-        rate=rate,
-        max_seconds=max_seconds,
-        max_requests=max_requests,
-        warmup_percent=warmup_percent,
-        cooldown_percent=cooldown_percent,
-        output_sampling=output_sampling,
-        random_seed=random_seed,
+    # Handle remapping for request params
+    request_type = kwargs.pop("request_type", None)
+    request_formatter_kwargs = kwargs.pop("request_formatter_kwargs", None)
+    if request_type is not None:
+        kwargs["data_request_formatter"] = (
+            request_type
+            if not request_formatter_kwargs
+            else {"request_type": request_type, **request_formatter_kwargs}
+        )
+    elif request_formatter_kwargs is not None:
+        kwargs["data_request_formatter"] = request_formatter_kwargs
+    # Handle output path remapping
+    if (output_path := kwargs.pop("output_path", None)) is not None:
+        if kwargs.get("outputs_dir", None) is not None:
+            raise click.BadParameter("Cannot use --output-path with --output-dir.")
+        path = Path(output_path)
+        if path.is_dir():
+            kwargs["output_dir"] = path
+        else:
+            kwargs["output_dir"] = path.parent
+            kwargs["outputs"] = (path.name,)
+    # Handle console options
+    disable_console = kwargs.pop("disable_console", False)
+    disable_console_interactive = (
+        kwargs.pop("disable_console_interactive", False) or disable_console
     )
+    console = Console() if not disable_console else None
+    envs = cli_tools.list_set_env()
+    if console and envs:
+        console.print_update(
+            title=(
+                "Note: the following environment variables "
+                "are set and **may** affect configuration"
+            ),
+            details=", ".join(envs),
+            status="warning",
+        )
     try:
-        # If a scenario file was specified read from it
-        if scenario is None:
-            _scenario = GenerativeTextScenario.model_validate(overrides)
-        elif isinstance(scenario, Path):
-            _scenario = GenerativeTextScenario.from_file(scenario, overrides)
-        else:  # Only builtins can make it here; click will catch anything else
-            _scenario = GenerativeTextScenario.from_builtin(scenario, overrides)
-    except ValidationError as e:
+        args = BenchmarkGenerativeTextArgs.create(
+            scenario=kwargs.pop("scenario", None), **kwargs
+        )
+    except ValidationError as err:
         # Translate pydantic validation error to click argument error
-        errs = e.errors(include_url=False, include_context=True, include_input=True)
+        errs = err.errors(include_url=False, include_context=True, include_input=True)
         param_name = "--" + str(errs[0]["loc"][0]).replace("_", "-")
         raise click.BadParameter(
-            errs[0]["msg"], ctx=click_ctx, param_hint=param_name
-        ) from e
+            errs[0]["msg"], ctx=click.get_current_context(), param_hint=param_name
+        ) from err
+    if uvloop is not None:
+        asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
     asyncio.run(
-        benchmark_with_scenario(
-            scenario=_scenario,
-            show_progress=not disable_progress,
-            show_progress_scheduler_stats=display_scheduler_stats,
-            output_console=not disable_console_outputs,
-            output_path=output_path,
-            output_extras=output_extras,
+        benchmark_generative_text(
+            args=args,
+            progress=(
+                GenerativeConsoleBenchmarkerProgress()
+                if not disable_console_interactive
+                else None
+            ),
+            console=console,
         )
     )
-@benchmark.command("from-file", help="Load a saved benchmark report.")
+@benchmark.command(
+    "from-file",
+    help=(
+        "Load a saved benchmark report and optionally re-export to other formats. "
+        "PATH: Path to the saved benchmark report file (default: ./benchmarks.json)."
+    ),
+)
 @click.argument(
     "path",
     type=click.Path(file_okay=True, dir_okay=False, exists=True),
@@ -331,61 +489,46 @@ def run(
 )
 @click.option(
     "--output-path",
-    type=click.Path(file_okay=True, dir_okay=True, exists=False),
-    default=None,
-    is_flag=False,
-    flag_value=Path.cwd() / "benchmarks_reexported.json",
+    type=click.Path(),
+    default=Path.cwd(),
     help=(
-        "Allows re-exporting the benchmarks to another format. "
-        "The path to save the output to. If it is a directory, "
-        "it will save benchmarks.json under it. "
-        "Otherwise, json, yaml, or csv files are supported for output types "
-        "which will be read from the extension for the file path. "
-        "This input is optional. If the output path flag is not provided, "
-        "the benchmarks will not be reexported. If the flag is present but "
-        "no value is specified, it will default to the current directory "
-        "with the file name `benchmarks_reexported.json`."
+        "Directory or file path to save re-exported benchmark results. "
+        "If a directory, all output formats will be saved there. "
+        "If a file, the matching format will be saved to that file."
     ),
 )
-def from_file(path, output_path):
-    reimport_benchmarks_report(path, output_path)
-def decode_escaped_str(_ctx, _param, value):
-    """
-    Click auto adds characters. For example, when using --pad-char "\n",
-    it parses it as "\\n". This method decodes the string to handle escape
-    sequences correctly.
-    """
-    if value is None:
-        return None
-    try:
-        return codecs.decode(value, "unicode_escape")
-    except Exception as e:
-        raise click.BadParameter(f"Could not decode escape sequences: {e}") from e
+@click.option(
+    "--output-formats",
+    multiple=True,
+    type=str,
+    default=("console", "json"),  # ("console", "json", "html", "csv")
+    help="Output formats for benchmark results (e.g., console, json, html, csv).",
+)
+def from_file(path, output_path, output_formats):
+    asyncio.run(reimport_benchmarks_report(path, output_path, output_formats))
 @cli.command(
-    short_help="Prints environment variable settings.",
-    help=(
-        "Print out the available configuration settings that can be set "
-        "through environment variables."
-    ),
+    short_help="Show configuration settings.",
+    help="Display environment variables for configuring GuideLLM behavior.",
 )
 def config():
     print_config()
-@cli.group(help="General preprocessing tools and utilities.")
+@cli.group(help="Tools for preprocessing datasets for use in benchmarks.")
 def preprocess():
-    pass
+    """Dataset preprocessing utilities."""
 @preprocess.command(
+    "dataset",
     help=(
-        "Convert a dataset to have specific prompt and output token sizes.\n"
-        "DATA: Path to the input dataset or dataset ID.\n"
-        "OUTPUT_PATH: Path to save the converted dataset, including file suffix."
+        "Process a dataset to have specific prompt and output token sizes. "
+        "Supports multiple strategies for handling prompts and optional "
+        "Hugging Face Hub upload.\n\n"
+        "DATA: Path to the input dataset or dataset ID.\n\n"
+        "OUTPUT_PATH: Path to save the processed dataset, including file suffix."
     ),
     context_settings={"auto_envvar_prefix": "GUIDELLM"},
 )
@@ -403,93 +546,95 @@ def preprocess():
     "--processor",
     type=str,
     required=True,
+    help="Processor or tokenizer name for calculating token counts.",
+)
+@click.option(
+    "--config",
+    type=str,
+    required=True,
     help=(
-        "The processor or tokenizer to use to calculate token counts for statistics "
-        "and synthetic data generation."
+        "PreprocessDatasetConfig as JSON string, key=value pairs, "
+        "or file path (.json, .yaml, .yml, .config). "
+        "Example: 'prompt_tokens=100,output_tokens=50,prefix_tokens_max=10'"
+        ' or \'{"prompt_tokens": 100, "output_tokens": 50, '
+        '"prefix_tokens_max": 10}\''
     ),
 )
 @click.option(
     "--processor-args",
     default=None,
     callback=cli_tools.parse_json,
-    help=(
-        "A JSON string containing any arguments to pass to the processor constructor "
-        "as a dict with **kwargs."
-    ),
+    help="JSON string of arguments to pass to the processor constructor.",
 )
 @click.option(
     "--data-args",
     callback=cli_tools.parse_json,
-    help=(
-        "A JSON string containing any arguments to pass to the dataset creation "
-        "as a dict with **kwargs."
-    ),
+    help="JSON string of arguments to pass to dataset creation.",
+)
+@click.option(
+    "--data-column-mapper",
+    default=None,
+    callback=cli_tools.parse_json,
+    help="JSON string of column mappings to apply to the dataset.",
 )
 @click.option(
     "--short-prompt-strategy",
     type=click.Choice([s.value for s in ShortPromptStrategy]),
     default=ShortPromptStrategy.IGNORE.value,
     show_default=True,
-    help="Strategy to handle prompts shorter than the target length. ",
+    help="Strategy for handling prompts shorter than target length.",
 )
 @click.option(
     "--pad-char",
     type=str,
     default="",
     callback=decode_escaped_str,
-    help="The token to pad short prompts with when using the 'pad' strategy.",
+    help="Character to pad short prompts with when using 'pad' strategy.",
 )
 @click.option(
     "--concat-delimiter",
     type=str,
     default="",
     help=(
-        "The delimiter to use when concatenating prompts that are too short."
-        " Used when strategy is 'concatenate'."
+        "Delimiter for concatenating short prompts (used with 'concatenate' strategy)."
     ),
 )
 @click.option(
-    "--prompt-tokens",
-    type=str,
-    default=None,
-    help="Prompt tokens config (JSON, YAML file or key=value string)",
-)
-@click.option(
-    "--output-tokens",
-    type=str,
-    default=None,
-    help="Output tokens config (JSON, YAML file or key=value string)",
+    "--include-prefix-in-token-count",
+    is_flag=True,
+    default=False,
+    help="Include prefix tokens in prompt token count calculation.",
 )
 @click.option(
     "--push-to-hub",
     is_flag=True,
-    help="Set this flag to push the converted dataset to the Hugging Face Hub.",
+    help="Push the processed dataset to Hugging Face Hub.",
 )
 @click.option(
     "--hub-dataset-id",
     type=str,
     default=None,
-    help="The Hugging Face Hub dataset ID to push to. "
-    "Required if --push-to-hub is used.",
+    help=("Hugging Face Hub dataset ID for upload (required if --push-to-hub is set)."),
 )
 @click.option(
     "--random-seed",
     type=int,
     default=42,
     show_default=True,
-    help="Random seed for prompt token sampling and output tokens sampling.",
+    help="Random seed for reproducible token sampling.",
 )
 def dataset(
     data,
     output_path,
     processor,
+    config,
     processor_args,
     data_args,
+    data_column_mapper,
     short_prompt_strategy,
     pad_char,
     concat_delimiter,
-    prompt_tokens,
-    output_tokens,
+    include_prefix_in_token_count,
     push_to_hub,
     hub_dataset_id,
     random_seed,
@@ -498,18 +643,142 @@ def dataset(
         data=data,
         output_path=output_path,
         processor=processor,
-        prompt_tokens=prompt_tokens,
-        output_tokens=output_tokens,
+        config=config,
         processor_args=processor_args,
         data_args=data_args,
+        data_column_mapper=data_column_mapper,
         short_prompt_strategy=short_prompt_strategy,
         pad_char=pad_char,
         concat_delimiter=concat_delimiter,
+        include_prefix_in_token_count=include_prefix_in_token_count,
         push_to_hub=push_to_hub,
         hub_dataset_id=hub_dataset_id,
         random_seed=random_seed,
     )
+@cli.command(
+    "mock-server",
+    help=(
+        "Start a mock OpenAI/vLLM-compatible server for testing. "
+        "Simulates model inference with configurable latency and token generation."
+    ),
+)
+@click.option(
+    "--host",
+    default="127.0.0.1",
+    help="Host address to bind the server to.",
+)
+@click.option(
+    "--port",
+    default=8000,
+    type=int,
+    help="Port number to bind the server to.",
+)
+@click.option(
+    "--workers",
+    default=1,
+    type=int,
+    help="Number of worker processes.",
+)
+@click.option(
+    "--model",
+    default="llama-3.1-8b-instruct",
+    help="Name of the model to mock.",
+)
+@click.option(
+    "--processor",
+    default=None,
+    help="Processor or tokenizer to use for requests.",
+)
+@click.option(
+    "--request-latency",
+    default=3,
+    type=float,
+    help="Request latency in seconds for non-streaming requests.",
+)
+@click.option(
+    "--request-latency-std",
+    default=0,
+    type=float,
+    help="Request latency standard deviation in seconds (normal distribution).",
+)
+@click.option(
+    "--ttft-ms",
+    default=150,
+    type=float,
+    help="Time to first token in milliseconds for streaming requests.",
+)
+@click.option(
+    "--ttft-ms-std",
+    default=0,
+    type=float,
+    help="Time to first token standard deviation in milliseconds.",
+)
+@click.option(
+    "--itl-ms",
+    default=10,
+    type=float,
+    help="Inter-token latency in milliseconds for streaming requests.",
+)
+@click.option(
+    "--itl-ms-std",
+    default=0,
+    type=float,
+    help="Inter-token latency standard deviation in milliseconds.",
+)
+@click.option(
+    "--output-tokens",
+    default=128,
+    type=int,
+    help="Number of output tokens for streaming requests.",
+)
+@click.option(
+    "--output-tokens-std",
+    default=0,
+    type=float,
+    help="Output tokens standard deviation (normal distribution).",
+)
+def mock_server(
+    host: str,
+    port: int,
+    workers: int,
+    model: str,
+    processor: str | None,
+    request_latency: float,
+    request_latency_std: float,
+    ttft_ms: float,
+    ttft_ms_std: float,
+    itl_ms: float,
+    itl_ms_std: float,
+    output_tokens: int,
+    output_tokens_std: float,
+):
+    config = MockServerConfig(
+        host=host,
+        port=port,
+        workers=workers,
+        model=model,
+        processor=processor,
+        request_latency=request_latency,
+        request_latency_std=request_latency_std,
+        ttft_ms=ttft_ms,
+        ttft_ms_std=ttft_ms_std,
+        itl_ms=itl_ms,
+        itl_ms_std=itl_ms_std,
+        output_tokens=output_tokens,
+        output_tokens_std=output_tokens_std,
+    )
+    server = MockServer(config)
+    console = Console()
+    console.print_update(
+        title="GuideLLM mock server starting...",
+        details=f"Listening on http://{host}:{port} for model {model}",
+        status="success",
+    )
+    server.run()
 if __name__ == "__main__":
     cli()

guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl

guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl