guidellm 0.3.0a22__tar.gz → 0.3.0a26__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63)
  1. {guidellm-0.3.0a22/src/guidellm.egg-info → guidellm-0.3.0a26}/PKG-INFO +43 -1
  2. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/README.md +42 -0
  3. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/__main__.py +90 -65
  4. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/backend/openai.py +19 -7
  5. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/benchmark/benchmark.py +1 -4
  6. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/benchmark/entrypoints.py +15 -3
  7. guidellm-0.3.0a26/src/guidellm/benchmark/scenario.py +104 -0
  8. guidellm-0.3.0a26/src/guidellm/benchmark/scenarios/__init__.py +0 -0
  9. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/objects/pydantic.py +30 -1
  10. guidellm-0.3.0a26/src/guidellm/utils/cli.py +62 -0
  11. guidellm-0.3.0a26/src/guidellm/version.py +6 -0
  12. {guidellm-0.3.0a22 → guidellm-0.3.0a26/src/guidellm.egg-info}/PKG-INFO +43 -1
  13. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm.egg-info/SOURCES.txt +3 -0
  14. guidellm-0.3.0a22/src/guidellm/version.py +0 -6
  15. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/LICENSE +0 -0
  16. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/MANIFEST.in +0 -0
  17. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/pyproject.toml +0 -0
  18. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/setup.cfg +0 -0
  19. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/setup.py +0 -0
  20. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/__init__.py +0 -0
  21. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/backend/__init__.py +0 -0
  22. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/backend/backend.py +0 -0
  23. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/backend/response.py +0 -0
  24. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/benchmark/__init__.py +0 -0
  25. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/benchmark/aggregator.py +0 -0
  26. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/benchmark/benchmarker.py +0 -0
  27. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/benchmark/output.py +0 -0
  28. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/benchmark/profile.py +0 -0
  29. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/benchmark/progress.py +0 -0
  30. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/config.py +0 -0
  31. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/data/__init__.py +0 -0
  32. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/data/prideandprejudice.txt.gz +0 -0
  33. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/dataset/__init__.py +0 -0
  34. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/dataset/creator.py +0 -0
  35. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/dataset/entrypoints.py +0 -0
  36. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/dataset/file.py +0 -0
  37. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/dataset/hf_datasets.py +0 -0
  38. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/dataset/in_memory.py +0 -0
  39. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/dataset/synthetic.py +0 -0
  40. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/logger.py +0 -0
  41. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/objects/__init__.py +0 -0
  42. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/objects/statistics.py +0 -0
  43. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/preprocess/__init__.py +0 -0
  44. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/preprocess/dataset.py +0 -0
  45. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/request/__init__.py +0 -0
  46. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/request/loader.py +0 -0
  47. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/request/request.py +0 -0
  48. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/scheduler/__init__.py +0 -0
  49. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/scheduler/result.py +0 -0
  50. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/scheduler/scheduler.py +0 -0
  51. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/scheduler/strategy.py +0 -0
  52. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/scheduler/types.py +0 -0
  53. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/scheduler/worker.py +0 -0
  54. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/utils/__init__.py +0 -0
  55. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/utils/colors.py +0 -0
  56. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/utils/hf_datasets.py +0 -0
  57. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/utils/hf_transformers.py +0 -0
  58. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/utils/random.py +0 -0
  59. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/utils/text.py +0 -0
  60. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm.egg-info/dependency_links.txt +0 -0
  61. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm.egg-info/entry_points.txt +0 -0
  62. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm.egg-info/requires.txt +0 -0
  63. {guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm.egg-info/top_level.txt +0 -0

{guidellm-0.3.0a22/src/guidellm.egg-info → guidellm-0.3.0a26}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: guidellm
-Version: 0.3.0a22
+Version: 0.3.0a26
 Summary: Guidance platform for deploying and managing large language models.
 Author: Red Hat
 License-Expression: Apache-2.0
@@ -208,6 +208,48 @@ The `guidellm benchmark` command is used to run benchmarks against a generative

 - `--output-path`: Defines the path to save the benchmark results. Supports JSON, YAML, or CSV formats. If a directory is provided, the results will be saved as `benchmarks.json` in that directory. If not set, the results will be saved in the current working directory.

+### GuideLLM UI
+
+GuideLLM UI is a companion frontend for visualizing the results of a GuideLLM benchmark run.
+
+### 🛠 Running the UI
+
+1. Use the Hosted Build (Recommended for Most Users)
+
+After running a benchmark with GuideLLM, a report.html file will be generated (by default at guidellm_report/report.html). This file references the latest stable version of the UI hosted at:
+
+```
+https://neuralmagic.github.io/guidellm/ui/dev/
+```
+
+Open the file in your browser and you're done—no setup required.
+
+2. Build and Serve the UI Locally (For Development) This option is useful if:
+
+- You are actively developing the UI
+
+- You want to test changes to the UI before publishing
+
+- You want full control over how the report is displayed
+
+```bash
+npm install
+npm run build
+npx serve out
+```
+
+This will start a local server (e.g., at http://localhost:3000). Then, in your GuideLLM config or CLI flags, point to this local server as the asset base for report generation.
+
+### 🧪 Development Notes
+
+During UI development, it can be helpful to view sample data. We include a sample benchmark run wired into the Redux store under:
+
+```
+src/lib/store/[runInfo/workloadDetails/benchmarks]WindowData.ts
+```
+
+In the future this will be replaced by a configurable untracked file for dev use.
+
 ## Resources

 ### Documentation

{guidellm-0.3.0a22 → guidellm-0.3.0a26}/README.md

@@ -153,6 +153,48 @@ The `guidellm benchmark` command is used to run benchmarks against a generative

 - `--output-path`: Defines the path to save the benchmark results. Supports JSON, YAML, or CSV formats. If a directory is provided, the results will be saved as `benchmarks.json` in that directory. If not set, the results will be saved in the current working directory.

+### GuideLLM UI
+
+GuideLLM UI is a companion frontend for visualizing the results of a GuideLLM benchmark run.
+
+### 🛠 Running the UI
+
+1. Use the Hosted Build (Recommended for Most Users)
+
+After running a benchmark with GuideLLM, a report.html file will be generated (by default at guidellm_report/report.html). This file references the latest stable version of the UI hosted at:
+
+```
+https://neuralmagic.github.io/guidellm/ui/dev/
+```
+
+Open the file in your browser and you're done—no setup required.
+
+2. Build and Serve the UI Locally (For Development) This option is useful if:
+
+- You are actively developing the UI
+
+- You want to test changes to the UI before publishing
+
+- You want full control over how the report is displayed
+
+```bash
+npm install
+npm run build
+npx serve out
+```
+
+This will start a local server (e.g., at http://localhost:3000). Then, in your GuideLLM config or CLI flags, point to this local server as the asset base for report generation.
+
+### 🧪 Development Notes
+
+During UI development, it can be helpful to view sample data. We include a sample benchmark run wired into the Redux store under:
+
+```
+src/lib/store/[runInfo/workloadDetails/benchmarks]WindowData.ts
+```
+
+In the future this will be replaced by a configurable untracked file for dev use.
+
 ## Resources

 ### Documentation

{guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/__main__.py

@@ -1,56 +1,55 @@
 import asyncio
 import codecs
-import json
 from pathlib import Path
 from typing import get_args

 import click
+from pydantic import ValidationError

 from guidellm.backend import BackendType
-from guidellm.benchmark import ProfileType, benchmark_generative_text
+from guidellm.benchmark import ProfileType
+from guidellm.benchmark.entrypoints import benchmark_with_scenario
+from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios
 from guidellm.config import print_config
 from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset
 from guidellm.scheduler import StrategyType
+from guidellm.utils import cli as cli_tools

 STRATEGY_PROFILE_CHOICES = set(
     list(get_args(ProfileType)) + list(get_args(StrategyType))
 )


-def parse_json(ctx, param, value):  # noqa: ARG001
-    if value is None:
-        return None
-    try:
-        return json.loads(value)
-    except json.JSONDecodeError as err:
-        raise click.BadParameter(f"{param.name} must be a valid JSON string.") from err
-
-
-def parse_number_str(ctx, param, value):  # noqa: ARG001
-    if value is None:
-        return None
-
-    values = value.split(",") if "," in value else [value]
-
-    try:
-        return [float(val) for val in values]
-    except ValueError as err:
-        raise click.BadParameter(
-            f"{param.name} must be a number or comma-separated list of numbers."
-        ) from err
-
-
 @click.group()
 def cli():
     pass


 @cli.command(
-    help="Run a benchmark against a generative model using the specified arguments."
+    help="Run a benchmark against a generative model using the specified arguments.",
+    context_settings={"auto_envvar_prefix": "GUIDELLM"},
+)
+@click.option(
+    "--scenario",
+    type=cli_tools.Union(
+        click.Path(
+            exists=True,
+            readable=True,
+            file_okay=True,
+            dir_okay=False,
+            path_type=Path,  # type: ignore[type-var]
+        ),
+        click.Choice(get_builtin_scenarios()),
+    ),
+    default=None,
+    help=(
+        "The name of a builtin scenario or path to a config file. "
+        "Missing values from the config will use defaults. "
+        "Options specified on the commandline will override the scenario."
+    ),
 )
 @click.option(
     "--target",
-    required=True,
     type=str,
     help="The target path for the backend to run benchmarks against. For example, http://localhost:8000",
 )
@@ -61,12 +60,12 @@ def cli():
         "The type of backend to use to run requests against. Defaults to 'openai_http'."
         f" Supported types: {', '.join(get_args(BackendType))}"
     ),
-    default="openai_http",
+    default=GenerativeTextScenario.get_default("backend_type"),
 )
 @click.option(
     "--backend-args",
-    callback=parse_json,
-    default=None,
+    callback=cli_tools.parse_json,
+    default=GenerativeTextScenario.get_default("backend_args"),
     help=(
         "A JSON string containing any arguments to pass to the backend as a "
         "dict with **kwargs."
@@ -74,7 +73,7 @@ def cli():
 )
 @click.option(
     "--model",
-    default=None,
+    default=GenerativeTextScenario.get_default("model"),
     type=str,
     help=(
         "The ID of the model to benchmark within the backend. "
@@ -83,7 +82,7 @@ def cli():
 )
 @click.option(
     "--processor",
-    default=None,
+    default=GenerativeTextScenario.get_default("processor"),
     type=str,
     help=(
         "The processor or tokenizer to use to calculate token counts for statistics "
@@ -93,8 +92,8 @@ def cli():
 )
 @click.option(
     "--processor-args",
-    default=None,
-    callback=parse_json,
+    default=GenerativeTextScenario.get_default("processor_args"),
+    callback=cli_tools.parse_json,
     help=(
         "A JSON string containing any arguments to pass to the processor constructor "
         "as a dict with **kwargs."
@@ -102,7 +101,6 @@ def cli():
 )
 @click.option(
     "--data",
-    required=True,
     type=str,
     help=(
         "The HuggingFace dataset ID, a path to a HuggingFace dataset, "
@@ -112,7 +110,8 @@ def cli():
 )
 @click.option(
     "--data-args",
-    callback=parse_json,
+    default=GenerativeTextScenario.get_default("data_args"),
+    callback=cli_tools.parse_json,
     help=(
         "A JSON string containing any arguments to pass to the dataset creation "
         "as a dict with **kwargs."
@@ -120,7 +119,7 @@ def cli():
 )
 @click.option(
     "--data-sampler",
-    default=None,
+    default=GenerativeTextScenario.get_default("data_sampler"),
     type=click.Choice(["random"]),
     help=(
         "The data sampler type to use. 'random' will add a random shuffle on the data. "
@@ -129,7 +128,6 @@ def cli():
 )
 @click.option(
     "--rate-type",
-    required=True,
     type=click.Choice(STRATEGY_PROFILE_CHOICES),
    help=(
         "The type of benchmark to run. "
@@ -138,8 +136,7 @@ def cli():
 )
 @click.option(
     "--rate",
-    default=None,
-    callback=parse_number_str,
+    default=GenerativeTextScenario.get_default("rate"),
     help=(
         "The rates to run the benchmark at. "
         "Can be a single number or a comma-separated list of numbers. "
@@ -152,6 +149,7 @@ def cli():
 @click.option(
     "--max-seconds",
     type=float,
+    default=GenerativeTextScenario.get_default("max_seconds"),
     help=(
         "The maximum number of seconds each benchmark can run for. "
         "If None, will run until max_requests or the data is exhausted."
@@ -160,6 +158,7 @@ def cli():
 @click.option(
     "--max-requests",
     type=int,
+    default=GenerativeTextScenario.get_default("max_requests"),
     help=(
         "The maximum number of requests each benchmark can run for. "
         "If None, will run until max_seconds or the data is exhausted."
@@ -168,7 +167,7 @@ def cli():
 @click.option(
     "--warmup-percent",
     type=float,
-    default=None,
+    default=GenerativeTextScenario.get_default("warmup_percent"),
     help=(
         "The percent of the benchmark (based on max-seconds, max-requets, "
         "or lenth of dataset) to run as a warmup and not include in the final results. "
@@ -178,6 +177,7 @@ def cli():
 @click.option(
     "--cooldown-percent",
     type=float,
+    default=GenerativeTextScenario.get_default("cooldown_percent"),
     help=(
         "The percent of the benchmark (based on max-seconds, max-requets, or lenth "
         "of dataset) to run as a cooldown and not include in the final results. "
@@ -212,7 +212,7 @@ def cli():
 )
 @click.option(
     "--output-extras",
-    callback=parse_json,
+    callback=cli_tools.parse_json,
     help="A JSON string of extra data to save with the output benchmarks",
 )
 @click.option(
@@ -222,15 +222,16 @@ def cli():
         "The number of samples to save in the output file. "
         "If None (default), will save all samples."
     ),
-    default=None,
+    default=GenerativeTextScenario.get_default("output_sampling"),
 )
 @click.option(
     "--random-seed",
-    default=42,
+    default=GenerativeTextScenario.get_default("random_seed"),
     type=int,
     help="The random seed to use for benchmarking to ensure reproducibility.",
 )
 def benchmark(
+    scenario,
     target,
     backend_type,
     backend_args,
@@ -254,30 +255,53 @@ def benchmark(
     output_sampling,
     random_seed,
 ):
+    click_ctx = click.get_current_context()
+
+    overrides = cli_tools.set_if_not_default(
+        click_ctx,
+        target=target,
+        backend_type=backend_type,
+        backend_args=backend_args,
+        model=model,
+        processor=processor,
+        processor_args=processor_args,
+        data=data,
+        data_args=data_args,
+        data_sampler=data_sampler,
+        rate_type=rate_type,
+        rate=rate,
+        max_seconds=max_seconds,
+        max_requests=max_requests,
+        warmup_percent=warmup_percent,
+        cooldown_percent=cooldown_percent,
+        output_sampling=output_sampling,
+        random_seed=random_seed,
+    )
+
+    try:
+        # If a scenario file was specified read from it
+        if scenario is None:
+            _scenario = GenerativeTextScenario.model_validate(overrides)
+        elif isinstance(scenario, Path):
+            _scenario = GenerativeTextScenario.from_file(scenario, overrides)
+        else:  # Only builtins can make it here; click will catch anything else
+            _scenario = GenerativeTextScenario.from_builtin(scenario, overrides)
+    except ValidationError as e:
+        # Translate pydantic valdation error to click argument error
+        errs = e.errors(include_url=False, include_context=True, include_input=True)
+        param_name = "--" + str(errs[0]["loc"][0]).replace("_", "-")
+        raise click.BadParameter(
+            errs[0]["msg"], ctx=click_ctx, param_hint=param_name
+        ) from e
+
     asyncio.run(
-        benchmark_generative_text(
-            target=target,
-            backend_type=backend_type,
-            backend_args=backend_args,
-            model=model,
-            processor=processor,
-            processor_args=processor_args,
-            data=data,
-            data_args=data_args,
-            data_sampler=data_sampler,
-            rate_type=rate_type,
-            rate=rate,
-            max_seconds=max_seconds,
-            max_requests=max_requests,
-            warmup_percent=warmup_percent,
-            cooldown_percent=cooldown_percent,
+        benchmark_with_scenario(
+            scenario=_scenario,
            show_progress=not disable_progress,
            show_progress_scheduler_stats=display_scheduler_stats,
            output_console=not disable_console_outputs,
            output_path=output_path,
            output_extras=output_extras,
-            output_sampling=output_sampling,
-            random_seed=random_seed,
         )
    )

@@ -316,7 +340,8 @@ def preprocess():
         "Convert a dataset to have specific prompt and output token sizes.\n"
         "DATA: Path to the input dataset or dataset ID.\n"
         "OUTPUT_PATH: Path to save the converted dataset, including file suffix."
-    )
+    ),
+    context_settings={"auto_envvar_prefix": "GUIDELLM"},
 )
 @click.argument(
     "data",
@@ -340,7 +365,7 @@ def preprocess():
 @click.option(
     "--processor-args",
     default=None,
-    callback=parse_json,
+    callback=cli_tools.parse_json,
     help=(
         "A JSON string containing any arguments to pass to the processor constructor "
         "as a dict with **kwargs."
@@ -348,7 +373,7 @@ def preprocess():
 )
 @click.option(
     "--data-args",
-    callback=parse_json,
+    callback=cli_tools.parse_json,
     help=(
         "A JSON string containing any arguments to pass to the dataset creation "
         "as a dict with **kwargs."

{guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/backend/openai.py

@@ -70,6 +70,14 @@ class OpenAIHTTPBackend(Backend):
         the values of these keys will be used as the parameters for the respective
         endpoint.
         If not provided, no extra query parameters are added.
+    :param extra_body: Body parameters to include in requests to the OpenAI server.
+        If "chat_completions", "models", or "text_completions" are included as keys,
+        the values of these keys will be included in the body for the respective
+        endpoint.
+        If not provided, no extra body parameters are added.
+    :param remove_from_body: Parameters that should be removed from the body of each
+        request.
+        If not provided, no parameters are removed from the body.
     """

     def __init__(
@@ -85,6 +93,7 @@ class OpenAIHTTPBackend(Backend):
         max_output_tokens: Optional[int] = None,
         extra_query: Optional[dict] = None,
         extra_body: Optional[dict] = None,
+        remove_from_body: Optional[list[str]] = None,
     ):
         super().__init__(type_="openai_http")
         self._target = target or settings.openai.base_url
@@ -122,6 +131,7 @@ class OpenAIHTTPBackend(Backend):
         )
         self.extra_query = extra_query
         self.extra_body = extra_body
+        self.remove_from_body = remove_from_body
         self._async_client: Optional[httpx.AsyncClient] = None

     @property
@@ -253,9 +263,8 @@ class OpenAIHTTPBackend(Backend):

         headers = self._headers()
         params = self._params(TEXT_COMPLETIONS)
-        body = self._body(TEXT_COMPLETIONS)
         payload = self._completions_payload(
-            body=body,
+            endpoint_type=TEXT_COMPLETIONS,
             orig_kwargs=kwargs,
             max_output_tokens=output_token_count,
             prompt=prompt,
@@ -330,12 +339,11 @@ class OpenAIHTTPBackend(Backend):
         logger.debug("{} invocation with args: {}", self.__class__.__name__, locals())
         headers = self._headers()
         params = self._params(CHAT_COMPLETIONS)
-        body = self._body(CHAT_COMPLETIONS)
         messages = (
             content if raw_content else self._create_chat_messages(content=content)
         )
         payload = self._completions_payload(
-            body=body,
+            endpoint_type=CHAT_COMPLETIONS,
             orig_kwargs=kwargs,
             max_output_tokens=output_token_count,
             messages=messages,
@@ -411,7 +419,7 @@ class OpenAIHTTPBackend(Backend):

         return self.extra_query

-    def _body(self, endpoint_type: EndpointType) -> dict[str, str]:
+    def _extra_body(self, endpoint_type: EndpointType) -> dict[str, Any]:
         if self.extra_body is None:
             return {}

@@ -426,12 +434,12 @@ class OpenAIHTTPBackend(Backend):

     def _completions_payload(
         self,
-        body: Optional[dict],
+        endpoint_type: EndpointType,
         orig_kwargs: Optional[dict],
         max_output_tokens: Optional[int],
         **kwargs,
     ) -> dict:
-        payload = body or {}
+        payload = self._extra_body(endpoint_type)
         payload.update(orig_kwargs or {})
         payload.update(kwargs)
         payload["model"] = self.model
@@ -455,6 +463,10 @@ class OpenAIHTTPBackend(Backend):
             payload["stop"] = None
             payload["ignore_eos"] = True

+        if self.remove_from_body:
+            for key in self.remove_from_body:
+                payload.pop(key, None)
+
         return payload

     @staticmethod
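
The new `extra_body` and `remove_from_body` options are plain constructor arguments, and since the CLI documents `--backend-args` as a JSON dict passed to the backend as `**kwargs`, they should also be reachable that way. Below is a minimal sketch of direct construction; the endpoint-keyed nesting follows the docstring above, while the target URL, model ID, and body keys are illustrative placeholders rather than shipped defaults.

```python
from guidellm.backend.openai import OpenAIHTTPBackend

# Sketch only: argument values are illustrative placeholders.
backend = OpenAIHTTPBackend(
    target="http://localhost:8000",
    model="my-model",
    extra_body={
        # Merged into the request body for chat completion requests only.
        "chat_completions": {"min_tokens": 32},
    },
    # Dropped from every payload before the request is sent.
    remove_from_body=["ignore_eos"],
)
```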

{guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/benchmark/benchmark.py

@@ -815,10 +815,7 @@ class GenerativeBenchmark(Benchmark):
                     req.first_token_time or req.start_time
                     for req in total_with_output_first
                 ],
-                iter_counts=[
-                    req.output_tokens
-                    for req in total_with_output_first
-                ],
+                iter_counts=[req.output_tokens for req in total_with_output_first],
                 first_iter_counts=[
                     req.prompt_tokens for req in total_with_output_first
                 ],

{guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/benchmark/entrypoints.py

@@ -15,10 +15,22 @@ from guidellm.benchmark.output import (
 )
 from guidellm.benchmark.profile import ProfileType, create_profile
 from guidellm.benchmark.progress import GenerativeTextBenchmarkerProgressDisplay
+from guidellm.benchmark.scenario import GenerativeTextScenario, Scenario
 from guidellm.request import GenerativeRequestLoader
 from guidellm.scheduler import StrategyType


+async def benchmark_with_scenario(scenario: Scenario, **kwargs):
+    """
+    Run a benchmark using a scenario and specify any extra arguments
+    """
+
+    if isinstance(scenario, GenerativeTextScenario):
+        return await benchmark_generative_text(**vars(scenario), **kwargs)
+    else:
+        raise ValueError(f"Unsupported Scenario type {type(scenario)}")
+
+
 async def benchmark_generative_text(
     target: str,
     backend_type: BackendType,
@@ -43,13 +55,13 @@ async def benchmark_generative_text(
     max_requests: Optional[int],
     warmup_percent: Optional[float],
     cooldown_percent: Optional[float],
-    show_progress: bool,
-    show_progress_scheduler_stats: bool,
-    output_console: bool,
     output_path: Optional[Union[str, Path]],
     output_extras: Optional[dict[str, Any]],
     output_sampling: Optional[int],
     random_seed: int,
+    show_progress: bool = True,
+    show_progress_scheduler_stats: bool = False,
+    output_console: bool = True,
 ) -> tuple[GenerativeBenchmarksReport, Optional[Path]]:
     console = GenerativeBenchmarksConsole(enabled=show_progress)
     console.print_line("Creating backend...")
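
For programmatic use, a rough sketch of driving the new entrypoint directly; the server URL, dataset path, and output values are placeholders, and the scenario fields mirror `GenerativeTextScenario` as defined in the new scenario module below.

```python
import asyncio

from guidellm.benchmark.entrypoints import benchmark_with_scenario
from guidellm.benchmark.scenario import GenerativeTextScenario

# Placeholder values; fields left unset fall back to the scenario defaults.
scenario = GenerativeTextScenario(
    target="http://localhost:8000",
    data="prompts.jsonl",
    rate_type="synchronous",
    max_requests=100,
)

# Arguments that are not scenario fields (output_path, output_extras, the
# display flags) are forwarded to benchmark_generative_text as **kwargs.
report, output_path = asyncio.run(
    benchmark_with_scenario(
        scenario=scenario,
        output_path="benchmarks.json",
        output_extras=None,
        show_progress=False,
    )
)
```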

guidellm-0.3.0a26/src/guidellm/benchmark/scenario.py

@@ -0,0 +1,104 @@
+from collections.abc import Iterable
+from functools import cache
+from pathlib import Path
+from typing import Annotated, Any, Literal, Optional, TypeVar, Union
+
+from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
+from pydantic import BeforeValidator, Field, NonNegativeInt, PositiveFloat, PositiveInt
+from transformers.tokenization_utils_base import (  # type: ignore[import]
+    PreTrainedTokenizerBase,
+)
+
+from guidellm.backend.backend import BackendType
+from guidellm.benchmark.profile import ProfileType
+from guidellm.objects.pydantic import StandardBaseModel
+from guidellm.scheduler.strategy import StrategyType
+
+__ALL__ = ["Scenario", "GenerativeTextScenario", "get_builtin_scenarios"]
+
+SCENARIO_DIR = Path(__file__).parent / "scenarios/"
+
+
+@cache
+def get_builtin_scenarios() -> list[str]:
+    """Returns list of builtin scenario names."""
+    return [p.stem for p in SCENARIO_DIR.glob("*.json")]
+
+
+def parse_float_list(value: Union[str, float, list[float]]) -> list[float]:
+    """
+    Parse a comma separated string to a list of float
+    or convert single float list of one or pass float
+    list through.
+    """
+    if isinstance(value, (int, float)):
+        return [value]
+    elif isinstance(value, list):
+        return value
+
+    values = value.split(",") if "," in value else [value]
+
+    try:
+        return [float(val) for val in values]
+    except ValueError as err:
+        raise ValueError(
+            "must be a number or comma-separated list of numbers."
+        ) from err
+
+
+T = TypeVar("T", bound="Scenario")
+
+
+class Scenario(StandardBaseModel):
+    """
+    Parent Scenario class with common options for all benchmarking types.
+    """
+
+    target: str
+
+    @classmethod
+    def from_builtin(cls: type[T], name: str, overrides: Optional[dict] = None) -> T:
+        filename = SCENARIO_DIR / f"{name}.json"
+
+        if not filename.is_file():
+            raise ValueError(f"{name} is not a valid builtin scenario")
+
+        return cls.from_file(filename, overrides)
+
+
+class GenerativeTextScenario(Scenario):
+    """
+    Scenario class for generative text benchmarks.
+    """
+
+    class Config:
+        # NOTE: This prevents errors due to unvalidatable
+        # types like PreTrainedTokenizerBase
+        arbitrary_types_allowed = True
+
+    backend_type: BackendType = "openai_http"
+    backend_args: Optional[dict[str, Any]] = None
+    model: Optional[str] = None
+    processor: Optional[Union[str, Path, PreTrainedTokenizerBase]] = None
+    processor_args: Optional[dict[str, Any]] = None
+    data: Union[
+        str,
+        Path,
+        Iterable[Union[str, dict[str, Any]]],
+        Dataset,
+        DatasetDict,
+        IterableDataset,
+        IterableDatasetDict,
+    ]
+    data_args: Optional[dict[str, Any]] = None
+    data_sampler: Optional[Literal["random"]] = None
+    rate_type: Union[StrategyType, ProfileType]
+    rate: Annotated[
+        Optional[list[PositiveFloat]], BeforeValidator(parse_float_list)
+    ] = None
+    max_seconds: Optional[PositiveFloat] = None
+    max_requests: Optional[PositiveInt] = None
+    warmup_percent: Annotated[Optional[float], Field(gt=0, le=1)] = None
+    cooldown_percent: Annotated[Optional[float], Field(gt=0, le=1)] = None
+    output_sampling: Optional[NonNegativeInt] = None
+    random_seed: int = 42
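
As a sketch of how this model behaves (the file name and field values are illustrative): the `BeforeValidator` lets `rate` arrive as a comma-separated string, and `from_file`/`from_builtin` load a JSON or YAML scenario and then apply CLI-style overrides on top.

```python
from pathlib import Path

from guidellm.benchmark.scenario import GenerativeTextScenario

# "1,2,4" is coerced to [1.0, 2.0, 4.0] by parse_float_list before validation.
scenario = GenerativeTextScenario.model_validate(
    {
        "target": "http://localhost:8000",  # placeholder server URL
        "data": "prompts.jsonl",            # hypothetical dataset file
        "rate_type": "constant",
        "rate": "1,2,4",
    }
)
assert scenario.rate == [1.0, 2.0, 4.0]

# The same fields can live in a JSON or YAML file and be partially overridden,
# which is what the CLI does when --scenario is combined with explicit flags.
overridden = GenerativeTextScenario.from_file(
    Path("my_scenario.json"),  # hypothetical user-provided file
    {"max_requests": 50},
)
```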

{guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm/objects/pydantic.py

@@ -1,10 +1,15 @@
-from typing import Any, Generic, TypeVar
+import json
+from pathlib import Path
+from typing import Any, Generic, Optional, TypeVar

+import yaml
 from loguru import logger
 from pydantic import BaseModel, ConfigDict, Field

 __all__ = ["StandardBaseModel", "StatusBreakdown"]

+T = TypeVar("T", bound="StandardBaseModel")
+

 class StandardBaseModel(BaseModel):
     """
@@ -27,6 +32,30 @@ class StandardBaseModel(BaseModel):
             data,
         )

+    @classmethod
+    def get_default(cls: type[T], field: str) -> Any:
+        """Get default values for model fields"""
+        return cls.model_fields[field].default
+
+    @classmethod
+    def from_file(cls: type[T], filename: Path, overrides: Optional[dict] = None) -> T:
+        """
+        Attempt to create a new instance of the model using
+        data loaded from json or yaml file.
+        """
+        try:
+            with filename.open() as f:
+                if str(filename).endswith(".json"):
+                    data = json.load(f)
+                else:  # Assume everything else is yaml
+                    data = yaml.safe_load(f)
+        except (json.JSONDecodeError, yaml.YAMLError) as e:
+            logger.error(f"Failed to parse {filename} as type {cls.__name__}")
+            raise ValueError(f"Error when parsing file: {filename}") from e
+
+        data.update(overrides)
+        return cls.model_validate(data)
+

 SuccessfulT = TypeVar("SuccessfulT")
 ErroredT = TypeVar("ErroredT")
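
A small sketch of the two new helpers, using `GenerativeTextScenario` as the concrete subclass just as the CLI diff above does; the asserted values are the defaults declared on that model.

```python
from guidellm.benchmark.scenario import GenerativeTextScenario

# get_default keeps click option defaults in sync with the pydantic model.
assert GenerativeTextScenario.get_default("backend_type") == "openai_http"
assert GenerativeTextScenario.get_default("random_seed") == 42
```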

guidellm-0.3.0a26/src/guidellm/utils/cli.py

@@ -0,0 +1,62 @@
+import json
+from typing import Any
+
+import click
+
+
+def parse_json(ctx, param, value):  # noqa: ARG001
+    if value is None:
+        return None
+    try:
+        return json.loads(value)
+    except json.JSONDecodeError as err:
+        raise click.BadParameter(f"{param.name} must be a valid JSON string.") from err
+
+
+def set_if_not_default(ctx: click.Context, **kwargs) -> dict[str, Any]:
+    """
+    Set the value of a click option if it is not the default value.
+    This is useful for setting options that are not None by default.
+    """
+    values = {}
+    for k, v in kwargs.items():
+        if ctx.get_parameter_source(k) != click.core.ParameterSource.DEFAULT:  # type: ignore[attr-defined]
+            values[k] = v
+
+    return values
+
+
+class Union(click.ParamType):
+    """
+    A custom click parameter type that allows for multiple types to be accepted.
+    """
+
+    def __init__(self, *types: click.ParamType):
+        self.types = types
+        self.name = "".join(t.name for t in types)
+
+    def convert(self, value, param, ctx):
+        fails = []
+        for t in self.types:
+            try:
+                return t.convert(value, param, ctx)
+            except click.BadParameter as e:
+                fails.append(str(e))
+                continue
+
+        self.fail("; ".join(fails) or f"Invalid value: {value}")  # noqa: RET503
+
+    def get_metavar(self, param: click.Parameter) -> str:
+        def get_choices(t: click.ParamType) -> str:
+            meta = t.get_metavar(param)
+            return meta if meta is not None else t.name
+
+        # Get the choices for each type in the union.
+        choices_str = "|".join(map(get_choices, self.types))
+
+        # Use curly braces to indicate a required argument.
+        if param.required and param.param_type_name == "argument":
+            return f"{{{choices_str}}}"
+
+        # Use square braces to indicate an option or optional argument.
+        return f"[{choices_str}]"
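
A minimal sketch of these helpers in a standalone click command; the command and option names are made up for illustration and are not part of guidellm.

```python
from pathlib import Path

import click

from guidellm.utils import cli as cli_tools


@click.command()
@click.option(
    "--config",
    # Accept either an existing file path or one of a fixed set of names.
    type=cli_tools.Union(
        click.Path(exists=True, dir_okay=False, path_type=Path),
        click.Choice(["small", "large"]),
    ),
    default=None,
)
@click.option("--seed", type=int, default=42)
def demo(config, seed):
    ctx = click.get_current_context()
    # Only values the user explicitly supplied end up in the overrides dict.
    overrides = cli_tools.set_if_not_default(ctx, config=config, seed=seed)
    click.echo(overrides)


if __name__ == "__main__":
    demo()
```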

guidellm-0.3.0a26/src/guidellm/version.py

@@ -0,0 +1,6 @@
+version = "0.3.0a26"
+build_type = "nightly"
+build_iteration = "26"
+git_commit = "0e78c65948eab356f2f846a0d5ae609ab650c290"
+git_branch = "main"
+git_last_tag = "v0.2.1"

{guidellm-0.3.0a22 → guidellm-0.3.0a26/src/guidellm.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: guidellm
-Version: 0.3.0a22
+Version: 0.3.0a26
 Summary: Guidance platform for deploying and managing large language models.
 Author: Red Hat
 License-Expression: Apache-2.0
@@ -208,6 +208,48 @@ The `guidellm benchmark` command is used to run benchmarks against a generative

 - `--output-path`: Defines the path to save the benchmark results. Supports JSON, YAML, or CSV formats. If a directory is provided, the results will be saved as `benchmarks.json` in that directory. If not set, the results will be saved in the current working directory.

+### GuideLLM UI
+
+GuideLLM UI is a companion frontend for visualizing the results of a GuideLLM benchmark run.
+
+### 🛠 Running the UI
+
+1. Use the Hosted Build (Recommended for Most Users)
+
+After running a benchmark with GuideLLM, a report.html file will be generated (by default at guidellm_report/report.html). This file references the latest stable version of the UI hosted at:
+
+```
+https://neuralmagic.github.io/guidellm/ui/dev/
+```
+
+Open the file in your browser and you're done—no setup required.
+
+2. Build and Serve the UI Locally (For Development) This option is useful if:
+
+- You are actively developing the UI
+
+- You want to test changes to the UI before publishing
+
+- You want full control over how the report is displayed
+
+```bash
+npm install
+npm run build
+npx serve out
+```
+
+This will start a local server (e.g., at http://localhost:3000). Then, in your GuideLLM config or CLI flags, point to this local server as the asset base for report generation.
+
+### 🧪 Development Notes
+
+During UI development, it can be helpful to view sample data. We include a sample benchmark run wired into the Redux store under:
+
+```
+src/lib/store/[runInfo/workloadDetails/benchmarks]WindowData.ts
+```
+
+In the future this will be replaced by a configurable untracked file for dev use.
+
 ## Resources

 ### Documentation

{guidellm-0.3.0a22 → guidellm-0.3.0a26}/src/guidellm.egg-info/SOURCES.txt

@@ -26,6 +26,8 @@ src/guidellm/benchmark/entrypoints.py
 src/guidellm/benchmark/output.py
 src/guidellm/benchmark/profile.py
 src/guidellm/benchmark/progress.py
+src/guidellm/benchmark/scenario.py
+src/guidellm/benchmark/scenarios/__init__.py
 src/guidellm/data/__init__.py
 src/guidellm/data/prideandprejudice.txt.gz
 src/guidellm/dataset/__init__.py
@@ -50,6 +52,7 @@ src/guidellm/scheduler/strategy.py
 src/guidellm/scheduler/types.py
 src/guidellm/scheduler/worker.py
 src/guidellm/utils/__init__.py
+src/guidellm/utils/cli.py
 src/guidellm/utils/colors.py
 src/guidellm/utils/hf_datasets.py
 src/guidellm/utils/hf_transformers.py

guidellm-0.3.0a22/src/guidellm/version.py (deleted)

@@ -1,6 +0,0 @@
-version = "0.3.0a22"
-build_type = "nightly"
-build_iteration = "22"
-git_commit = "023c8dd0a4ee9fc1be5ad60b7854f661423eb722"
-git_branch = "main"
-git_last_tag = "v0.2.1"