guidellm 0.3.0rc20250429__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- guidellm/__init__.py +8 -13
- guidellm/__main__.py +290 -69
- guidellm/backend/__init__.py +6 -6
- guidellm/backend/backend.py +25 -4
- guidellm/backend/openai.py +153 -30
- guidellm/backend/response.py +6 -2
- guidellm/benchmark/__init__.py +16 -22
- guidellm/benchmark/aggregator.py +3 -3
- guidellm/benchmark/benchmark.py +11 -12
- guidellm/benchmark/benchmarker.py +2 -2
- guidellm/benchmark/entrypoints.py +34 -10
- guidellm/benchmark/output.py +59 -8
- guidellm/benchmark/profile.py +4 -4
- guidellm/benchmark/progress.py +2 -2
- guidellm/benchmark/scenario.py +104 -0
- guidellm/benchmark/scenarios/__init__.py +0 -0
- guidellm/config.py +32 -7
- guidellm/dataset/__init__.py +4 -4
- guidellm/dataset/creator.py +1 -1
- guidellm/dataset/synthetic.py +36 -11
- guidellm/logger.py +8 -4
- guidellm/objects/__init__.py +2 -2
- guidellm/objects/pydantic.py +30 -1
- guidellm/objects/statistics.py +20 -14
- guidellm/preprocess/__init__.py +3 -0
- guidellm/preprocess/dataset.py +374 -0
- guidellm/presentation/__init__.py +28 -0
- guidellm/presentation/builder.py +27 -0
- guidellm/presentation/data_models.py +232 -0
- guidellm/presentation/injector.py +66 -0
- guidellm/request/__init__.py +6 -3
- guidellm/request/loader.py +5 -5
- guidellm/{scheduler → request}/types.py +4 -1
- guidellm/scheduler/__init__.py +10 -15
- guidellm/scheduler/queues.py +25 -0
- guidellm/scheduler/result.py +21 -3
- guidellm/scheduler/scheduler.py +68 -60
- guidellm/scheduler/strategy.py +26 -24
- guidellm/scheduler/worker.py +64 -103
- guidellm/utils/__init__.py +17 -5
- guidellm/utils/cli.py +62 -0
- guidellm/utils/default_group.py +105 -0
- guidellm/utils/dict.py +23 -0
- guidellm/utils/hf_datasets.py +36 -0
- guidellm/utils/random.py +1 -1
- guidellm/utils/text.py +14 -15
- guidellm/version.py +6 -0
- guidellm-0.3.1.dist-info/METADATA +329 -0
- guidellm-0.3.1.dist-info/RECORD +62 -0
- {guidellm-0.3.0rc20250429.dist-info → guidellm-0.3.1.dist-info}/WHEEL +1 -1
- guidellm-0.3.0rc20250429.dist-info/METADATA +0 -453
- guidellm-0.3.0rc20250429.dist-info/RECORD +0 -48
- {guidellm-0.3.0rc20250429.dist-info → guidellm-0.3.1.dist-info}/entry_points.txt +0 -0
- {guidellm-0.3.0rc20250429.dist-info → guidellm-0.3.1.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.3.0rc20250429.dist-info → guidellm-0.3.1.dist-info}/top_level.txt +0 -0
guidellm/backend/openai.py
CHANGED
@@ -1,4 +1,5 @@
 import base64
+import copy
 import json
 import time
 from collections.abc import AsyncGenerator
@@ -17,12 +18,25 @@ from guidellm.backend.response import (
 )
 from guidellm.config import settings

-__all__ = [
+__all__ = [
+    "CHAT_COMPLETIONS",
+    "CHAT_COMPLETIONS_PATH",
+    "MODELS",
+    "TEXT_COMPLETIONS",
+    "TEXT_COMPLETIONS_PATH",
+    "OpenAIHTTPBackend",
+]


 TEXT_COMPLETIONS_PATH = "/v1/completions"
 CHAT_COMPLETIONS_PATH = "/v1/chat/completions"

+CompletionEndpointType = Literal["text_completions", "chat_completions"]
+EndpointType = Union[Literal["models"], CompletionEndpointType]
+CHAT_COMPLETIONS: CompletionEndpointType = "chat_completions"
+MODELS: EndpointType = "models"
+TEXT_COMPLETIONS: CompletionEndpointType = "text_completions"
+

 @Backend.register("openai_http")
 class OpenAIHTTPBackend(Backend):
@@ -49,8 +63,23 @@ class OpenAIHTTPBackend(Backend):
         If not provided, the default timeout provided from settings is used.
     :param http2: If True, uses HTTP/2 for requests to the OpenAI server.
         Defaults to True.
+    :param follow_redirects: If True, the HTTP client will follow redirect responses.
+        If not provided, the default value from settings is used.
     :param max_output_tokens: The maximum number of tokens to request for completions.
         If not provided, the default maximum tokens provided from settings is used.
+    :param extra_query: Query parameters to include in requests to the OpenAI server.
+        If "chat_completions", "models", or "text_completions" are included as keys,
+        the values of these keys will be used as the parameters for the respective
+        endpoint.
+        If not provided, no extra query parameters are added.
+    :param extra_body: Body parameters to include in requests to the OpenAI server.
+        If "chat_completions", "models", or "text_completions" are included as keys,
+        the values of these keys will be included in the body for the respective
+        endpoint.
+        If not provided, no extra body parameters are added.
+    :param remove_from_body: Parameters that should be removed from the body of each
+        request.
+        If not provided, no parameters are removed from the body.
     """

     def __init__(
@@ -62,7 +91,13 @@ class OpenAIHTTPBackend(Backend):
         project: Optional[str] = None,
         timeout: Optional[float] = None,
         http2: Optional[bool] = True,
+        follow_redirects: Optional[bool] = None,
         max_output_tokens: Optional[int] = None,
+        extra_query: Optional[dict] = None,
+        extra_body: Optional[dict] = None,
+        remove_from_body: Optional[list[str]] = None,
+        headers: Optional[dict] = None,
+        verify: Optional[bool] = None,
     ):
         super().__init__(type_="openai_http")
         self._target = target or settings.openai.base_url
@@ -79,20 +114,48 @@ class OpenAIHTTPBackend(Backend):

         self._model = model

+        # Start with default headers based on other params
+        default_headers: dict[str, str] = {}
         api_key = api_key or settings.openai.api_key
-
-
-
+        bearer_token = settings.openai.bearer_token
+        if api_key:
+            default_headers["Authorization"] = f"Bearer {api_key}"
+        elif bearer_token:
+            default_headers["Authorization"] = bearer_token

         self.organization = organization or settings.openai.organization
+        if self.organization:
+            default_headers["OpenAI-Organization"] = self.organization
+
         self.project = project or settings.openai.project
+        if self.project:
+            default_headers["OpenAI-Project"] = self.project
+
+        # User-provided headers from kwargs or settings override defaults
+        merged_headers = default_headers.copy()
+        merged_headers.update(settings.openai.headers or {})
+        if headers:
+            merged_headers.update(headers)
+
+        # Remove headers with None values for backward compatibility and convenience
+        self.headers = {k: v for k, v in merged_headers.items() if v is not None}
+
         self.timeout = timeout if timeout is not None else settings.request_timeout
         self.http2 = http2 if http2 is not None else settings.request_http2
+        self.follow_redirects = (
+            follow_redirects
+            if follow_redirects is not None
+            else settings.request_follow_redirects
+        )
+        self.verify = verify if verify is not None else settings.openai.verify
         self.max_output_tokens = (
             max_output_tokens
             if max_output_tokens is not None
             else settings.openai.max_output_tokens
         )
+        self.extra_query = extra_query
+        self.extra_body = extra_body
+        self.remove_from_body = remove_from_body
         self._async_client: Optional[httpx.AsyncClient] = None

     @property
@@ -120,13 +183,21 @@ class OpenAIHTTPBackend(Backend):
             "max_output_tokens": self.max_output_tokens,
             "timeout": self.timeout,
             "http2": self.http2,
-            "
-            "
-            "project": self.project,
+            "follow_redirects": self.follow_redirects,
+            "headers": self.headers,
             "text_completions_path": TEXT_COMPLETIONS_PATH,
             "chat_completions_path": CHAT_COMPLETIONS_PATH,
         }

+    async def reset(self) -> None:
+        """
+        Reset the connection object. This is useful for backends that
+        reuse connections or have state that needs to be cleared.
+        For this backend, it closes the async client if it exists.
+        """
+        if self._async_client is not None:
+            await self._async_client.aclose()
+
     async def check_setup(self):
         """
         Check if the backend is setup correctly and can be used for requests.
@@ -165,7 +236,10 @@ class OpenAIHTTPBackend(Backend):
         """
         target = f"{self.target}/v1/models"
         headers = self._headers()
-
+        params = self._params(MODELS)
+        response = await self._get_async_client().get(
+            target, headers=headers, params=params
+        )
         response.raise_for_status()

         models = []
@@ -210,7 +284,9 @@ class OpenAIHTTPBackend(Backend):
         )

         headers = self._headers()
+        params = self._params(TEXT_COMPLETIONS)
         payload = self._completions_payload(
+            endpoint_type=TEXT_COMPLETIONS,
             orig_kwargs=kwargs,
             max_output_tokens=output_token_count,
             prompt=prompt,
@@ -223,14 +299,16 @@ class OpenAIHTTPBackend(Backend):
                 request_prompt_tokens=prompt_token_count,
                 request_output_tokens=output_token_count,
                 headers=headers,
+                params=params,
                 payload=payload,
             ):
                 yield resp
         except Exception as ex:
             logger.error(
-                "{} request with headers: {} and payload: {} failed: {}",
+                "{} request with headers: {} and params: {} and payload: {} failed: {}",
                 self.__class__.__name__,
                 headers,
+                params,
                 payload,
                 ex,
             )
@@ -282,10 +360,12 @@ class OpenAIHTTPBackend(Backend):
         """
         logger.debug("{} invocation with args: {}", self.__class__.__name__, locals())
         headers = self._headers()
+        params = self._params(CHAT_COMPLETIONS)
         messages = (
             content if raw_content else self._create_chat_messages(content=content)
         )
         payload = self._completions_payload(
+            endpoint_type=CHAT_COMPLETIONS,
             orig_kwargs=kwargs,
             max_output_tokens=output_token_count,
             messages=messages,
@@ -298,14 +378,16 @@ class OpenAIHTTPBackend(Backend):
                 request_prompt_tokens=prompt_token_count,
                 request_output_tokens=output_token_count,
                 headers=headers,
+                params=params,
                 payload=payload,
             ):
                 yield resp
         except Exception as ex:
             logger.error(
-                "{} request with headers: {} and payload: {} failed: {}",
+                "{} request with headers: {} and params: {} and payload: {} failed: {}",
                 self.__class__.__name__,
                 headers,
+                params,
                 payload,
                 ex,
             )
@@ -318,8 +400,13 @@ class OpenAIHTTPBackend(Backend):

         :return: The async HTTP client.
         """
-        if self._async_client is None:
-            client = httpx.AsyncClient(
+        if self._async_client is None or self._async_client.is_closed:
+            client = httpx.AsyncClient(
+                http2=self.http2,
+                timeout=self.timeout,
+                follow_redirects=self.follow_redirects,
+                verify=self.verify,
+            )
             self._async_client = client
         else:
             client = self._async_client
@@ -330,22 +417,44 @@ class OpenAIHTTPBackend(Backend):
         headers = {
             "Content-Type": "application/json",
         }
+        headers.update(self.headers)
+        return headers

-
-
+    def _params(self, endpoint_type: EndpointType) -> dict[str, str]:
+        if self.extra_query is None:
+            return {}

-        if
-
+        if (
+            CHAT_COMPLETIONS in self.extra_query
+            or MODELS in self.extra_query
+            or TEXT_COMPLETIONS in self.extra_query
+        ):
+            return self.extra_query.get(endpoint_type, {})

-
-            headers["OpenAI-Project"] = self.project
+        return self.extra_query

-
+    def _extra_body(self, endpoint_type: EndpointType) -> dict[str, Any]:
+        if self.extra_body is None:
+            return {}
+
+        if (
+            CHAT_COMPLETIONS in self.extra_body
+            or MODELS in self.extra_body
+            or TEXT_COMPLETIONS in self.extra_body
+        ):
+            return copy.deepcopy(self.extra_body.get(endpoint_type, {}))
+
+        return copy.deepcopy(self.extra_body)

     def _completions_payload(
-        self,
+        self,
+        endpoint_type: CompletionEndpointType,
+        orig_kwargs: Optional[dict],
+        max_output_tokens: Optional[int],
+        **kwargs,
     ) -> dict:
-        payload =
+        payload = self._extra_body(endpoint_type)
+        payload.update(orig_kwargs or {})
         payload.update(kwargs)
         payload["model"] = self.model
         payload["stream"] = True
@@ -359,8 +468,10 @@ class OpenAIHTTPBackend(Backend):
                 self.__class__.__name__,
                 max_output_tokens or self.max_output_tokens,
             )
-
-
+        max_output_key = settings.openai.max_output_key.get(
+            endpoint_type, "max_tokens"
+        )
+        payload[max_output_key] = max_output_tokens or self.max_output_tokens

         if max_output_tokens:
             # only set stop and ignore_eos if max_output_tokens set at request level
@@ -368,6 +479,10 @@ class OpenAIHTTPBackend(Backend):
             payload["stop"] = None
             payload["ignore_eos"] = True

+        if self.remove_from_body:
+            for key in self.remove_from_body:
+                payload.pop(key, None)
+
         return payload

     @staticmethod
@@ -438,8 +553,9 @@ class OpenAIHTTPBackend(Backend):
         request_id: Optional[str],
         request_prompt_tokens: Optional[int],
         request_output_tokens: Optional[int],
-        headers: dict,
-
+        headers: dict[str, str],
+        params: dict[str, str],
+        payload: dict[str, Any],
     ) -> AsyncGenerator[Union[StreamingTextResponse, ResponseSummary], None]:
         if type_ == "text_completions":
             target = f"{self.target}{TEXT_COMPLETIONS_PATH}"
@@ -449,14 +565,17 @@ class OpenAIHTTPBackend(Backend):
             raise ValueError(f"Unsupported type: {type_}")

         logger.info(
-            "{} making request: {} to target: {} using http2: {}
-            "timeout: {} with headers: {} and
+            "{} making request: {} to target: {} using http2: {} following "
+            "redirects: {} for timeout: {} with headers: {} and params: {} and ",
+            "payload: {}",
             self.__class__.__name__,
             request_id,
             target,
             self.http2,
+            self.follow_redirects,
             self.timeout,
             headers,
+            params,
             payload,
         )

@@ -484,7 +603,7 @@ class OpenAIHTTPBackend(Backend):
         start_time = time.time()

         async with self._get_async_client().stream(
-            "POST", target, headers=headers, json=payload
+            "POST", target, headers=headers, params=params, json=payload
         ) as stream:
             stream.raise_for_status()

@@ -528,10 +647,12 @@ class OpenAIHTTPBackend(Backend):
                     response_output_count = usage["output"]

                 logger.info(
-                    "{} request: {} with headers: {} and
+                    "{} request: {} with headers: {} and params: {} and payload: {} completed"
+                    "with: {}",
                     self.__class__.__name__,
                     request_id,
                     headers,
+                    params,
                     payload,
                     response_value,
                 )
@@ -541,9 +662,11 @@ class OpenAIHTTPBackend(Backend):
             request_args=RequestArgs(
                 target=target,
                 headers=headers,
+                params=params,
                 payload=payload,
                 timeout=self.timeout,
                 http2=self.http2,
+                follow_redirects=self.follow_redirects,
             ),
             start_time=start_time,
             end_time=iter_time,
@@ -568,7 +691,7 @@ class OpenAIHTTPBackend(Backend):
             return data["choices"][0]["text"]

         if type_ == "chat_completions":
-            return data
+            return data.get("choices", [{}])[0].get("delta", {}).get("content")

         raise ValueError(f"Unsupported type: {type_}")

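Taken together, these changes add several opt-in knobs to the backend constructor. A minimal usage sketch follows; only the parameter names visible in the diff above are assumed, and every value is illustrative.

from guidellm.backend.openai import OpenAIHTTPBackend

# Values are placeholders; parameter names come from the new __init__ signature.
backend = OpenAIHTTPBackend(
    target="http://localhost:8000",                        # inference server base URL
    model="example-model",                                 # placeholder model id
    headers={"X-Custom-Header": "guidellm"},               # merged over the default auth headers
    extra_query={"chat_completions": {"echo": "false"}},   # per-endpoint query parameters
    extra_body={"priority": 0},                            # merged into every request body
    remove_from_body=["ignore_eos"],                       # keys stripped from the payload before sending
    follow_redirects=True,
    verify=False,                                          # illustrative: skip TLS verification
)

Per-endpoint dictionaries (keyed by "text_completions", "chat_completions", or "models") are only applied to the matching endpoint; a flat dictionary is applied to all requests, mirroring the _params and _extra_body logic above.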
guidellm/backend/response.py
CHANGED
@@ -6,10 +6,10 @@ from guidellm.config import settings
 from guidellm.objects.pydantic import StandardBaseModel

 __all__ = [
-    "StreamingResponseType",
-    "StreamingTextResponse",
     "RequestArgs",
     "ResponseSummary",
+    "StreamingResponseType",
+    "StreamingTextResponse",
 ]


@@ -48,17 +48,21 @@ class RequestArgs(StandardBaseModel):

     :param target: The target URL or function for the request.
     :param headers: The headers, if any, included in the request such as authorization.
+    :param params: The query parameters, if any, included in the request.
     :param payload: The payload / arguments for the request including the prompt /
         content and other configurations.
     :param timeout: The timeout for the request in seconds, if any.
     :param http2: Whether HTTP/2 was used for the request, if applicable.
+    :param follow_redirects: Whether the request should follow redirect responses.
     """

     target: str
     headers: dict[str, str]
+    params: dict[str, str]
     payload: dict[str, Any]
     timeout: Optional[float] = None
     http2: Optional[bool] = None
+    follow_redirects: Optional[bool] = None


 class ResponseSummary(StandardBaseModel):
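For reference, a sketch of constructing the extended model; field names come from the diff above, while the values are placeholders.

from guidellm.backend.response import RequestArgs

args = RequestArgs(
    target="http://localhost:8000/v1/chat/completions",
    headers={"Content-Type": "application/json"},
    params={},                       # new field: query parameters sent with the request
    payload={"model": "example-model", "stream": True},
    timeout=300.0,
    http2=True,
    follow_redirects=True,           # new field: redirect behavior used for the request
)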
guidellm/benchmark/__init__.py
CHANGED
@@ -12,7 +12,7 @@ from .benchmark import (
     StatusBreakdown,
 )
 from .benchmarker import Benchmarker, BenchmarkerResult, GenerativeBenchmarker
-from .entrypoints import benchmark_generative_text
+from .entrypoints import benchmark_generative_text, reimport_benchmarks_report
 from .output import GenerativeBenchmarksConsole, GenerativeBenchmarksReport
 from .profile import (
     AsyncProfile,
@@ -32,42 +32,36 @@ from .progress import (
 )

 __all__ = [
-    # Aggregator
     "AggregatorT",
-    "
-    "GenerativeBenchmarkAggregator",
-    # Benchmark
+    "AsyncProfile",
     "Benchmark",
+    "BenchmarkAggregator",
     "BenchmarkArgs",
     "BenchmarkMetrics",
     "BenchmarkRunStats",
     "BenchmarkT",
-    "GenerativeBenchmark",
-    "GenerativeMetrics",
-    "GenerativeTextErrorStats",
-    "GenerativeTextResponseStats",
-    "StatusBreakdown",
-    # Benchmarker
     "Benchmarker",
+    "BenchmarkerProgressDisplay",
     "BenchmarkerResult",
+    "BenchmarkerTaskProgressState",
+    "ConcurrentProfile",
+    "GenerativeBenchmark",
+    "GenerativeBenchmarkAggregator",
     "GenerativeBenchmarker",
-    # Entry points
-    "benchmark_generative_text",
-    # Output
     "GenerativeBenchmarksConsole",
     "GenerativeBenchmarksReport",
-
-    "
-    "
+    "GenerativeMetrics",
+    "GenerativeTextBenchmarkerProgressDisplay",
+    "GenerativeTextBenchmarkerTaskProgressState",
+    "GenerativeTextErrorStats",
+    "GenerativeTextResponseStats",
     "Profile",
     "ProfileType",
+    "StatusBreakdown",
     "SweepProfile",
     "SynchronousProfile",
     "ThroughputProfile",
+    "benchmark_generative_text",
     "create_profile",
-
-    "BenchmarkerProgressDisplay",
-    "BenchmarkerTaskProgressState",
-    "GenerativeTextBenchmarkerProgressDisplay",
-    "GenerativeTextBenchmarkerTaskProgressState",
+    "reimport_benchmarks_report",
 ]
guidellm/benchmark/aggregator.py
CHANGED
@@ -32,11 +32,11 @@ from guidellm.request import (
     GenerationRequest,
     GenerativeRequestLoaderDescription,
     RequestLoaderDescription,
+    RequestT,
+    ResponseT,
 )
 from guidellm.scheduler import (
     GenerativeRequestsWorkerDescription,
-    RequestT,
-    ResponseT,
     SchedulerRequestResult,
     WorkerDescription,
 )
@@ -403,7 +403,7 @@ class BenchmarkAggregator(
         in_warmup_duration = (
             self.args.warmup_duration
             and result.request_info.worker_start
-            <= (global_start_time
+            <= (global_start_time + self.args.warmup_duration)
         )

         if in_warmup_number or in_warmup_duration:
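The second hunk completes a previously truncated comparison: a request now counts toward warmup only while its worker start time falls within global_start_time + warmup_duration. A standalone sketch of the corrected check, with made-up numbers in place of the attributes read from self.args and result.request_info:

# Stand-in values; the real code reads these from the aggregator's args and request info.
global_start_time = 1_000.0   # scheduler run start (seconds since epoch)
warmup_duration = 30.0        # args.warmup_duration
worker_start = 1_025.0        # result.request_info.worker_start

in_warmup_duration = bool(
    warmup_duration
    and worker_start <= (global_start_time + warmup_duration)  # 1025.0 <= 1030.0 -> True
)
print(in_warmup_duration)  # True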
guidellm/benchmark/benchmark.py
CHANGED
@@ -34,16 +34,16 @@ from guidellm.scheduler import (
 )

 __all__ = [
-    "BenchmarkT",
-    "StatusBreakdown",
-    "BenchmarkArgs",
-    "BenchmarkRunStats",
     "Benchmark",
+    "BenchmarkArgs",
     "BenchmarkMetrics",
-    "
-    "
-    "GenerativeMetrics",
+    "BenchmarkRunStats",
+    "BenchmarkT",
     "GenerativeBenchmark",
+    "GenerativeMetrics",
+    "GenerativeTextErrorStats",
+    "GenerativeTextResponseStats",
+    "StatusBreakdown",
 ]


@@ -815,12 +815,11 @@ class GenerativeBenchmark(Benchmark):
                     req.first_token_time or req.start_time
                     for req in total_with_output_first
                 ],
-                iter_counts=[
-                    req.prompt_tokens + req.output_tokens
-                    for req in total_with_output_first
-                ],
+                iter_counts=[req.output_tokens for req in total_with_output_first],
                 first_iter_counts=[
-
+                    # prompt tokens + first token
+                    req.prompt_tokens + 1
+                    for req in total_with_output_first
                 ],
             ),
         ),
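The second hunk changes how per-request token counts feed the timing distribution: later iterations are now credited with only the generated (output) tokens, while the first iteration is credited with the prompt plus the first generated token. A small self-contained sketch of the new accounting, using a stand-in class for the benchmark's per-request stats objects:

from dataclasses import dataclass

@dataclass
class ReqStats:                      # stand-in for the benchmark's request stats objects
    prompt_tokens: int
    output_tokens: int

total_with_output_first = [
    ReqStats(prompt_tokens=512, output_tokens=128),
    ReqStats(prompt_tokens=64, output_tokens=256),
]

iter_counts = [req.output_tokens for req in total_with_output_first]            # [128, 256]
first_iter_counts = [req.prompt_tokens + 1 for req in total_with_output_first]  # [513, 65]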
guidellm/benchmark/benchmarker.py
CHANGED
@@ -27,12 +27,12 @@ from guidellm.request import (
     GenerationRequest,
     GenerativeRequestLoaderDescription,
     RequestLoaderDescription,
+    RequestT,
+    ResponseT,
 )
 from guidellm.scheduler import (
     GenerativeRequestsWorker,
     RequestsWorker,
-    RequestT,
-    ResponseT,
     Scheduler,
     SchedulerRequestResult,
     SchedulingStrategy,
guidellm/benchmark/entrypoints.py
CHANGED
@@ -15,10 +15,22 @@ from guidellm.benchmark.output import (
 )
 from guidellm.benchmark.profile import ProfileType, create_profile
 from guidellm.benchmark.progress import GenerativeTextBenchmarkerProgressDisplay
+from guidellm.benchmark.scenario import GenerativeTextScenario, Scenario
 from guidellm.request import GenerativeRequestLoader
 from guidellm.scheduler import StrategyType


+async def benchmark_with_scenario(scenario: Scenario, **kwargs):
+    """
+    Run a benchmark using a scenario and specify any extra arguments
+    """
+
+    if isinstance(scenario, GenerativeTextScenario):
+        return await benchmark_generative_text(**vars(scenario), **kwargs)
+    else:
+        raise ValueError(f"Unsupported Scenario type {type(scenario)}")
+
+
 async def benchmark_generative_text(
     target: str,
     backend_type: BackendType,
@@ -38,18 +50,18 @@ async def benchmark_generative_text(
     data_args: Optional[dict[str, Any]],
     data_sampler: Optional[Literal["random"]],
     rate_type: Union[StrategyType, ProfileType],
-    rate: Optional[Union[
+    rate: Optional[Union[float, list[float]]],
     max_seconds: Optional[float],
     max_requests: Optional[int],
     warmup_percent: Optional[float],
     cooldown_percent: Optional[float],
-    show_progress: bool,
-    show_progress_scheduler_stats: bool,
-    output_console: bool,
     output_path: Optional[Union[str, Path]],
     output_extras: Optional[dict[str, Any]],
     output_sampling: Optional[int],
     random_seed: int,
+    show_progress: bool = True,
+    show_progress_scheduler_stats: bool = False,
+    output_console: bool = True,
 ) -> tuple[GenerativeBenchmarksReport, Optional[Path]]:
     console = GenerativeBenchmarksConsole(enabled=show_progress)
     console.print_line("Creating backend...")
@@ -121,13 +133,8 @@ async def benchmark_generative_text(
     )

     if output_console:
-        orig_enabled = console.enabled
-        console.enabled = True
         console.benchmarks = report.benchmarks
-        console.
-        console.print_benchmarks_info()
-        console.print_benchmarks_stats()
-        console.enabled = orig_enabled
+        console.print_full_report()

     if output_path:
         console.print_line("\nSaving benchmarks report...")
@@ -139,3 +146,20 @@ async def benchmark_generative_text(
     console.print_line("\nBenchmarking complete.")

     return report, saved_path
+
+
+def reimport_benchmarks_report(file: Path, output_path: Optional[Path]) -> None:
+    """
+    The command-line entry point for re-importing and displaying an
+    existing benchmarks report. Can also specify
+    Assumes the file provided exists.
+    """
+    console = GenerativeBenchmarksConsole(enabled=True)
+    report = GenerativeBenchmarksReport.load_file(file)
+    console.benchmarks = report.benchmarks
+    console.print_full_report()
+
+    if output_path:
+        console.print_line("\nSaving benchmarks report...")
+        saved_path = report.save_file(output_path)
+        console.print_line(f"Benchmarks report saved to {saved_path}")
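A hedged usage sketch of the two entry points added above. Only the signatures visible in this diff are assumed; constructing a GenerativeTextScenario is left out because its fields are defined in guidellm/benchmark/scenario.py (added in this release), and the file paths are placeholders.

from pathlib import Path

from guidellm.benchmark import reimport_benchmarks_report

# Re-render a previously saved benchmarks report on the console and save a copy.
reimport_benchmarks_report(Path("benchmarks.json"), output_path=Path("benchmarks-copy.json"))

# benchmark_with_scenario() unpacks a GenerativeTextScenario into
# benchmark_generative_text(); extra keyword arguments pass straight through, e.g.:
#   from guidellm.benchmark.entrypoints import benchmark_with_scenario
#   import asyncio
#   asyncio.run(benchmark_with_scenario(scenario, output_path=Path("report.json")))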