PyPI - guidellm - Versions diffs - 0.3.0rc20250429__py3-none-any.whl → 0.3.1__py3-none-any.whl - Mend

guidellm 0.3.0rc20250429__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of guidellm might be problematic. Click here for more details.

Files changed (55) hide show

guidellm/__init__.py +8 -13
guidellm/__main__.py +290 -69
guidellm/backend/__init__.py +6 -6
guidellm/backend/backend.py +25 -4
guidellm/backend/openai.py +153 -30
guidellm/backend/response.py +6 -2
guidellm/benchmark/__init__.py +16 -22
guidellm/benchmark/aggregator.py +3 -3
guidellm/benchmark/benchmark.py +11 -12
guidellm/benchmark/benchmarker.py +2 -2
guidellm/benchmark/entrypoints.py +34 -10
guidellm/benchmark/output.py +59 -8
guidellm/benchmark/profile.py +4 -4
guidellm/benchmark/progress.py +2 -2
guidellm/benchmark/scenario.py +104 -0
guidellm/benchmark/scenarios/__init__.py +0 -0
guidellm/config.py +32 -7
guidellm/dataset/__init__.py +4 -4
guidellm/dataset/creator.py +1 -1
guidellm/dataset/synthetic.py +36 -11
guidellm/logger.py +8 -4
guidellm/objects/__init__.py +2 -2
guidellm/objects/pydantic.py +30 -1
guidellm/objects/statistics.py +20 -14
guidellm/preprocess/__init__.py +3 -0
guidellm/preprocess/dataset.py +374 -0
guidellm/presentation/__init__.py +28 -0
guidellm/presentation/builder.py +27 -0
guidellm/presentation/data_models.py +232 -0
guidellm/presentation/injector.py +66 -0
guidellm/request/__init__.py +6 -3
guidellm/request/loader.py +5 -5
guidellm/{scheduler → request}/types.py +4 -1
guidellm/scheduler/__init__.py +10 -15
guidellm/scheduler/queues.py +25 -0
guidellm/scheduler/result.py +21 -3
guidellm/scheduler/scheduler.py +68 -60
guidellm/scheduler/strategy.py +26 -24
guidellm/scheduler/worker.py +64 -103
guidellm/utils/__init__.py +17 -5
guidellm/utils/cli.py +62 -0
guidellm/utils/default_group.py +105 -0
guidellm/utils/dict.py +23 -0
guidellm/utils/hf_datasets.py +36 -0
guidellm/utils/random.py +1 -1
guidellm/utils/text.py +14 -15
guidellm/version.py +6 -0
guidellm-0.3.1.dist-info/METADATA +329 -0
guidellm-0.3.1.dist-info/RECORD +62 -0
{guidellm-0.3.0rc20250429.dist-info → guidellm-0.3.1.dist-info}/WHEEL +1 -1
guidellm-0.3.0rc20250429.dist-info/METADATA +0 -453
guidellm-0.3.0rc20250429.dist-info/RECORD +0 -48
{guidellm-0.3.0rc20250429.dist-info → guidellm-0.3.1.dist-info}/entry_points.txt +0 -0
{guidellm-0.3.0rc20250429.dist-info → guidellm-0.3.1.dist-info}/licenses/LICENSE +0 -0
{guidellm-0.3.0rc20250429.dist-info → guidellm-0.3.1.dist-info}/top_level.txt +0 -0

guidellm/presentation/injector.py ADDED Viewed

@@ -0,0 +1,66 @@
+import re
+from pathlib import Path
+from typing import Union
+from loguru import logger
+from guidellm.config import settings
+from guidellm.utils.text import load_text
+def create_report(js_data: dict, output_path: Union[str, Path]) -> Path:
+    """
+    Creates a report from the dictionary and saves it to the output path.
+    :param js_data: dict with match str and json data to inject
+    :type js_data: dict
+    :param output_path: the file to save the report to.
+    :type output_path: str
+    :return: the path to the saved report
+    :rtype: str
+    """
+    if not isinstance(output_path, Path):
+        output_path = Path(output_path)
+    html_content = load_text(settings.report_generation.source)
+    report_content = inject_data(
+        js_data,
+        html_content,
+    )
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    output_path.write_text(report_content)
+    return output_path
+def inject_data(
+    js_data: dict,
+    html: str,
+) -> str:
+    """
+    Injects the json data into the HTML,
+    replacing placeholders only within the <head> section.
+    :param js_data: the json data to inject
+    :type js_data: dict
+    :param html: the html to inject the data into
+    :type html: str
+    :return: the html with the json data injected
+    :rtype: str
+    """
+    head_match = re.search(r"<head[^>]*>(.*?)</head>", html, re.DOTALL | re.IGNORECASE)
+    if not head_match:
+        logger.warning("<head> section missing, returning original HTML.")
+        return html
+    head_content = head_match.group(1)
+    # Replace placeholders only inside the <head> content
+    for placeholder, script in js_data.items():
+        head_content = head_content.replace(placeholder, script)
+    # Rebuild the HTML
+    new_head = f"<head>{head_content}</head>"
+    return html[: head_match.start()] + new_head + html[head_match.end() :]

guidellm/request/__init__.py CHANGED Viewed

@@ -5,11 +5,14 @@ from .loader import (
     RequestLoaderDescription,
 )
 from .request import GenerationRequest
+from .types import RequestT, ResponseT
 __all__ = [
+    "GenerationRequest",
+    "GenerativeRequestLoader",
+    "GenerativeRequestLoaderDescription",
     "RequestLoader",
     "RequestLoaderDescription",
-    "GenerativeRequestLoaderDescription",
-    "GenerativeRequestLoader",
-    "GenerationRequest",
+    "RequestT",
+    "ResponseT",
 ]

guidellm/request/loader.py CHANGED Viewed

@@ -17,10 +17,10 @@ from guidellm.objects import StandardBaseModel
 from guidellm.request.request import GenerationRequest
 __all__ = [
-    "RequestLoaderDescription",
-    "RequestLoader",
-    "GenerativeRequestLoaderDescription",
     "GenerativeRequestLoader",
+    "GenerativeRequestLoaderDescription",
+    "RequestLoader",
+    "RequestLoaderDescription",
 ]
@@ -30,10 +30,10 @@ class RequestLoaderDescription(StandardBaseModel):
 class RequestLoader(Iterable):
     @abstractmethod
-    def __iter__(self): ...
+    def __iter__(self) -> Iterator: ...
     @abstractmethod
-    def __len__(self): ...
+    def __len__(self) -> int: ...
     @property
     @abstractmethod

guidellm/{scheduler → request}/types.py RENAMED Viewed

@@ -1,6 +1,9 @@
 from typing import TypeVar
-__all__ = ["RequestT", "ResponseT"]
+__all__ = [
+    "RequestT",
+    "ResponseT",
+]
 RequestT = TypeVar("RequestT")

guidellm/scheduler/__init__.py CHANGED Viewed

@@ -15,38 +15,33 @@ from .strategy import (
     ThroughputStrategy,
     strategy_display_str,
 )
-from .types import RequestT, ResponseT
 from .worker import (
     GenerativeRequestsWorker,
     GenerativeRequestsWorkerDescription,
     RequestsWorker,
     ResolveStatus,
     WorkerDescription,
-    WorkerProcessRequest,
     WorkerProcessResult,
 )
 __all__ = [
+    "AsyncConstantStrategy",
+    "AsyncPoissonStrategy",
+    "ConcurrentStrategy",
+    "GenerativeRequestsWorker",
+    "GenerativeRequestsWorkerDescription",
+    "RequestsWorker",
+    "ResolveStatus",
+    "Scheduler",
     "SchedulerRequestInfo",
     "SchedulerRequestResult",
     "SchedulerResult",
     "SchedulerRunInfo",
-    "Scheduler",
-    "AsyncConstantStrategy",
-    "AsyncPoissonStrategy",
-    "ConcurrentStrategy",
     "SchedulingStrategy",
     "StrategyType",
     "SynchronousStrategy",
     "ThroughputStrategy",
-    "strategy_display_str",
-    "RequestT",
-    "ResponseT",
-    "WorkerProcessRequest",
-    "WorkerProcessResult",
-    "ResolveStatus",
     "WorkerDescription",
-    "RequestsWorker",
-    "GenerativeRequestsWorkerDescription",
-    "GenerativeRequestsWorker",
+    "WorkerProcessResult",
+    "strategy_display_str",
 ]

guidellm/scheduler/queues.py ADDED Viewed

@@ -0,0 +1,25 @@
+"""
+Helper module for importing the correct queue types.
+"""
+from dataclasses import dataclass
+from queue import Empty as QueueEmpty
+from queue import Full as QueueFull
+from queue import Queue
+from typing import Generic
+from guidellm.request.types import RequestT, ResponseT
+from guidellm.scheduler.result import WorkerProcessRequest, WorkerProcessResult
+__all__ = [
+    "MPQueues",
+    "Queue",
+    "QueueEmpty",
+    "QueueFull",
+]
+@dataclass
+class MPQueues(Generic[RequestT, ResponseT]):
+    requests: Queue[WorkerProcessRequest[RequestT, ResponseT]]
+    responses: Queue[WorkerProcessResult[RequestT, ResponseT]]

guidellm/scheduler/result.py CHANGED Viewed

@@ -1,3 +1,4 @@
+from dataclasses import dataclass
 from typing import (
     Generic,
     Literal,
@@ -5,14 +6,16 @@ from typing import (
 )
 from guidellm.objects import StandardBaseModel
+from guidellm.request.types import RequestT, ResponseT
 from guidellm.scheduler.strategy import SchedulingStrategy
-from guidellm.scheduler.types import RequestT, ResponseT
 __all__ = [
-    "SchedulerResult",
+    "SchedulerRequestInfo",
     "SchedulerRequestResult",
+    "SchedulerResult",
     "SchedulerRunInfo",
-    "SchedulerRequestInfo",
+    "WorkerProcessRequest",
+    "WorkerProcessResult",
 ]
@@ -135,3 +138,18 @@ class SchedulerRequestResult(
     request: RequestT
     request_info: SchedulerRequestInfo
     response: Optional[ResponseT] = None
+@dataclass
+class WorkerProcessRequest(Generic[RequestT, ResponseT]):
+    request: RequestT
+    timeout_time: float
+    queued_time: float
+@dataclass
+class WorkerProcessResult(Generic[RequestT, ResponseT]):
+    type_: Literal["request_scheduled", "request_start", "request_complete"]
+    request: RequestT
+    response: Optional[ResponseT]
+    info: SchedulerRequestInfo

guidellm/scheduler/scheduler.py CHANGED Viewed

@@ -1,10 +1,10 @@
 import asyncio
 import math
-import multiprocessing
-import multiprocessing.queues
 import time
 from collections.abc import AsyncGenerator, Iterable, Iterator
 from concurrent.futures import ProcessPoolExecutor
+from multiprocessing import Manager
+from threading import Event
 from typing import (
     Any,
     Generic,
@@ -15,17 +15,21 @@ from typing import (
 from loguru import logger
 from guidellm.config import settings
+from guidellm.request.types import (
+    RequestT,
+    ResponseT,
+)
+from guidellm.scheduler.queues import MPQueues, Queue, QueueEmpty
 from guidellm.scheduler.result import (
     SchedulerRequestResult,
     SchedulerResult,
     SchedulerRunInfo,
+    WorkerProcessRequest,
+    WorkerProcessResult,
 )
 from guidellm.scheduler.strategy import SchedulingStrategy
-from guidellm.scheduler.types import RequestT, ResponseT
 from guidellm.scheduler.worker import (
     RequestsWorker,
-    WorkerProcessRequest,
-    WorkerProcessResult,
 )
 __all__ = ["Scheduler"]
@@ -114,18 +118,31 @@ class Scheduler(Generic[RequestT, ResponseT]):
             raise ValueError(f"Invalid max_duration: {max_duration}")
         with (
-            multiprocessing.Manager() as manager,
+            Manager() as manager,
             ProcessPoolExecutor(
                 max_workers=scheduling_strategy.processes_limit
             ) as executor,
         ):
             requests_iter: Optional[Iterator[Any]] = None
-            futures, requests_queue, responses_queue = await self._start_processes(
+            scheduling_strategy.start_time = (
+                time.time() + settings.scheduler_start_delay
+            )  # Add a small delay to allow processes to start
+            futures, queues, stop_event = await self._start_processes(
                 manager, executor, scheduling_strategy
             )
             run_info, requests_iter, times_iter = self._run_setup(
                 futures, scheduling_strategy, max_number, max_duration
             )
+            # Add some initial requests to the queue
+            requests_iter = self._add_requests(
+                requests_iter,
+                queues.requests,
+                times_iter,
+                run_info,
+            )
+            # Wait for the test to start
+            await asyncio.sleep(time.time() - scheduling_strategy.start_time)
             yield SchedulerResult(
                 type_="run_start",
                 run_info=run_info,
@@ -140,7 +157,11 @@ class Scheduler(Generic[RequestT, ResponseT]):
                     if (
                         requests_iter is None
-                        and run_info.completed_requests >= run_info.created_requests
+                        and run_info.processing_requests <= 0
+                        and (  # Ensure we have met one of the end conditions
+                            time.time() >= run_info.end_time
+                            or run_info.completed_requests >= run_info.end_number
+                        )
                     ):
                         # we've exhausted all requests we've wanted to run
                         # and yielded all responses
@@ -148,14 +169,14 @@ class Scheduler(Generic[RequestT, ResponseT]):
                     requests_iter = self._add_requests(
                         requests_iter,
+                        queues.requests,
                         times_iter,
-                        requests_queue,
                         run_info,
                     )
                     await asyncio.sleep(0)  # enable requests to start
                     iter_result = self._check_result_ready(
-                        responses_queue,
+                        queues.responses,
                         run_info,
                     )
                     if iter_result is not None:
@@ -171,7 +192,7 @@ class Scheduler(Generic[RequestT, ResponseT]):
                 run_info=run_info,
             )
-            await self._stop_processes(futures, requests_queue)
+            await self._stop_processes(futures, stop_event)
     async def _start_processes(
         self,
@@ -180,14 +201,17 @@ class Scheduler(Generic[RequestT, ResponseT]):
         scheduling_strategy: SchedulingStrategy,
     ) -> tuple[
         list[asyncio.Future],
-        multiprocessing.Queue,
-        multiprocessing.Queue,
+        MPQueues[RequestT, ResponseT],
+        Event,
     ]:
         await self.worker.prepare_multiprocessing()
-        requests_queue = manager.Queue(
-            maxsize=scheduling_strategy.queued_requests_limit
+        queues: MPQueues[RequestT, ResponseT] = MPQueues(
+            requests=manager.Queue(
+                maxsize=scheduling_strategy.processing_requests_limit
+            ),
+            responses=manager.Queue(),
         )
-        responses_queue = manager.Queue()
+        stop_event = manager.Event()
         num_processes = min(
             scheduling_strategy.processes_limit,
@@ -212,36 +236,22 @@ class Scheduler(Generic[RequestT, ResponseT]):
         futures = []
         loop = asyncio.get_event_loop()
         for id_, requests_limit in zip(process_ids, process_requests_limits):
-            if scheduling_strategy.processing_mode == "sync":
-                futures.append(
-                    loop.run_in_executor(
-                        executor,
-                        self.worker.process_loop_synchronous,
-                        requests_queue,
-                        responses_queue,
-                        id_,
-                    )
-                )
-            elif scheduling_strategy.processing_mode == "async":
-                futures.append(
-                    loop.run_in_executor(
-                        executor,
-                        self.worker.process_loop_asynchronous,
-                        requests_queue,
-                        responses_queue,
-                        requests_limit,
-                        id_,
-                    )
-                )
-            else:
-                raise ValueError(
-                    f"Invalid processing mode: {scheduling_strategy.processing_mode} "
-                    f"for strategy: {scheduling_strategy}"
+            futures.append(
+                loop.run_in_executor(
+                    executor,
+                    self.worker.process_loop_asynchronous,
+                    queues,
+                    scheduling_strategy,
+                    stop_event,
+                    requests_limit,
+                    id_,
+                    num_processes,
                 )
+            )
         await asyncio.sleep(0.1)  # give time for processes to start
-        return futures, requests_queue, responses_queue
+        return futures, queues, stop_event
     def _run_setup(
         self,
@@ -251,9 +261,8 @@ class Scheduler(Generic[RequestT, ResponseT]):
         max_duration: Optional[float],
     ) -> tuple[SchedulerRunInfo, Iterator[Any], Iterator[float]]:
         requests_iter = iter(self.request_loader)
-        start_time = time.time()
         times_iter = iter(scheduling_strategy.request_times())
-        end_time = time.time() + (max_duration or math.inf)
+        end_time = scheduling_strategy.start_time + (max_duration or math.inf)
         end_number = max_number or math.inf
         try:
@@ -271,7 +280,7 @@ class Scheduler(Generic[RequestT, ResponseT]):
             )
         info = SchedulerRunInfo(
-            start_time=start_time,
+            start_time=scheduling_strategy.start_time,
             end_time=end_time,
             end_number=end_number,
             processes=len(processes),
@@ -283,30 +292,29 @@ class Scheduler(Generic[RequestT, ResponseT]):
     def _add_requests(
         self,
         requests_iter: Optional[Iterator[Any]],
+        requests_queue: Queue[WorkerProcessRequest[RequestT, ResponseT]],
         times_iter: Iterator[float],
-        requests_queue: multiprocessing.Queue,
         run_info: SchedulerRunInfo,
     ) -> Optional[Iterator[Any]]:
         if requests_iter is not None:
             try:
                 added_count = 0
-                while (
-                    not requests_queue.full()
-                    and added_count < settings.max_add_requests_per_loop
+                while not requests_queue.full() and added_count < (
+                    run_info.strategy.queued_requests_limit
+                    or settings.min_queued_requests
                 ):
                     if run_info.created_requests >= run_info.end_number:
                         raise StopIteration
                     if (
-                        request_time := next(times_iter)
-                    ) >= run_info.end_time or time.time() >= run_info.end_time:
+                        next(times_iter) >= run_info.end_time
+                        or time.time() >= run_info.end_time
+                    ):
                         raise StopIteration
-                    request = next(requests_iter)
-                    work_req: WorkerProcessRequest[RequestT] = WorkerProcessRequest(
-                        request=request,
-                        start_time=request_time,
+                    work_req = WorkerProcessRequest[RequestT, ResponseT](
+                        request=next(requests_iter),
                         timeout_time=run_info.end_time,
                         queued_time=time.time(),
                     )
@@ -324,14 +332,14 @@ class Scheduler(Generic[RequestT, ResponseT]):
     def _check_result_ready(
         self,
-        responses_queue: multiprocessing.Queue,
+        responses_queue: Queue[WorkerProcessResult[RequestT, ResponseT]],
         run_info: SchedulerRunInfo,
     ) -> Optional[SchedulerRequestResult[RequestT, ResponseT]]:
         try:
             process_response: WorkerProcessResult[RequestT, ResponseT] = (
                 responses_queue.get_nowait()
             )
-        except multiprocessing.queues.Empty:  # type: ignore[attr-defined]
+        except QueueEmpty:
             return None
         if process_response.type_ == "request_scheduled":
@@ -374,9 +382,9 @@ class Scheduler(Generic[RequestT, ResponseT]):
     async def _stop_processes(
         self,
         futures: list[asyncio.Future],
-        requests_queue: multiprocessing.Queue,
+        stop_event: Event,
     ):
-        for _ in futures:
-            requests_queue.put(None)
+        # stop all processes
+        stop_event.set()
         await asyncio.gather(*futures)

guidellm/scheduler/strategy.py CHANGED Viewed

@@ -1,5 +1,4 @@
 import math
-import os
 import random
 import time
 from collections.abc import Generator
@@ -15,13 +14,13 @@ from guidellm.config import settings
 from guidellm.objects import StandardBaseModel
 __all__ = [
-    "StrategyType",
+    "AsyncConstantStrategy",
+    "AsyncPoissonStrategy",
+    "ConcurrentStrategy",
     "SchedulingStrategy",
+    "StrategyType",
     "SynchronousStrategy",
-    "ConcurrentStrategy",
     "ThroughputStrategy",
-    "AsyncConstantStrategy",
-    "AsyncPoissonStrategy",
     "strategy_display_str",
 ]
@@ -44,6 +43,10 @@ class SchedulingStrategy(StandardBaseModel):
     type_: Literal["strategy"] = Field(
         description="The type of scheduling strategy schedule requests with.",
     )
+    start_time: float = Field(
+        default_factory=time.time,
+        description="The start time for the scheduling strategy.",
+    )
     @property
     def processing_mode(self) -> Literal["sync", "async"]:
@@ -68,9 +71,7 @@ class SchedulingStrategy(StandardBaseModel):
         :return: The number of processes for the scheduling strategy.
         """
-        cpu_cores = os.cpu_count() or 1
-        return min(max(1, cpu_cores - 1), settings.max_worker_processes)
+        return settings.max_worker_processes
     @property
     def queued_requests_limit(self) -> Optional[int]:
@@ -175,8 +176,9 @@ class SynchronousStrategy(SchedulingStrategy):
         :return: A generator that yields time.time() for immediate request scheduling.
         """
+        init_time = self.start_time
         while True:
-            yield time.time()
+            yield max(init_time, time.time())
 class ConcurrentStrategy(SchedulingStrategy):
@@ -226,7 +228,8 @@ class ConcurrentStrategy(SchedulingStrategy):
         :return: {self.streams} for the concurrent scheduling strategy to limit
             the worker processes to the number of streams.
         """
-        return self.streams
+        return min(self.streams, settings.max_worker_processes)
     @property
     def queued_requests_limit(self) -> int:
@@ -260,8 +263,9 @@ class ConcurrentStrategy(SchedulingStrategy):
         :return: A generator that yields time.time() for immediate request scheduling.
         """
+        init_time = self.start_time
         while True:
-            yield time.time()
+            yield max(init_time, time.time())
 class ThroughputStrategy(SchedulingStrategy):
@@ -334,10 +338,9 @@ class ThroughputStrategy(SchedulingStrategy):
         :return: A generator that yields the start time.time()
             for immediate request scheduling.
         """
-        start_time = time.time()
+        init_time = self.start_time
         while True:
-            yield start_time
+            yield init_time
 class AsyncConstantStrategy(ThroughputStrategy):
@@ -389,24 +392,24 @@ class AsyncConstantStrategy(ThroughputStrategy):
         :return: A generator that yields timestamps for request scheduling.
         """
-        start_time = time.time()
         constant_increment = 1.0 / self.rate
+        init_time = self.start_time
         # handle bursts first to get to the desired rate
         if self.initial_burst is not None:
             # send an initial burst equal to the rate
             # to reach the target rate
             burst_count = math.floor(self.rate)
             for _ in range(burst_count):
-                yield start_time
+                yield init_time
-            start_time += constant_increment
+            init_time += constant_increment
         counter = 0
         # continue with constant rate after bursting
         while True:
-            yield start_time + constant_increment * counter
+            yield init_time + constant_increment * counter
             counter += 1
@@ -459,24 +462,23 @@ class AsyncPoissonStrategy(ThroughputStrategy):
         :return: A generator that yields timestamps for request scheduling.
         """
-        start_time = time.time()
+        init_time = self.start_time
         if self.initial_burst is not None:
             # send an initial burst equal to the rate
             # to reach the target rate
             burst_count = math.floor(self.rate)
             for _ in range(burst_count):
-                yield start_time
+                yield init_time
         else:
-            yield start_time
+            yield init_time
         # set the random seed for reproducibility
         rand = random.Random(self.random_seed)  # noqa: S311
         while True:
             inter_arrival_time = rand.expovariate(self.rate)
-            start_time += inter_arrival_time
-            yield start_time
+            init_time += inter_arrival_time
+            yield init_time
 def strategy_display_str(strategy: Union[StrategyType, SchedulingStrategy]) -> str:

guidellm 0.3.0rc20250429__py3-none-any.whl → 0.3.1__py3-none-any.whl

Potentially problematic release.

guidellm 0.3.0rc20250429__py3-none-any.whl → 0.3.1__py3-none-any.whl