PyPI - guidellm - Versions diffs - 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl - Mend

guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (141) hide show

guidellm/__init__.py +5 -2
guidellm/__main__.py +524 -255
guidellm/backends/__init__.py +33 -0
guidellm/backends/backend.py +109 -0
guidellm/backends/openai.py +340 -0
guidellm/backends/response_handlers.py +428 -0
guidellm/benchmark/__init__.py +69 -39
guidellm/benchmark/benchmarker.py +160 -316
guidellm/benchmark/entrypoints.py +560 -127
guidellm/benchmark/outputs/__init__.py +24 -0
guidellm/benchmark/outputs/console.py +633 -0
guidellm/benchmark/outputs/csv.py +721 -0
guidellm/benchmark/outputs/html.py +473 -0
guidellm/benchmark/outputs/output.py +169 -0
guidellm/benchmark/outputs/serialized.py +69 -0
guidellm/benchmark/profiles.py +718 -0
guidellm/benchmark/progress.py +553 -556
guidellm/benchmark/scenarios/__init__.py +40 -0
guidellm/benchmark/scenarios/chat.json +6 -0
guidellm/benchmark/scenarios/rag.json +6 -0
guidellm/benchmark/schemas/__init__.py +66 -0
guidellm/benchmark/schemas/base.py +402 -0
guidellm/benchmark/schemas/generative/__init__.py +55 -0
guidellm/benchmark/schemas/generative/accumulator.py +841 -0
guidellm/benchmark/schemas/generative/benchmark.py +163 -0
guidellm/benchmark/schemas/generative/entrypoints.py +381 -0
guidellm/benchmark/schemas/generative/metrics.py +927 -0
guidellm/benchmark/schemas/generative/report.py +158 -0
guidellm/data/__init__.py +34 -4
guidellm/data/builders.py +541 -0
guidellm/data/collators.py +16 -0
guidellm/data/config.py +120 -0
guidellm/data/deserializers/__init__.py +49 -0
guidellm/data/deserializers/deserializer.py +141 -0
guidellm/data/deserializers/file.py +223 -0
guidellm/data/deserializers/huggingface.py +94 -0
guidellm/data/deserializers/memory.py +194 -0
guidellm/data/deserializers/synthetic.py +246 -0
guidellm/data/entrypoints.py +52 -0
guidellm/data/loaders.py +190 -0
guidellm/data/preprocessors/__init__.py +27 -0
guidellm/data/preprocessors/formatters.py +410 -0
guidellm/data/preprocessors/mappers.py +196 -0
guidellm/data/preprocessors/preprocessor.py +30 -0
guidellm/data/processor.py +29 -0
guidellm/data/schemas.py +175 -0
guidellm/data/utils/__init__.py +6 -0
guidellm/data/utils/dataset.py +94 -0
guidellm/extras/__init__.py +4 -0
guidellm/extras/audio.py +220 -0
guidellm/extras/vision.py +242 -0
guidellm/logger.py +2 -2
guidellm/mock_server/__init__.py +8 -0
guidellm/mock_server/config.py +84 -0
guidellm/mock_server/handlers/__init__.py +17 -0
guidellm/mock_server/handlers/chat_completions.py +280 -0
guidellm/mock_server/handlers/completions.py +280 -0
guidellm/mock_server/handlers/tokenizer.py +142 -0
guidellm/mock_server/models.py +510 -0
guidellm/mock_server/server.py +238 -0
guidellm/mock_server/utils.py +302 -0
guidellm/scheduler/__init__.py +69 -26
guidellm/scheduler/constraints/__init__.py +49 -0
guidellm/scheduler/constraints/constraint.py +325 -0
guidellm/scheduler/constraints/error.py +411 -0
guidellm/scheduler/constraints/factory.py +182 -0
guidellm/scheduler/constraints/request.py +312 -0
guidellm/scheduler/constraints/saturation.py +722 -0
guidellm/scheduler/environments.py +252 -0
guidellm/scheduler/scheduler.py +137 -368
guidellm/scheduler/schemas.py +358 -0
guidellm/scheduler/strategies.py +617 -0
guidellm/scheduler/worker.py +413 -419
guidellm/scheduler/worker_group.py +712 -0
guidellm/schemas/__init__.py +65 -0
guidellm/schemas/base.py +417 -0
guidellm/schemas/info.py +188 -0
guidellm/schemas/request.py +235 -0
guidellm/schemas/request_stats.py +349 -0
guidellm/schemas/response.py +124 -0
guidellm/schemas/statistics.py +1018 -0
guidellm/{config.py → settings.py} +31 -24
guidellm/utils/__init__.py +71 -8
guidellm/utils/auto_importer.py +98 -0
guidellm/utils/cli.py +132 -5
guidellm/utils/console.py +566 -0
guidellm/utils/encoding.py +778 -0
guidellm/utils/functions.py +159 -0
guidellm/utils/hf_datasets.py +1 -2
guidellm/utils/hf_transformers.py +4 -4
guidellm/utils/imports.py +9 -0
guidellm/utils/messaging.py +1118 -0
guidellm/utils/mixins.py +115 -0
guidellm/utils/random.py +3 -4
guidellm/utils/registry.py +220 -0
guidellm/utils/singleton.py +133 -0
guidellm/utils/synchronous.py +159 -0
guidellm/utils/text.py +163 -50
guidellm/utils/typing.py +41 -0
guidellm/version.py +2 -2
guidellm-0.6.0a5.dist-info/METADATA +364 -0
guidellm-0.6.0a5.dist-info/RECORD +109 -0
guidellm/backend/__init__.py +0 -23
guidellm/backend/backend.py +0 -259
guidellm/backend/openai.py +0 -708
guidellm/backend/response.py +0 -136
guidellm/benchmark/aggregator.py +0 -760
guidellm/benchmark/benchmark.py +0 -837
guidellm/benchmark/output.py +0 -997
guidellm/benchmark/profile.py +0 -409
guidellm/benchmark/scenario.py +0 -104
guidellm/data/prideandprejudice.txt.gz +0 -0
guidellm/dataset/__init__.py +0 -22
guidellm/dataset/creator.py +0 -213
guidellm/dataset/entrypoints.py +0 -42
guidellm/dataset/file.py +0 -92
guidellm/dataset/hf_datasets.py +0 -62
guidellm/dataset/in_memory.py +0 -132
guidellm/dataset/synthetic.py +0 -287
guidellm/objects/__init__.py +0 -18
guidellm/objects/pydantic.py +0 -89
guidellm/objects/statistics.py +0 -953
guidellm/preprocess/__init__.py +0 -3
guidellm/preprocess/dataset.py +0 -374
guidellm/presentation/__init__.py +0 -28
guidellm/presentation/builder.py +0 -27
guidellm/presentation/data_models.py +0 -232
guidellm/presentation/injector.py +0 -66
guidellm/request/__init__.py +0 -18
guidellm/request/loader.py +0 -284
guidellm/request/request.py +0 -79
guidellm/request/types.py +0 -10
guidellm/scheduler/queues.py +0 -25
guidellm/scheduler/result.py +0 -155
guidellm/scheduler/strategy.py +0 -495
guidellm-0.3.1.dist-info/METADATA +0 -329
guidellm-0.3.1.dist-info/RECORD +0 -62
{guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/WHEEL +0 -0
{guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/entry_points.txt +0 -0
{guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/licenses/LICENSE +0 -0
{guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/top_level.txt +0 -0

guidellm/scheduler/schemas.py ADDED Viewed

@@ -0,0 +1,358 @@
+"""
+Core data structures and interfaces for the GuideLLM scheduler system.
+Provides type-safe abstractions for distributed request processing, timing
+measurements, and backend interfaces for benchmarking operations. Central to
+the scheduler architecture, enabling request lifecycle tracking, backend
+coordination, and state management across distributed worker processes.
+"""
+from __future__ import annotations
+import time
+from collections.abc import AsyncIterator
+from typing import Any, Generic, Literal, Protocol, TypeVar
+from pydantic import Field
+from typing_extensions import TypeAliasType
+from guidellm.schemas import RequestInfo, StandardBaseModel
+from guidellm.utils import RegistryMixin
+from guidellm.utils.registry import RegistryObjT
+# Names exported by a star-import of this module; kept in alphabetical order.
+__all__ = [
+    "BackendInterface",
+    "BackendT",
+    "MultiTurnRequestT",
+    "RequestT",
+    "ResponseT",
+    "SchedulerMessagingPydanticRegistry",
+    "SchedulerProgress",
+    "SchedulerState",
+    "SchedulerUpdateAction",
+]
+RequestT = TypeVar("RequestT")
+"Generic request object type for scheduler processing"
+ResponseT = TypeVar("ResponseT")
+"Generic response object type returned by backend processing"
+# A multi-turn request is an ordered sequence of turns, held in a list or a
+# tuple. Each turn is either a bare request or a ``(request, float)`` pair;
+# the float is presumably the per-turn delay referred to in the description
+# string below.
+# The tuple arm is variadic (``tuple[X, ...]``) so it accepts any number of
+# turns, mirroring the list arm. A plain ``tuple[X]`` would only describe a
+# tuple containing exactly one turn.
+MultiTurnRequestT = TypeAliasType(
+    "MultiTurnRequestT",
+    list[RequestT | tuple[RequestT, float]]
+    | tuple[RequestT | tuple[RequestT, float], ...],
+    type_params=(RequestT,),
+)
+"Multi-turn request structure supporting conversation history with optional delays"
+class SchedulerMessagingPydanticRegistry(RegistryMixin[RegistryObjT]):
+    """
+    Registry for Pydantic types used in scheduler inter-process messaging.
+    Enables generic interface for defining Pydantic class types used for
+    communication between distributed scheduler components and worker processes.
+    This subclass defines no members of its own; everything it offers is
+    inherited from ``RegistryMixin``.
+    """
+class BackendInterface(Protocol, Generic[RequestT, ResponseT]):
+    """
+    Protocol defining the interface for request processing backends.
+    Establishes the contract for backend implementations that process requests
+    within the scheduler system. Backends manage initialization, validation,
+    processing, and shutdown lifecycle. All properties must be pickleable before
+    process_startup is called for multi-process environments.
+    The member bodies here are stubs; concrete backends supply the behavior.
+    Example:
+    ::
+        class CustomBackend(BackendInterface):
+            @property
+            def processes_limit(self) -> int:
+                return 4
+            async def resolve(self, request, request_info, history=None):
+                yield response, updated_request_info
+    """
+    @property
+    def processes_limit(self) -> int | None:
+        """
+        :return: Maximum worker processes supported, or None if unlimited
+        """
+    @property
+    def requests_limit(self) -> int | None:
+        """
+        :return: Maximum concurrent requests supported, or None if unlimited
+        """
+    @property
+    def info(self) -> dict[str, Any]:
+        """
+        :return: Backend metadata including model initialization and configuration
+        """
+    async def process_startup(self) -> None:
+        """
+        Perform backend initialization and startup procedures.
+        :raises Exception: Implementation-specific exceptions for startup failures
+        """
+    async def validate(self) -> None:
+        """
+        Validate backend configuration and operational status.
+        :raises Exception: Implementation-specific exceptions for validation failures
+        """
+    async def process_shutdown(self) -> None:
+        """
+        Perform backend cleanup and shutdown procedures.
+        :raises Exception: Implementation-specific exceptions for shutdown failures
+        """
+    async def resolve(
+        self,
+        request: RequestT,
+        request_info: RequestInfo,
+        history: list[tuple[RequestT, ResponseT]] | None = None,
+    ) -> AsyncIterator[tuple[ResponseT, RequestInfo]]:
+        """
+        Process a request and yield incremental response updates.
+        Implementations are async generators: calling ``resolve(...)`` returns
+        the async iterator directly, to be consumed with ``async for``.
+        :param request: The request object to process
+        :param request_info: Scheduling metadata and timing information
+        :param history: Conversation history for multi-turn requests
+        :yield: Tuples of (response, updated_request_info) for each response chunk
+        :raises NotImplementedError: If this protocol stub itself is iterated
+        :raises Exception: Implementation-specific exceptions for processing failures
+        """
+        # Fail loudly if the stub is ever iterated instead of an implementation.
+        raise NotImplementedError
+        # The unreachable ``yield`` turns this stub into an async generator
+        # function, so type checkers see "call returns AsyncIterator" -- the
+        # same shape as implementations written with ``async def`` + ``yield``.
+        # Without it, the signature is read as a coroutine that must be awaited
+        # to obtain the iterator, which generator-style backends do not satisfy.
+        if False:  # pragma: no cover
+            yield
+# Bound to BackendInterface, so only types satisfying that protocol may be
+# substituted for this type variable.
+BackendT = TypeVar("BackendT", bound=BackendInterface)
+"Generic backend interface type for request processing"
+class SchedulerProgress(StandardBaseModel):
+    """
+    Progress tracking data for scheduler operations.
+    Provides estimates for remaining work in scheduler operations, including
+    fraction remaining, request counts, and duration. Used by constraints and
+    monitoring systems to track execution progress and make termination decisions.
+    Every field defaults to None. The fraction properties return None whenever
+    a value they need is None or the relevant total is not greater than zero.
+    """
+    remaining_requests: float | None = Field(
+        description="Estimated number of remaining requests to process", default=None
+    )
+    total_requests: float | None = Field(
+        description="Total number of requests to process", default=None
+    )
+    remaining_duration: float | None = Field(
+        description="Estimated remaining duration in seconds", default=None
+    )
+    total_duration: float | None = Field(
+        description="Total duration in seconds to process for", default=None
+    )
+    stop_time: float | None = Field(
+        description="The timestamp the processing stopped at", default=None
+    )
+    @property
+    def remaining_fraction(self) -> float | None:
+        """
+        :return: The smaller of the remaining-requests and remaining-duration
+            fractions among those that are known, or None if neither is known
+        """
+        known_fractions = [
+            fraction
+            for fraction in (
+                self.remaining_requests_fraction,
+                self.remaining_duration_fraction,
+            )
+            if fraction is not None
+        ]
+        return min(known_fractions) if known_fractions else None
+    @property
+    def remaining_requests_fraction(self) -> float | None:
+        """
+        :return: ``remaining_requests / total_requests``, or None if either
+            value is None or ``total_requests`` is not greater than zero
+        """
+        if self.remaining_requests is None or self.total_requests is None:
+            return None
+        if not self.total_requests > 0:
+            # Avoids dividing by zero (or by a negative total)
+            return None
+        return self.remaining_requests / float(self.total_requests)
+    @property
+    def remaining_duration_fraction(self) -> float | None:
+        """
+        :return: ``remaining_duration / total_duration``, or None if either
+            value is None or ``total_duration`` is not greater than zero
+        """
+        if self.remaining_duration is None or self.total_duration is None:
+            return None
+        if not self.total_duration > 0:
+            # Avoids dividing by zero (or by a negative total)
+            return None
+        return self.remaining_duration / float(self.total_duration)
+    def combine(self, other: SchedulerProgress) -> SchedulerProgress:
+        """
+        Merge another progress estimate into this instance, in place.
+        Requests and duration are handled independently: the (remaining, total)
+        pair is copied from ``other`` when other's remaining fraction is known
+        and this instance's fraction is either unknown or higher. The earlier
+        of the two stop times is kept.
+        :param other: Another progress instance to combine with; it is only read
+        :return: This same instance (``self``) after being updated, not a copy
+        """
+        other_requests_fraction = other.remaining_requests_fraction
+        if other_requests_fraction is not None:
+            own_requests_fraction = self.remaining_requests_fraction
+            if (
+                own_requests_fraction is None
+                or other_requests_fraction < own_requests_fraction
+            ):
+                # Only update if the other is more advanced (lower fraction)
+                self.remaining_requests = other.remaining_requests
+                self.total_requests = other.total_requests
+        other_duration_fraction = other.remaining_duration_fraction
+        if other_duration_fraction is not None:
+            own_duration_fraction = self.remaining_duration_fraction
+            if (
+                own_duration_fraction is None
+                or other_duration_fraction < own_duration_fraction
+            ):
+                # Only update if the other is more advanced (lower fraction)
+                self.remaining_duration = other.remaining_duration
+                self.total_duration = other.total_duration
+        if other.stop_time is not None and (
+            self.stop_time is None or other.stop_time < self.stop_time
+        ):
+            # Only update if the other has an earlier stop time
+            self.stop_time = other.stop_time
+        return self
+class SchedulerUpdateAction(StandardBaseModel):
+    """
+    Control directives for scheduler behavior and operations.
+    Bundles two independent signals -- one for request queuing, one for request
+    processing -- together with free-form metadata and a progress estimate.
+    Used by constraints to communicate termination conditions and progress to
+    scheduler components. With no arguments, both signals are "continue".
+    Example:
+    ::
+        action = SchedulerUpdateAction(
+            request_queuing="stop",
+            request_processing="continue",
+            metadata={"reason": "max_requests_reached"}
+        )
+    """
+    request_queuing: Literal["continue", "stop"] = Field(
+        default="continue",
+        description="Action to take for request queuing operations",
+    )
+    request_processing: Literal["continue", "stop_local", "stop_all"] = Field(
+        default="continue",
+        description="Action to take for request processing operations",
+    )
+    # Factories give every instance its own dict / progress object.
+    metadata: dict[str, Any] = Field(
+        default_factory=dict,
+        description="Additional context and data for the scheduler action",
+    )
+    progress: SchedulerProgress = Field(
+        default_factory=lambda: SchedulerProgress(),
+        description="Progress information for the scheduler action",
+    )
+class SchedulerState(StandardBaseModel):
+    """
+    Comprehensive state tracking for scheduler execution.
+    Holds identity (node and process count), lifecycle timestamps, the
+    constraint actions recorded during a run, an overall progress estimate, and
+    per-status request counters. Central to scheduler coordination, providing
+    real-time metrics for monitoring and decision-making across distributed
+    worker processes.
+    Example:
+    ::
+        state = SchedulerState(node_id=0, num_processes=4)
+        state.created_requests += 1
+        state.queued_requests += 1
+        completion_rate = state.processed_requests / state.created_requests
+    """
+    # Identity; both default to -1 (presumably meaning "not set" -- confirm).
+    node_id: int = Field(
+        default=-1,
+        description="Unique identifier for this scheduler node",
+    )
+    num_processes: int = Field(
+        default=-1,
+        description="Number of worker processes in this scheduler",
+    )
+    # Lifecycle timestamps; start_time is taken from time.time() at creation,
+    # the rest stay None until assigned.
+    start_time: float = Field(
+        default_factory=time.time,
+        description="Unix timestamp when the scheduler started",
+    )
+    end_time: float | None = Field(
+        default=None,
+        description="Unix timestamp when the scheduler stopped",
+    )
+    start_requests_time: float | None = Field(
+        default=None,
+        description="Unix timestamp of the first sent request",
+    )
+    end_requests_time: float | None = Field(
+        default=None,
+        description="Unix timestamp of the last finalized request",
+    )
+    end_queuing_time: float | None = Field(
+        default=None,
+        description="Unix timestamp when request queuing stopped",
+    )
+    end_queuing_constraints: dict[str, SchedulerUpdateAction] = Field(
+        default_factory=dict,
+        description="Constraints that triggered queuing termination",
+    )
+    end_processing_time: float | None = Field(
+        default=None,
+        description="Unix timestamp when request processing stopped",
+    )
+    end_processing_constraints: dict[str, SchedulerUpdateAction] = Field(
+        default_factory=dict,
+        description="Constraints that triggered processing termination",
+    )
+    scheduler_constraints: dict[str, SchedulerUpdateAction] = Field(
+        default_factory=dict,
+        description="Latest state from all constraints applied during scheduler run",
+    )
+    progress: SchedulerProgress = Field(
+        default_factory=lambda: SchedulerProgress(),
+        description="Overall progress information for the scheduler run",
+    )
+    # Request counters; all start at zero.
+    created_requests: int = Field(
+        default=0,
+        description="Total number of requests created",
+    )
+    queued_requests: int = Field(
+        default=0,
+        description="Total number of requests queued for processing",
+    )
+    pending_requests: int = Field(
+        default=0,
+        description="Number of requests pending processing within a worker",
+    )
+    processing_requests: int = Field(
+        default=0,
+        description="Number of requests currently being processed",
+    )
+    processed_requests: int = Field(
+        default=0,
+        description="Number of requests that completed processing",
+    )
+    successful_requests: int = Field(
+        default=0,
+        description="Number of requests that completed successfully",
+    )
+    errored_requests: int = Field(
+        default=0,
+        description="Number of requests that failed with errors",
+    )
+    cancelled_requests: int = Field(
+        default=0,
+        description="Number of requests that were cancelled",
+    )

guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl

guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl