guidellm-0.4.0a169.tar.gz → guidellm-0.4.0a173.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of guidellm might be problematic.
Files changed (103):
  1. {guidellm-0.4.0a169/src/guidellm.egg-info → guidellm-0.4.0a173}/PKG-INFO +1 -1
  2. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/scheduler/constraints.py +1 -3
  3. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/scheduler/environments.py +2 -2
  4. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/scheduler/scheduler.py +1 -1
  5. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/scheduler/strategies.py +31 -4
  6. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/scheduler/worker.py +56 -30
  7. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/scheduler/worker_group.py +33 -31
  8. guidellm-0.4.0a173/src/guidellm/version.py +6 -0
  9. {guidellm-0.4.0a169 → guidellm-0.4.0a173/src/guidellm.egg-info}/PKG-INFO +1 -1
  10. guidellm-0.4.0a169/src/guidellm/version.py +0 -6
  11. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/LICENSE +0 -0
  12. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/MANIFEST.in +0 -0
  13. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/README.md +0 -0
  14. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/pyproject.toml +0 -0
  15. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/setup.cfg +0 -0
  16. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/setup.py +0 -0
  17. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/__init__.py +0 -0
  18. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/__main__.py +0 -0
  19. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/backends/__init__.py +0 -0
  20. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/backends/backend.py +0 -0
  21. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/backends/openai.py +0 -0
  22. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/backends/response_handlers.py +0 -0
  23. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/benchmark/__init__.py +0 -0
  24. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/benchmark/benchmarker.py +0 -0
  25. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/benchmark/entrypoints.py +0 -0
  26. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/benchmark/output.py +0 -0
  27. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/benchmark/profile.py +0 -0
  28. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/benchmark/progress.py +0 -0
  29. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/benchmark/scenarios/__init__.py +0 -0
  30. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/benchmark/scenarios/chat.json +0 -0
  31. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/benchmark/scenarios/rag.json +0 -0
  32. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/benchmark/schemas.py +0 -0
  33. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/data/__init__.py +0 -0
  34. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/data/collators.py +0 -0
  35. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/data/deserializers/__init__.py +0 -0
  36. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/data/deserializers/deserializer.py +0 -0
  37. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/data/deserializers/file.py +0 -0
  38. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/data/deserializers/huggingface.py +0 -0
  39. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/data/deserializers/memory.py +0 -0
  40. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/data/deserializers/synthetic.py +0 -0
  41. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/data/loaders.py +0 -0
  42. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/data/preprocessors/__init__.py +0 -0
  43. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/data/preprocessors/formatters.py +0 -0
  44. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/data/preprocessors/mappers.py +0 -0
  45. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/data/preprocessors/preprocessor.py +0 -0
  46. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/data/processor.py +0 -0
  47. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/data/schemas.py +0 -0
  48. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/data/utils/__init__.py +0 -0
  49. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/data/utils/dataset.py +0 -0
  50. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/extras/__init__.py +0 -0
  51. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/extras/audio.py +0 -0
  52. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/extras/vision.py +0 -0
  53. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/logger.py +0 -0
  54. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/mock_server/__init__.py +0 -0
  55. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/mock_server/config.py +0 -0
  56. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/mock_server/handlers/__init__.py +0 -0
  57. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/mock_server/handlers/chat_completions.py +0 -0
  58. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/mock_server/handlers/completions.py +0 -0
  59. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/mock_server/handlers/tokenizer.py +0 -0
  60. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/mock_server/models.py +0 -0
  61. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/mock_server/server.py +0 -0
  62. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/mock_server/utils.py +0 -0
  63. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/preprocess/__init__.py +0 -0
  64. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/preprocess/dataset.py +0 -0
  65. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/presentation/__init__.py +0 -0
  66. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/presentation/builder.py +0 -0
  67. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/presentation/data_models.py +0 -0
  68. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/presentation/injector.py +0 -0
  69. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/scheduler/__init__.py +0 -0
  70. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/scheduler/schemas.py +0 -0
  71. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/schemas/__init__.py +0 -0
  72. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/schemas/info.py +0 -0
  73. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/schemas/request.py +0 -0
  74. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/schemas/response.py +0 -0
  75. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/schemas/stats.py +0 -0
  76. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/settings.py +0 -0
  77. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/__init__.py +0 -0
  78. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/auto_importer.py +0 -0
  79. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/cli.py +0 -0
  80. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/colors.py +0 -0
  81. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/console.py +0 -0
  82. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/default_group.py +0 -0
  83. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/dict.py +0 -0
  84. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/encoding.py +0 -0
  85. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/functions.py +0 -0
  86. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/hf_datasets.py +0 -0
  87. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/hf_transformers.py +0 -0
  88. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/imports.py +0 -0
  89. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/messaging.py +0 -0
  90. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/mixins.py +0 -0
  91. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/pydantic_utils.py +0 -0
  92. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/random.py +0 -0
  93. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/registry.py +0 -0
  94. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/singleton.py +0 -0
  95. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/statistics.py +0 -0
  96. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/synchronous.py +0 -0
  97. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/text.py +0 -0
  98. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/typing.py +0 -0
  99. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm.egg-info/SOURCES.txt +0 -0
  100. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm.egg-info/dependency_links.txt +0 -0
  101. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm.egg-info/entry_points.txt +0 -0
  102. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm.egg-info/requires.txt +0 -0
  103. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm.egg-info/top_level.txt +0 -0
{guidellm-0.4.0a169/src/guidellm.egg-info → guidellm-0.4.0a173}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: guidellm
-Version: 0.4.0a169
+Version: 0.4.0a173
 Summary: Guidance platform for deploying and managing large language models.
 Author: Red Hat
 License: Apache-2.0
{guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/scheduler/constraints.py
@@ -1005,9 +1005,7 @@ class RequestsExhaustedConstraint(StandardBaseModel, InfoMixin):
         return self.model_dump()
 
     def __call__(
-        self,
-        state: SchedulerState,
-        request_info: RequestInfo,  # noqa: ARG002
+        self, state: SchedulerState, _request: RequestInfo
     ) -> SchedulerUpdateAction:
         create_exceeded = state.created_requests >= self.num_requests
         processed_exceeded = state.processed_requests >= self.num_requests
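The signature change above also swaps a lint suppression for a naming convention: prefixing an unused parameter with an underscore tells linters such as ruff (rule ARG002) that the argument is intentionally ignored, so the noqa comment can go. A minimal sketch of the convention (hypothetical function, not from guidellm):

    def on_update(state: dict, _request: object) -> bool:
        # The leading underscore marks _request as intentionally unused,
        # replacing a "# noqa: ARG002" suppression comment.
        return len(state) > 0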
{guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/scheduler/environments.py
@@ -84,7 +84,7 @@ class Environment(ABC, Generic[RequestT, ResponseT], InfoMixin):
     async def update_run_iteration(
         self,
         response: ResponseT | None,
-        request: RequestT,
+        request: RequestT | MultiTurnRequestT[RequestT],
         request_info: RequestInfo,
         state: SchedulerState,
     ):
@@ -201,7 +201,7 @@ class NonDistributedEnvironment(Environment[RequestT, ResponseT]):
     async def update_run_iteration(
         self,
         response: ResponseT | None,
-        request: RequestT,
+        request: RequestT | MultiTurnRequestT[RequestT],
         request_info: RequestInfo,
         state: SchedulerState,
     ):
{guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/scheduler/scheduler.py
@@ -69,7 +69,7 @@ class Scheduler(
     ) -> AsyncIterator[
         tuple[
             ResponseT | None,
-            RequestT,
+            RequestT | MultiTurnRequestT[RequestT],
             RequestInfo,
             SchedulerState,
         ]
{guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/scheduler/strategies.py
@@ -70,8 +70,8 @@ class SchedulingStrategy(PydanticClassRegistryMixin["SchedulingStrategy"], InfoMixin):
         description="Number of worker processes to use for this strategy",
         ge=0,
     )
-    max_concurrency: int = Field(
-        default=0,
+    max_concurrency: int | None = Field(
+        default=None,
         description="Maximum number of concurrent requests to allow",
         ge=0,
     )
@@ -122,8 +122,8 @@ class SchedulingStrategy(PydanticClassRegistryMixin["SchedulingStrategy"], InfoMixin):
         self.startup_duration = startup_duration
 
         self._processes_request_index = Value("i", 0)
-        self._processes_lock = Lock()
         self._processes_start_time = Value("d", -1.0)
+        self._processes_lock = Lock()
 
     def init_processes_start(self, start_time: float):
         """
@@ -137,6 +137,10 @@ class SchedulingStrategy(PydanticClassRegistryMixin["SchedulingStrategy"], InfoMixin):
                 "SchedulingStrategy init_processes_start called before "
                 "init_processes_timings"
             )
+        if self._processes_start_time is None:
+            raise RuntimeError(
+                "_processes_lock is not None but _processes_start_time is None"
+            )
 
         with self._processes_lock:
             self._processes_start_time.value = start_time
@@ -153,6 +157,10 @@ class SchedulingStrategy(PydanticClassRegistryMixin["SchedulingStrategy"], InfoMixin):
                 "SchedulingStrategy get_processes_start_time called before "
                 "init_processes_timings"
             )
+        if self._processes_start_time is None:
+            raise RuntimeError(
+                "_processes_lock is not None but _processes_start_time is None"
+            )
 
         while self._cached_processes_start_time is None:
             with self._processes_lock:
@@ -175,6 +183,10 @@ class SchedulingStrategy(PydanticClassRegistryMixin["SchedulingStrategy"], InfoMixin):
                 "SchedulingStrategy next_request_index called before "
                 "init_processes_timings"
            )
+        if self._processes_request_index is None:
+            raise RuntimeError(
+                "_processes_lock is not None but _processes_request_index is None"
+            )
 
         with self._processes_lock:
             self._processes_request_index.value += 1
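The three guards added above follow one pattern: the shared multiprocessing objects are created lazily, so every accessor re-checks them and fails loudly if setup was skipped. A standalone sketch of the pattern (hypothetical SharedCounter class, assuming only multiprocessing.Value and Lock as used in the diff):

    from multiprocessing import Lock, Value

    class SharedCounter:
        """Lazily initialized cross-process counter; accessors re-check for
        None so misuse fails with a clear error, not an AttributeError."""

        def __init__(self):
            self._lock = None
            self._index = None

        def init_timings(self):
            self._lock = Lock()
            self._index = Value("i", 0)

        def next_index(self) -> int:
            if self._lock is None or self._index is None:
                raise RuntimeError("next_index called before init_timings")
            with self._lock:
                self._index.value += 1
                return self._index.value

The RuntimeError messages in the diff name the exact inconsistency (lock present, value missing), which is easier to diagnose than the AttributeError that dereferencing None would raise.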
@@ -369,7 +381,8 @@ class ThroughputStrategy(SchedulingStrategy):
         start_time = await self.get_processes_start_time()
 
         if (
-            self.startup_duration > 0
+            self.max_concurrency is not None
+            and self.startup_duration > 0
             and (time.time() - start_time) < self.startup_duration
             and (current_index := self.next_request_index()) <= self.max_concurrency
         ):
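Because max_concurrency now defaults to None rather than 0, the startup-ramp condition above must check for None before comparing: in Python, `index <= None` raises TypeError. A minimal sketch of the guarded check (standalone re-implementation, not the guidellm API):

    def within_startup_ramp(
        index: int,
        max_concurrency: int | None,
        elapsed: float,
        startup_duration: float,
    ) -> bool:
        # The None check must come first; short-circuiting then makes the
        # final comparison safe.
        return (
            max_concurrency is not None
            and startup_duration > 0
            and elapsed < startup_duration
            and index <= max_concurrency
        )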
@@ -477,6 +490,8 @@ class AsyncPoissonStrategy(ThroughputStrategy):
         :param startup_duration: Duration in seconds for request startup ramping
         """
         super().init_processes_timings(worker_count, max_concurrency, startup_duration)
+        if self._processes_lock is None:
+            raise RuntimeError("_processes_lock is None in init_processes_timings")
         with self._processes_lock:
             self._offset = Value("d", -1.0)
 
@@ -487,6 +502,12 @@ class AsyncPoissonStrategy(ThroughputStrategy):
         :param start_time: Unix timestamp when request processing should begin
         """
         ThroughputStrategy.init_processes_start(self, start_time)
+
+        if self._processes_lock is None:
+            raise RuntimeError("_processes_lock is None in init_processes_start")
+        if self._offset is None:
+            raise RuntimeError("_offset is None in init_processes_start; was "
+                "init_processes_timings not called?")
         with self._processes_lock:
             self._offset.value = start_time
 
@@ -505,6 +526,12 @@
 
         next_delay = self._random.expovariate(self.rate)
 
+        if self._processes_lock is None:
+            raise RuntimeError("_processes_lock is None in next_request_time; was "
+                "init_processes_timings not called?")
+        if self._offset is None:
+            raise RuntimeError("_offset is None in next_request_time; was "
+                "init_processes_timings not called?")
         with self._processes_lock:
             self._offset.value += next_delay
 
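next_request_time draws each gap from expovariate(rate), i.e. exponentially distributed inter-arrival times, which is what makes the arrival process Poisson. A self-contained sketch of the cumulative-offset logic (hypothetical helper, seeded for reproducibility; not the guidellm implementation):

    import random
    import time

    def poisson_request_times(rate: float, start: float, count: int) -> list[float]:
        """Cumulative timestamps with exponentially distributed gaps, giving
        a Poisson arrival process averaging `rate` requests per second."""
        rng = random.Random(42)  # seeded so runs are reproducible
        offset = start
        times = []
        for _ in range(count):
            offset += rng.expovariate(rate)  # mean gap is 1/rate seconds
            times.append(offset)
        return times

    print(poisson_request_times(rate=10.0, start=time.time(), count=5))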
{guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/scheduler/worker.py
@@ -23,11 +23,9 @@ try:
         bool, "Flag indicating uvloop availability for event loop optimization"
     ] = True
 except ImportError:
-    uvloop = None
+    uvloop = None  # type: ignore[assignment]  # Optional dependency
 
-    HAS_UVLOOP: Annotated[
-        bool, "Flag indicating uvloop availability for event loop optimization"
-    ] = False
+    HAS_UVLOOP = False
 
 
 from guidellm.scheduler.schemas import (
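The uvloop block is the usual optional-dependency import pattern; collapsing the except branch to a bare `HAS_UVLOOP = False` avoids re-declaring the Annotated type, which type checkers flag as a redefinition. A standalone sketch of the pattern, with a hypothetical helper (not part of guidellm) showing how such a flag is typically consumed:

    import asyncio

    try:
        import uvloop  # optional: drop-in, faster asyncio event loop
        HAS_UVLOOP = True
    except ImportError:
        uvloop = None  # type: ignore[assignment]
        HAS_UVLOOP = False

    def install_uvloop_if_available() -> None:
        # Hypothetical helper: prefer uvloop's event loop policy when present.
        if HAS_UVLOOP and uvloop is not None:
            asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())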
@@ -84,6 +82,10 @@ class WorkerProcess(Generic[RequestT, ResponseT]):
             RequestT | MultiTurnRequestT[RequestT],
             RequestInfo,
         ],
+        tuple[
+            RequestT | MultiTurnRequestT[RequestT],
+            RequestInfo,
+        ],
     ],
     backend: BackendInterface[RequestT, ResponseT],
     strategy: SchedulingStrategy,
@@ -201,8 +203,11 @@ class WorkerProcess(Generic[RequestT, ResponseT]):
 
     async def _stop_monitor(
         self,
-    ) -> Literal["error_event", "shutdown_event"]:
-        """Monitor shutdown and error events for worker termination."""
+    ) -> None:
+        """
+        Monitor shutdown and error events for worker termination.
+        :raises RuntimeError: if the worker process received an error signal.
+        """
         exit_key = await wait_for_sync_objects(
             {
                 "error_event": self.error_event,
@@ -322,7 +327,7 @@ class WorkerProcess(Generic[RequestT, ResponseT]):
         """Cancel all remaining queued requests until worker process terminates."""
         while True:
             try:
-                request: RequestT
+                request: RequestT | MultiTurnRequestT[RequestT]
                 request_info: RequestInfo
                 request, request_info = await self.messaging.get(
                     timeout=self.messaging.poll_interval
@@ -350,31 +355,19 @@
 
         try:
             # Pull request from the queue, update state, and send "pending" update
-            request, request_info = await self.messaging.get()
-            request_info.timings.dequeued = time.time()
-            request_info.scheduler_node_id = self.messaging.worker_index or -1
-            request_info.timings.targeted_start = target_start
-            self._send_update("pending", response, request, request_info)
-
-            if request is None or request_info is None:
-                raise RuntimeError("Received invalid request or request info")
-            if isinstance(request, list | tuple):
-                raise NotImplementedError("Multi-turn requests are not yet supported")
-
-            # Schedule the request
-            current_time = time.time()
-            request_info.timings.scheduled_at = current_time
-            if target_start > current_time:
-                await asyncio.sleep(target_start - current_time)
-                # Adapt delay so that scheduled at reflects the sleep time
-                request_info.timings.scheduled_at = target_start
-
-            # Process the request with the backend
-            request_info.timings.resolve_start = time.time()
-            self._send_update("in_progress", response, request, request_info)
-            async for resp, info in self.backend.resolve(request, request_info, None):
+            request, request_info = await self._dequeue_next_request(target_start)
+
+            # Schedule the request and send "in_progress" update
+            await self._schedule_request(request, request_info, target_start)
+
+            async for resp, info in self.backend.resolve(  # type: ignore[attr-defined]
+                request, request_info, None
+            ):
+
                 response = resp
                 request_info = info
+                if request_info is None:
+                    raise RuntimeError("Received invalid request info from backend")
 
             # Complete the request
             request_info.timings.resolve_end = time.time()
@@ -397,6 +390,39 @@ class WorkerProcess(Generic[RequestT, ResponseT]):
             if request_info is not None:
                 self.strategy.request_completed(request_info)
 
+    async def _dequeue_next_request(
+        self, target_start: float
+    ) -> tuple[RequestT, RequestInfo]:
+        request, request_info = await self.messaging.get()
+        dequeued_time = time.time()  # Ensure accurate dequeue timing
+        if request is None or request_info is None:
+            raise RuntimeError("Received invalid request or request info")
+        if isinstance(request, list | tuple):
+            raise NotImplementedError("Multi-turn requests are not yet supported")
+
+        request_info.timings.dequeued = dequeued_time
+        request_info.scheduler_node_id = self.messaging.worker_index or -1
+        request_info.timings.targeted_start = target_start
+        self._send_update("pending", None, request, request_info)
+        return request, request_info
+
+    async def _schedule_request(
+        self,
+        request: RequestT,
+        request_info: RequestInfo,
+        target_start: float
+    ):
+        current_time = time.time()
+        request_info.timings.scheduled_at = current_time
+        if target_start > current_time:
+            await asyncio.sleep(target_start - current_time)
+            # Adapt delay so that scheduled at reflects the sleep time
+            request_info.timings.scheduled_at = target_start
+
+        # Process the request with the backend
+        request_info.timings.resolve_start = time.time()
+        self._send_update("in_progress", None, request, request_info)
+
     def _send_update(
         self,
         new_status: Literal[
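The extracted _schedule_request keeps the original timing trick: if the worker wakes late, scheduled_at stays the actual time, but if it slept until the target, scheduled_at is rewritten to the targeted timestamp rather than the jittery wakeup time. A minimal sketch of just that logic (hypothetical sleep_until helper, not from guidellm):

    import asyncio
    import time

    async def sleep_until(target_start: float) -> float:
        # If we had to sleep, report the targeted timestamp, not the actual
        # (jittery) wakeup time; otherwise report the late actual time.
        now = time.time()
        if target_start > now:
            await asyncio.sleep(target_start - now)
            return target_start
        return now

    # Example: asyncio.run(sleep_until(time.time() + 0.5))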
{guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/scheduler/worker_group.py
@@ -84,7 +84,7 @@ class WorkerProcessGroup(Generic[RequestT, ResponseT]):
         backend: BackendInterface[RequestT, ResponseT],
         strategy: SchedulingStrategy,
         startup_duration: float,
-        **constraints: dict[str, Constraint],
+        **constraints: Constraint,
     ):
         """
         Initialize a worker process group for distributed request processing.
@@ -232,7 +232,7 @@ class WorkerProcessGroup(Generic[RequestT, ResponseT]):
                 worker_index=rank,
                 max_buffer_send_size=None,
                 max_buffer_receive_size=per_proc_max_buffer_size,
-            ),
+            ),  # The non-group worker lacks the SchedulerState type. Type err.
             backend=self.backend,
             strategy=self.strategy,
             async_limit=async_limit,
@@ -478,9 +478,9 @@ class WorkerGroupState(Generic[RequestT, ResponseT]):
             num_processes=len(processes),
             start_time=start_time,
         )
-        self._queued_requests: set[RequestT | MultiTurnRequestT[RequestT]] = set()
-        self._pending_requests: set[RequestT | MultiTurnRequestT[RequestT]] = set()
-        self._processing_requests: set[RequestT | MultiTurnRequestT[RequestT]] = set()
+        self._queued_request_ids: set[str] = set()
+        self._pending_request_ids: set[str] = set()
+        self._processing_request_ids: set[str] = set()
 
     def requests_generator(
         self, requests: Iterable[RequestT | MultiTurnRequestT[RequestT]]
@@ -517,11 +517,13 @@ class WorkerGroupState(Generic[RequestT, ResponseT]):
             )
             state_update = self._locked_update(request_info)
             request_info.timings.queued = time.time()
+            if self.messaging.buffer_receive_queue is None:
+                raise RuntimeError("buffer receive queue is None")
             self.messaging.buffer_receive_queue.sync_put(
                 (None, request, request_info, state_update.state)
             )
 
-            yield (request, request_info)
+            yield request, request_info
 
             if state_update.stop_queueing:
                 self.stop_send_requests_event.set()
@@ -530,8 +532,8 @@ class WorkerGroupState(Generic[RequestT, ResponseT]):
         # Reached the end, inject a RequestsExhaustedConstraint to record
         self._locked_update(
             info=None,
-            requests_exhausted={
-                "requests_exhausted": RequestsExhaustedConstraint(
+            add_constraints={
+                "requests_exhausted": RequestsExhaustedConstraint(  # type: ignore[dict-item]
                     num_requests=count
                 )
             },
@@ -610,10 +612,10 @@ class WorkerGroupState(Generic[RequestT, ResponseT]):
     def _locked_update(
         self,
         info: RequestInfo | None = None,
-        **add_constraints: dict[str, Constraint],
+        add_constraints: dict[str, Constraint] | None = None,
     ) -> _StateUpdate:
         with self._update_lock:
-            if add_constraints:
+            if add_constraints is not None:
                 self.constraints.update(add_constraints)
 
             if info is not None:
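The old annotation `**add_constraints: dict[str, Constraint]` was doubly wrong: with `**kwargs`, the annotation describes each value, so every constraint would have had to be a dict, and callers could not pass a prebuilt mapping without unpacking it. A before/after sketch (hypothetical stand-ins, not the guidellm types):

    from typing import Any

    # Before: kwargs spread; the annotation wrongly typed each value as a dict.
    def locked_update_old(info: Any = None, **add_constraints: Any) -> None:
        if add_constraints:  # truthiness: empty kwargs means nothing to add
            ...

    # After: one explicit optional mapping, checked against None.
    def locked_update_new(
        info: Any = None, add_constraints: dict[str, Any] | None = None
    ) -> None:
        if add_constraints is not None:
            ...

    locked_update_new(add_constraints={"requests_exhausted": object()})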
@@ -631,34 +633,34 @@ class WorkerGroupState(Generic[RequestT, ResponseT]):
 
     def _update_state_request_counts(self, info: RequestInfo):
         if info.status == "queued":
-            self._queued_requests.add(info.request_id)
-            self._state.queued_requests = len(self._queued_requests)
+            self._queued_request_ids.add(info.request_id)
+            self._state.queued_requests = len(self._queued_request_ids)
             self._state.created_requests += 1
         elif info.status == "pending":
-            self._queued_requests.remove(info.request_id)
-            self._state.queued_requests = len(self._queued_requests)
-            self._pending_requests.add(info.request_id)
-            self._state.pending_requests = len(self._pending_requests)
+            self._queued_request_ids.remove(info.request_id)
+            self._state.queued_requests = len(self._queued_request_ids)
+            self._pending_request_ids.add(info.request_id)
+            self._state.pending_requests = len(self._pending_request_ids)
         elif info.status == "in_progress":
-            self._pending_requests.remove(info.request_id)
-            self._state.pending_requests = len(self._pending_requests)
-            self._processing_requests.add(info.request_id)
-            self._state.processing_requests = len(self._processing_requests)
+            self._pending_request_ids.remove(info.request_id)
+            self._state.pending_requests = len(self._pending_request_ids)
+            self._processing_request_ids.add(info.request_id)
+            self._state.processing_requests = len(self._processing_request_ids)
         elif info.status == "completed":
-            self._processing_requests.remove(info.request_id)
-            self._state.processing_requests = len(self._processing_requests)
+            self._processing_request_ids.remove(info.request_id)
+            self._state.processing_requests = len(self._processing_request_ids)
             self._state.processed_requests += 1
             self._state.successful_requests += 1
         elif info.status in ("errored", "cancelled"):
-            if info.request_id in self._queued_requests:
-                self._queued_requests.remove(info.request_id)
-                self._state.queued_requests = len(self._queued_requests)
-            elif info.request_id in self._pending_requests:
-                self._pending_requests.remove(info.request_id)
-                self._state.pending_requests = len(self._pending_requests)
-            elif info.request_id in self._processing_requests:
-                self._processing_requests.remove(info.request_id)
-                self._state.processing_requests = len(self._processing_requests)
+            if info.request_id in self._queued_request_ids:
+                self._queued_request_ids.remove(info.request_id)
+                self._state.queued_requests = len(self._queued_request_ids)
+            elif info.request_id in self._pending_request_ids:
+                self._pending_request_ids.remove(info.request_id)
+                self._state.pending_requests = len(self._pending_request_ids)
+            elif info.request_id in self._processing_request_ids:
+                self._processing_request_ids.remove(info.request_id)
+                self._state.processing_requests = len(self._processing_request_ids)
 
             self._state.processed_requests += 1
             self._state.errored_requests += 1 if info.status == "errored" else 0
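Note the old code already stored info.request_id in these sets, so the set[RequestT | MultiTurnRequestT[RequestT]] annotations never matched the runtime contents; renaming to *_request_ids: set[str] makes the types honest and keeps membership checks O(1) without requiring request objects to be hashable. A small sketch of the id-based lifecycle bookkeeping (hypothetical, simplified to two sets):

    queued_ids: set[str] = set()
    pending_ids: set[str] = set()

    def on_status_change(request_id: str, status: str) -> None:
        # Move the id between lifecycle sets; counts are just len(set).
        if status == "queued":
            queued_ids.add(request_id)
        elif status == "pending":
            queued_ids.remove(request_id)  # KeyError here signals a state bug
            pending_ids.add(request_id)

    on_status_change("req-1", "queued")
    on_status_change("req-1", "pending")
    assert "req-1" in pending_ids and not queued_ids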
guidellm-0.4.0a173/src/guidellm/version.py (added)
@@ -0,0 +1,6 @@
+version = "0.4.0a173"
+build_type = "nightly"
+build_iteration = "173"
+git_commit = "585917501a31f053ff01d695ceeb6d1f23007cf0"
+git_branch = "main"
+git_last_tag = "v0.3.1"
{guidellm-0.4.0a169 → guidellm-0.4.0a173/src/guidellm.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: guidellm
-Version: 0.4.0a169
+Version: 0.4.0a173
 Summary: Guidance platform for deploying and managing large language models.
 Author: Red Hat
 License: Apache-2.0
guidellm-0.4.0a169/src/guidellm/version.py (removed)
@@ -1,6 +0,0 @@
-version = "0.4.0a169"
-build_type = "nightly"
-build_iteration = "169"
-git_commit = "dd219f1aeda5997644baee0fd9efef286a9fa2c1"
-git_branch = "main"
-git_last_tag = "v0.3.1"