guidellm-0.4.0a169.tar.gz → guidellm-0.4.0a173.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of guidellm might be problematic.
Files changed (103):
  1. {guidellm-0.4.0a169/src/guidellm.egg-info → guidellm-0.4.0a173}/PKG-INFO +1 -1
  2. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/scheduler/constraints.py +1 -3
  3. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/scheduler/environments.py +2 -2
  4. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/scheduler/scheduler.py +1 -1
  5. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/scheduler/strategies.py +31 -4
  6. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/scheduler/worker.py +56 -30
  7. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/scheduler/worker_group.py +33 -31
  8. guidellm-0.4.0a173/src/guidellm/version.py +6 -0
  9. {guidellm-0.4.0a169 → guidellm-0.4.0a173/src/guidellm.egg-info}/PKG-INFO +1 -1
  10. guidellm-0.4.0a169/src/guidellm/version.py +0 -6
  11. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/LICENSE +0 -0
  12. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/MANIFEST.in +0 -0
  13. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/README.md +0 -0
  14. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/pyproject.toml +0 -0
  15. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/setup.cfg +0 -0
  16. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/setup.py +0 -0
  17. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/__init__.py +0 -0
  18. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/__main__.py +0 -0
  19. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/backends/__init__.py +0 -0
  20. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/backends/backend.py +0 -0
  21. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/backends/openai.py +0 -0
  22. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/backends/response_handlers.py +0 -0
  23. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/benchmark/__init__.py +0 -0
  24. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/benchmark/benchmarker.py +0 -0
  25. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/benchmark/entrypoints.py +0 -0
  26. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/benchmark/output.py +0 -0
  27. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/benchmark/profile.py +0 -0
  28. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/benchmark/progress.py +0 -0
  29. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/benchmark/scenarios/__init__.py +0 -0
  30. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/benchmark/scenarios/chat.json +0 -0
  31. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/benchmark/scenarios/rag.json +0 -0
  32. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/benchmark/schemas.py +0 -0
  33. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/data/__init__.py +0 -0
  34. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/data/collators.py +0 -0
  35. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/data/deserializers/__init__.py +0 -0
  36. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/data/deserializers/deserializer.py +0 -0
  37. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/data/deserializers/file.py +0 -0
  38. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/data/deserializers/huggingface.py +0 -0
  39. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/data/deserializers/memory.py +0 -0
  40. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/data/deserializers/synthetic.py +0 -0
  41. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/data/loaders.py +0 -0
  42. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/data/preprocessors/__init__.py +0 -0
  43. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/data/preprocessors/formatters.py +0 -0
  44. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/data/preprocessors/mappers.py +0 -0
  45. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/data/preprocessors/preprocessor.py +0 -0
  46. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/data/processor.py +0 -0
  47. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/data/schemas.py +0 -0
  48. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/data/utils/__init__.py +0 -0
  49. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/data/utils/dataset.py +0 -0
  50. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/extras/__init__.py +0 -0
  51. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/extras/audio.py +0 -0
  52. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/extras/vision.py +0 -0
  53. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/logger.py +0 -0
  54. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/mock_server/__init__.py +0 -0
  55. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/mock_server/config.py +0 -0
  56. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/mock_server/handlers/__init__.py +0 -0
  57. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/mock_server/handlers/chat_completions.py +0 -0
  58. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/mock_server/handlers/completions.py +0 -0
  59. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/mock_server/handlers/tokenizer.py +0 -0
  60. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/mock_server/models.py +0 -0
  61. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/mock_server/server.py +0 -0
  62. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/mock_server/utils.py +0 -0
  63. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/preprocess/__init__.py +0 -0
  64. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/preprocess/dataset.py +0 -0
  65. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/presentation/__init__.py +0 -0
  66. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/presentation/builder.py +0 -0
  67. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/presentation/data_models.py +0 -0
  68. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/presentation/injector.py +0 -0
  69. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/scheduler/__init__.py +0 -0
  70. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/scheduler/schemas.py +0 -0
  71. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/schemas/__init__.py +0 -0
  72. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/schemas/info.py +0 -0
  73. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/schemas/request.py +0 -0
  74. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/schemas/response.py +0 -0
  75. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/schemas/stats.py +0 -0
  76. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/settings.py +0 -0
  77. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/__init__.py +0 -0
  78. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/auto_importer.py +0 -0
  79. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/cli.py +0 -0
  80. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/colors.py +0 -0
  81. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/console.py +0 -0
  82. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/default_group.py +0 -0
  83. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/dict.py +0 -0
  84. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/encoding.py +0 -0
  85. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/functions.py +0 -0
  86. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/hf_datasets.py +0 -0
  87. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/hf_transformers.py +0 -0
  88. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/imports.py +0 -0
  89. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/messaging.py +0 -0
  90. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/mixins.py +0 -0
  91. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/pydantic_utils.py +0 -0
  92. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/random.py +0 -0
  93. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/registry.py +0 -0
  94. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/singleton.py +0 -0
  95. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/statistics.py +0 -0
  96. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/synchronous.py +0 -0
  97. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/text.py +0 -0
  98. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/utils/typing.py +0 -0
  99. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm.egg-info/SOURCES.txt +0 -0
  100. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm.egg-info/dependency_links.txt +0 -0
  101. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm.egg-info/entry_points.txt +0 -0
  102. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm.egg-info/requires.txt +0 -0
  103. {guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm.egg-info/top_level.txt +0 -0
{guidellm-0.4.0a169/src/guidellm.egg-info → guidellm-0.4.0a173}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: guidellm
-Version: 0.4.0a169
+Version: 0.4.0a173
 Summary: Guidance platform for deploying and managing large language models.
 Author: Red Hat
 License: Apache-2.0
{guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/scheduler/constraints.py
@@ -1005,9 +1005,7 @@ class RequestsExhaustedConstraint(StandardBaseModel, InfoMixin):
         return self.model_dump()
 
     def __call__(
-        self,
-        state: SchedulerState,
-        request_info: RequestInfo,  # noqa: ARG002
+        self, state: SchedulerState, _request: RequestInfo
     ) -> SchedulerUpdateAction:
         create_exceeded = state.created_requests >= self.num_requests
         processed_exceeded = state.processed_requests >= self.num_requests
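The signature change above also swaps a lint suppression for a naming convention: prefixing an unused parameter with an underscore tells linters such as ruff (rule ARG002) that the argument is intentionally ignored, so the noqa comment can go. A minimal sketch of the convention (hypothetical function, not from guidellm):

    def on_update(state: dict, _request: object) -> bool:
        # The leading underscore marks _request as intentionally unused,
        # replacing a "# noqa: ARG002" suppression comment.
        return len(state) > 0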
{guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/scheduler/environments.py
@@ -84,7 +84,7 @@ class Environment(ABC, Generic[RequestT, ResponseT], InfoMixin):
     async def update_run_iteration(
         self,
         response: ResponseT | None,
-        request: RequestT,
+        request: RequestT | MultiTurnRequestT[RequestT],
         request_info: RequestInfo,
         state: SchedulerState,
     ):
@@ -201,7 +201,7 @@ class NonDistributedEnvironment(Environment[RequestT, ResponseT]):
     async def update_run_iteration(
         self,
         response: ResponseT | None,
-        request: RequestT,
+        request: RequestT | MultiTurnRequestT[RequestT],
         request_info: RequestInfo,
         state: SchedulerState,
     ):
{guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/scheduler/scheduler.py
@@ -69,7 +69,7 @@ class Scheduler(
     ) -> AsyncIterator[
         tuple[
             ResponseT | None,
-            RequestT,
+            RequestT | MultiTurnRequestT[RequestT],
             RequestInfo,
             SchedulerState,
         ]
{guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/scheduler/strategies.py
@@ -70,8 +70,8 @@ class SchedulingStrategy(PydanticClassRegistryMixin["SchedulingStrategy"], InfoMixin):
         description="Number of worker processes to use for this strategy",
         ge=0,
     )
-    max_concurrency: int = Field(
-        default=0,
+    max_concurrency: int | None = Field(
+        default=None,
         description="Maximum number of concurrent requests to allow",
         ge=0,
     )
@@ -122,8 +122,8 @@ class SchedulingStrategy(PydanticClassRegistryMixin["SchedulingStrategy"], InfoMixin):
         self.startup_duration = startup_duration
 
         self._processes_request_index = Value("i", 0)
-        self._processes_lock = Lock()
         self._processes_start_time = Value("d", -1.0)
+        self._processes_lock = Lock()
 
     def init_processes_start(self, start_time: float):
         """
@@ -137,6 +137,10 @@ class SchedulingStrategy(PydanticClassRegistryMixin["SchedulingStrategy"], InfoMixin):
                 "SchedulingStrategy init_processes_start called before "
                 "init_processes_timings"
             )
+        if self._processes_start_time is None:
+            raise RuntimeError(
+                "_processes_lock is not None but _processes_start_time is None"
+            )
 
         with self._processes_lock:
             self._processes_start_time.value = start_time
@@ -153,6 +157,10 @@ class SchedulingStrategy(PydanticClassRegistryMixin["SchedulingStrategy"], InfoMixin):
                 "SchedulingStrategy get_processes_start_time called before "
                 "init_processes_timings"
             )
+        if self._processes_start_time is None:
+            raise RuntimeError(
+                "_processes_lock is not None but _processes_start_time is None"
+            )
 
         while self._cached_processes_start_time is None:
             with self._processes_lock:
@@ -175,6 +183,10 @@ class SchedulingStrategy(PydanticClassRegistryMixin["SchedulingStrategy"], InfoMixin):
                 "SchedulingStrategy next_request_index called before "
                 "init_processes_timings"
            )
+        if self._processes_request_index is None:
+            raise RuntimeError(
+                "_processes_lock is not None but _processes_request_index is None"
+            )
 
         with self._processes_lock:
             self._processes_request_index.value += 1
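The three guards added above follow one pattern: the shared multiprocessing objects are created lazily, so every accessor re-checks them and fails loudly if setup was skipped. A standalone sketch of the pattern (hypothetical SharedCounter class, assuming only multiprocessing.Value and Lock as used in the diff):

    from multiprocessing import Lock, Value

    class SharedCounter:
        """Lazily initialized cross-process counter; accessors re-check for
        None so misuse fails with a clear error, not an AttributeError."""

        def __init__(self):
            self._lock = None
            self._index = None

        def init_timings(self):
            self._lock = Lock()
            self._index = Value("i", 0)

        def next_index(self) -> int:
            if self._lock is None or self._index is None:
                raise RuntimeError("next_index called before init_timings")
            with self._lock:
                self._index.value += 1
                return self._index.value

The RuntimeError messages in the diff name the exact inconsistency (lock present, value missing), which is easier to diagnose than the AttributeError that dereferencing None would raise.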
@@ -369,7 +381,8 @@ class ThroughputStrategy(SchedulingStrategy):
         start_time = await self.get_processes_start_time()
 
         if (
-            self.startup_duration > 0
+            self.max_concurrency is not None
+            and self.startup_duration > 0
             and (time.time() - start_time) < self.startup_duration
             and (current_index := self.next_request_index()) <= self.max_concurrency
         ):
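Because max_concurrency now defaults to None rather than 0, the startup-ramp condition above must check for None before comparing: in Python, `index <= None` raises TypeError. A minimal sketch of the guarded check (standalone re-implementation, not the guidellm API):

    def within_startup_ramp(
        index: int,
        max_concurrency: int | None,
        elapsed: float,
        startup_duration: float,
    ) -> bool:
        # The None check must come first; short-circuiting then makes the
        # final comparison safe.
        return (
            max_concurrency is not None
            and startup_duration > 0
            and elapsed < startup_duration
            and index <= max_concurrency
        )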
@@ -477,6 +490,8 @@ class AsyncPoissonStrategy(ThroughputStrategy):
         :param startup_duration: Duration in seconds for request startup ramping
         """
         super().init_processes_timings(worker_count, max_concurrency, startup_duration)
+        if self._processes_lock is None:
+            raise RuntimeError("_processes_lock is None in init_processes_timings")
         with self._processes_lock:
             self._offset = Value("d", -1.0)
 
@@ -487,6 +502,12 @@ class AsyncPoissonStrategy(ThroughputStrategy):
         :param start_time: Unix timestamp when request processing should begin
         """
         ThroughputStrategy.init_processes_start(self, start_time)
+
+        if self._processes_lock is None:
+            raise RuntimeError("_processes_lock is None in init_processes_start")
+        if self._offset is None:
+            raise RuntimeError("_offset is None in init_processes_start; was "
+                "init_processes_timings not called?")
         with self._processes_lock:
             self._offset.value = start_time
 
@@ -505,6 +526,12 @@
 
         next_delay = self._random.expovariate(self.rate)
 
+        if self._processes_lock is None:
+            raise RuntimeError("_processes_lock is None in next_request_time; was "
+                "init_processes_timings not called?")
+        if self._offset is None:
+            raise RuntimeError("_offset is None in next_request_time; was "
+                "init_processes_timings not called?")
         with self._processes_lock:
             self._offset.value += next_delay
 
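next_request_time draws each gap from expovariate(rate), i.e. exponentially distributed inter-arrival times, which is what makes the arrival process Poisson. A self-contained sketch of the cumulative-offset logic (hypothetical helper, seeded for reproducibility; not the guidellm implementation):

    import random
    import time

    def poisson_request_times(rate: float, start: float, count: int) -> list[float]:
        """Cumulative timestamps with exponentially distributed gaps, giving
        a Poisson arrival process averaging `rate` requests per second."""
        rng = random.Random(42)  # seeded so runs are reproducible
        offset = start
        times = []
        for _ in range(count):
            offset += rng.expovariate(rate)  # mean gap is 1/rate seconds
            times.append(offset)
        return times

    print(poisson_request_times(rate=10.0, start=time.time(), count=5))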
{guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/scheduler/worker.py
@@ -23,11 +23,9 @@ try:
         bool, "Flag indicating uvloop availability for event loop optimization"
     ] = True
 except ImportError:
-    uvloop = None
+    uvloop = None  # type: ignore[assignment]  # Optional dependency
 
-    HAS_UVLOOP: Annotated[
-        bool, "Flag indicating uvloop availability for event loop optimization"
-    ] = False
+    HAS_UVLOOP = False
 
 
 from guidellm.scheduler.schemas import (
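The uvloop block is the usual optional-dependency import pattern; collapsing the except branch to a bare `HAS_UVLOOP = False` avoids re-declaring the Annotated type, which type checkers flag as a redefinition. A standalone sketch of the pattern, with a hypothetical helper (not part of guidellm) showing how such a flag is typically consumed:

    import asyncio

    try:
        import uvloop  # optional: drop-in, faster asyncio event loop
        HAS_UVLOOP = True
    except ImportError:
        uvloop = None  # type: ignore[assignment]
        HAS_UVLOOP = False

    def install_uvloop_if_available() -> None:
        # Hypothetical helper: prefer uvloop's event loop policy when present.
        if HAS_UVLOOP and uvloop is not None:
            asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())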
@@ -84,6 +82,10 @@ class WorkerProcess(Generic[RequestT, ResponseT]):
             RequestT | MultiTurnRequestT[RequestT],
             RequestInfo,
         ],
+        tuple[
+            RequestT | MultiTurnRequestT[RequestT],
+            RequestInfo,
+        ],
     ],
     backend: BackendInterface[RequestT, ResponseT],
     strategy: SchedulingStrategy,
@@ -201,8 +203,11 @@ class WorkerProcess(Generic[RequestT, ResponseT]):
 
     async def _stop_monitor(
         self,
-    ) -> Literal["error_event", "shutdown_event"]:
-        """Monitor shutdown and error events for worker termination."""
+    ) -> None:
+        """
+        Monitor shutdown and error events for worker termination.
+        :raises RuntimeError: if the worker process received an error signal.
+        """
         exit_key = await wait_for_sync_objects(
             {
                 "error_event": self.error_event,
@@ -322,7 +327,7 @@ class WorkerProcess(Generic[RequestT, ResponseT]):
         """Cancel all remaining queued requests until worker process terminates."""
         while True:
             try:
-                request: RequestT
+                request: RequestT | MultiTurnRequestT[RequestT]
                 request_info: RequestInfo
                 request, request_info = await self.messaging.get(
                     timeout=self.messaging.poll_interval
@@ -350,31 +355,19 @@
 
         try:
             # Pull request from the queue, update state, and send "pending" update
-            request, request_info = await self.messaging.get()
-            request_info.timings.dequeued = time.time()
-            request_info.scheduler_node_id = self.messaging.worker_index or -1
-            request_info.timings.targeted_start = target_start
-            self._send_update("pending", response, request, request_info)
-
-            if request is None or request_info is None:
-                raise RuntimeError("Received invalid request or request info")
-            if isinstance(request, list | tuple):
-                raise NotImplementedError("Multi-turn requests are not yet supported")
-
-            # Schedule the request
-            current_time = time.time()
-            request_info.timings.scheduled_at = current_time
-            if target_start > current_time:
-                await asyncio.sleep(target_start - current_time)
-                # Adapt delay so that scheduled at reflects the sleep time
-                request_info.timings.scheduled_at = target_start
-
-            # Process the request with the backend
-            request_info.timings.resolve_start = time.time()
-            self._send_update("in_progress", response, request, request_info)
-            async for resp, info in self.backend.resolve(request, request_info, None):
+            request, request_info = await self._dequeue_next_request(target_start)
+
+            # Schedule the request and send "in_progress" update
+            await self._schedule_request(request, request_info, target_start)
+
+            async for resp, info in self.backend.resolve(  # type: ignore[attr-defined]
+                request, request_info, None
+            ):
+
                 response = resp
                 request_info = info
+                if request_info is None:
+                    raise RuntimeError("Received invalid request info from backend")
 
             # Complete the request
             request_info.timings.resolve_end = time.time()
@@ -397,6 +390,39 @@ class WorkerProcess(Generic[RequestT, ResponseT]):
             if request_info is not None:
                 self.strategy.request_completed(request_info)
 
+    async def _dequeue_next_request(
+        self, target_start: float
+    ) -> tuple[RequestT, RequestInfo]:
+        request, request_info = await self.messaging.get()
+        dequeued_time = time.time()  # Ensure accurate dequeue timing
+        if request is None or request_info is None:
+            raise RuntimeError("Received invalid request or request info")
+        if isinstance(request, list | tuple):
+            raise NotImplementedError("Multi-turn requests are not yet supported")
+
+        request_info.timings.dequeued = dequeued_time
+        request_info.scheduler_node_id = self.messaging.worker_index or -1
+        request_info.timings.targeted_start = target_start
+        self._send_update("pending", None, request, request_info)
+        return request, request_info
+
+    async def _schedule_request(
+        self,
+        request: RequestT,
+        request_info: RequestInfo,
+        target_start: float
+    ):
+        current_time = time.time()
+        request_info.timings.scheduled_at = current_time
+        if target_start > current_time:
+            await asyncio.sleep(target_start - current_time)
+            # Adapt delay so that scheduled at reflects the sleep time
+            request_info.timings.scheduled_at = target_start
+
+        # Process the request with the backend
+        request_info.timings.resolve_start = time.time()
+        self._send_update("in_progress", None, request, request_info)
+
     def _send_update(
         self,
         new_status: Literal[
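The extracted _schedule_request keeps the original timing trick: if the worker wakes late, scheduled_at stays the actual time, but if it slept until the target, scheduled_at is rewritten to the targeted timestamp rather than the jittery wakeup time. A minimal sketch of just that logic (hypothetical sleep_until helper, not from guidellm):

    import asyncio
    import time

    async def sleep_until(target_start: float) -> float:
        # If we had to sleep, report the targeted timestamp, not the actual
        # (jittery) wakeup time; otherwise report the late actual time.
        now = time.time()
        if target_start > now:
            await asyncio.sleep(target_start - now)
            return target_start
        return now

    # Example: asyncio.run(sleep_until(time.time() + 0.5))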
{guidellm-0.4.0a169 → guidellm-0.4.0a173}/src/guidellm/scheduler/worker_group.py
@@ -84,7 +84,7 @@ class WorkerProcessGroup(Generic[RequestT, ResponseT]):
         backend: BackendInterface[RequestT, ResponseT],
         strategy: SchedulingStrategy,
         startup_duration: float,
-        **constraints: dict[str, Constraint],
+        **constraints: Constraint,
     ):
         """
         Initialize a worker process group for distributed request processing.
@@ -232,7 +232,7 @@ class WorkerProcessGroup(Generic[RequestT, ResponseT]):
                 worker_index=rank,
                 max_buffer_send_size=None,
                 max_buffer_receive_size=per_proc_max_buffer_size,
-            ),
+            ),  # The non-group worker lacks the SchedulerState type. Type err.
             backend=self.backend,
             strategy=self.strategy,
             async_limit=async_limit,
@@ -478,9 +478,9 @@ class WorkerGroupState(Generic[RequestT, ResponseT]):
             num_processes=len(processes),
             start_time=start_time,
         )
-        self._queued_requests: set[RequestT | MultiTurnRequestT[RequestT]] = set()
-        self._pending_requests: set[RequestT | MultiTurnRequestT[RequestT]] = set()
-        self._processing_requests: set[RequestT | MultiTurnRequestT[RequestT]] = set()
+        self._queued_request_ids: set[str] = set()
+        self._pending_request_ids: set[str] = set()
+        self._processing_request_ids: set[str] = set()
 
     def requests_generator(
         self, requests: Iterable[RequestT | MultiTurnRequestT[RequestT]]
@@ -517,11 +517,13 @@ class WorkerGroupState(Generic[RequestT, ResponseT]):
             )
             state_update = self._locked_update(request_info)
             request_info.timings.queued = time.time()
+            if self.messaging.buffer_receive_queue is None:
+                raise RuntimeError("buffer receive queue is None")
             self.messaging.buffer_receive_queue.sync_put(
                 (None, request, request_info, state_update.state)
             )
 
-            yield (request, request_info)
+            yield request, request_info
 
             if state_update.stop_queueing:
                 self.stop_send_requests_event.set()
@@ -530,8 +532,8 @@ class WorkerGroupState(Generic[RequestT, ResponseT]):
         # Reached the end, inject a RequestsExhaustedConstraint to record
         self._locked_update(
             info=None,
-            requests_exhausted={
-                "requests_exhausted": RequestsExhaustedConstraint(
+            add_constraints={
+                "requests_exhausted": RequestsExhaustedConstraint(  # type: ignore[dict-item]
                     num_requests=count
                 )
             },
@@ -610,10 +612,10 @@ class WorkerGroupState(Generic[RequestT, ResponseT]):
     def _locked_update(
         self,
         info: RequestInfo | None = None,
-        **add_constraints: dict[str, Constraint],
+        add_constraints: dict[str, Constraint] | None = None,
     ) -> _StateUpdate:
         with self._update_lock:
-            if add_constraints:
+            if add_constraints is not None:
                 self.constraints.update(add_constraints)
 
             if info is not None:
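The old annotation `**add_constraints: dict[str, Constraint]` was doubly wrong: with `**kwargs`, the annotation describes each value, so every constraint would have had to be a dict, and callers could not pass a prebuilt mapping without unpacking it. A before/after sketch (hypothetical stand-ins, not the guidellm types):

    from typing import Any

    # Before: kwargs spread; the annotation wrongly typed each value as a dict.
    def locked_update_old(info: Any = None, **add_constraints: Any) -> None:
        if add_constraints:  # truthiness: empty kwargs means nothing to add
            ...

    # After: one explicit optional mapping, checked against None.
    def locked_update_new(
        info: Any = None, add_constraints: dict[str, Any] | None = None
    ) -> None:
        if add_constraints is not None:
            ...

    locked_update_new(add_constraints={"requests_exhausted": object()})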
@@ -631,34 +633,34 @@ class WorkerGroupState(Generic[RequestT, ResponseT]):
 
     def _update_state_request_counts(self, info: RequestInfo):
         if info.status == "queued":
-            self._queued_requests.add(info.request_id)
-            self._state.queued_requests = len(self._queued_requests)
+            self._queued_request_ids.add(info.request_id)
+            self._state.queued_requests = len(self._queued_request_ids)
             self._state.created_requests += 1
         elif info.status == "pending":
-            self._queued_requests.remove(info.request_id)
-            self._state.queued_requests = len(self._queued_requests)
-            self._pending_requests.add(info.request_id)
-            self._state.pending_requests = len(self._pending_requests)
+            self._queued_request_ids.remove(info.request_id)
+            self._state.queued_requests = len(self._queued_request_ids)
+            self._pending_request_ids.add(info.request_id)
+            self._state.pending_requests = len(self._pending_request_ids)
         elif info.status == "in_progress":
-            self._pending_requests.remove(info.request_id)
-            self._state.pending_requests = len(self._pending_requests)
-            self._processing_requests.add(info.request_id)
-            self._state.processing_requests = len(self._processing_requests)
+            self._pending_request_ids.remove(info.request_id)
+            self._state.pending_requests = len(self._pending_request_ids)
+            self._processing_request_ids.add(info.request_id)
+            self._state.processing_requests = len(self._processing_request_ids)
         elif info.status == "completed":
-            self._processing_requests.remove(info.request_id)
-            self._state.processing_requests = len(self._processing_requests)
+            self._processing_request_ids.remove(info.request_id)
+            self._state.processing_requests = len(self._processing_request_ids)
             self._state.processed_requests += 1
             self._state.successful_requests += 1
         elif info.status in ("errored", "cancelled"):
-            if info.request_id in self._queued_requests:
-                self._queued_requests.remove(info.request_id)
-                self._state.queued_requests = len(self._queued_requests)
-            elif info.request_id in self._pending_requests:
-                self._pending_requests.remove(info.request_id)
-                self._state.pending_requests = len(self._pending_requests)
-            elif info.request_id in self._processing_requests:
-                self._processing_requests.remove(info.request_id)
-                self._state.processing_requests = len(self._processing_requests)
+            if info.request_id in self._queued_request_ids:
+                self._queued_request_ids.remove(info.request_id)
+                self._state.queued_requests = len(self._queued_request_ids)
+            elif info.request_id in self._pending_request_ids:
+                self._pending_request_ids.remove(info.request_id)
+                self._state.pending_requests = len(self._pending_request_ids)
+            elif info.request_id in self._processing_request_ids:
+                self._processing_request_ids.remove(info.request_id)
+                self._state.processing_requests = len(self._processing_request_ids)
 
             self._state.processed_requests += 1
             self._state.errored_requests += 1 if info.status == "errored" else 0
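Note the old code already stored info.request_id in these sets, so the set[RequestT | MultiTurnRequestT[RequestT]] annotations never matched the runtime contents; renaming to *_request_ids: set[str] makes the types honest and keeps membership checks O(1) without requiring request objects to be hashable. A small sketch of the id-based lifecycle bookkeeping (hypothetical, simplified to two sets):

    queued_ids: set[str] = set()
    pending_ids: set[str] = set()

    def on_status_change(request_id: str, status: str) -> None:
        # Move the id between lifecycle sets; counts are just len(set).
        if status == "queued":
            queued_ids.add(request_id)
        elif status == "pending":
            queued_ids.remove(request_id)  # KeyError here signals a state bug
            pending_ids.add(request_id)

    on_status_change("req-1", "queued")
    on_status_change("req-1", "pending")
    assert "req-1" in pending_ids and not queued_ids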
guidellm-0.4.0a173/src/guidellm/version.py (added)
@@ -0,0 +1,6 @@
+version = "0.4.0a173"
+build_type = "nightly"
+build_iteration = "173"
+git_commit = "585917501a31f053ff01d695ceeb6d1f23007cf0"
+git_branch = "main"
+git_last_tag = "v0.3.1"
{guidellm-0.4.0a169 → guidellm-0.4.0a173/src/guidellm.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: guidellm
-Version: 0.4.0a169
+Version: 0.4.0a173
 Summary: Guidance platform for deploying and managing large language models.
 Author: Red Hat
 License: Apache-2.0
guidellm-0.4.0a169/src/guidellm/version.py (removed)
@@ -1,6 +0,0 @@
-version = "0.4.0a169"
-build_type = "nightly"
-build_iteration = "169"
-git_commit = "dd219f1aeda5997644baee0fd9efef286a9fa2c1"
-git_branch = "main"
-git_last_tag = "v0.3.1"