guidellm 0.4.0a155__py3-none-any.whl → 0.4.0a173__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of guidellm might be problematic. Click here for more details.

Files changed (32):
  1. guidellm/__main__.py +4 -3
  2. guidellm/benchmark/benchmarker.py +2 -0
  3. guidellm/benchmark/entrypoints.py +1 -0
  4. guidellm/benchmark/output.py +3 -1
  5. guidellm/benchmark/schemas.py +2 -1
  6. guidellm/data/deserializers/deserializer.py +79 -44
  7. guidellm/data/deserializers/file.py +14 -14
  8. guidellm/data/deserializers/huggingface.py +1 -1
  9. guidellm/data/deserializers/memory.py +20 -18
  10. guidellm/data/deserializers/synthetic.py +18 -16
  11. guidellm/data/loaders.py +7 -3
  12. guidellm/data/preprocessors/formatters.py +24 -32
  13. guidellm/data/preprocessors/mappers.py +2 -2
  14. guidellm/data/preprocessors/preprocessor.py +5 -3
  15. guidellm/data/processor.py +3 -2
  16. guidellm/data/utils/__init__.py +0 -4
  17. guidellm/data/utils/dataset.py +2 -2
  18. guidellm/scheduler/constraints.py +1 -3
  19. guidellm/scheduler/environments.py +2 -2
  20. guidellm/scheduler/scheduler.py +1 -1
  21. guidellm/scheduler/strategies.py +31 -4
  22. guidellm/scheduler/worker.py +56 -30
  23. guidellm/scheduler/worker_group.py +33 -31
  24. guidellm/schemas/request.py +10 -0
  25. guidellm/utils/cli.py +26 -1
  26. {guidellm-0.4.0a155.dist-info → guidellm-0.4.0a173.dist-info}/METADATA +1 -1
  27. {guidellm-0.4.0a155.dist-info → guidellm-0.4.0a173.dist-info}/RECORD +31 -32
  28. guidellm/data/utils/functions.py +0 -18
  29. {guidellm-0.4.0a155.dist-info → guidellm-0.4.0a173.dist-info}/WHEEL +0 -0
  30. {guidellm-0.4.0a155.dist-info → guidellm-0.4.0a173.dist-info}/entry_points.txt +0 -0
  31. {guidellm-0.4.0a155.dist-info → guidellm-0.4.0a173.dist-info}/licenses/LICENSE +0 -0
  32. {guidellm-0.4.0a155.dist-info → guidellm-0.4.0a173.dist-info}/top_level.txt +0 -0
@@ -1,9 +1,10 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Any, Protocol, Union, runtime_checkable
3
+ from typing import Any, Protocol, runtime_checkable
4
4
 
5
5
  from datasets import Dataset, IterableDataset
6
6
 
7
+ from guidellm.schemas import GenerationRequest
7
8
  from guidellm.utils import RegistryMixin
8
9
 
9
10
  __all__ = ["DataDependentPreprocessor", "DatasetPreprocessor", "PreprocessorRegistry"]
@@ -11,7 +12,8 @@ __all__ = ["DataDependentPreprocessor", "DatasetPreprocessor", "PreprocessorRegi
11
12
 
12
13
  @runtime_checkable
13
14
  class DatasetPreprocessor(Protocol):
14
- def __call__(self, item: dict[str, Any]) -> dict[str, Any]: ...
15
+ def __call__(self, item: dict[str, Any]) -> (
16
+ GenerationRequest | dict[str, Any]): ...
15
17
 
16
18
 
17
19
  @runtime_checkable
@@ -24,6 +26,6 @@ class DataDependentPreprocessor(DatasetPreprocessor, Protocol):
24
26
 
25
27
 
26
28
  class PreprocessorRegistry(
27
- RegistryMixin[Union[DataDependentPreprocessor, type[DataDependentPreprocessor]]]
29
+ RegistryMixin[DataDependentPreprocessor | type[DataDependentPreprocessor]]
28
30
  ):
29
31
  pass
@@ -23,8 +23,9 @@ class ProcessorFactory:
23
23
  if isinstance(self.processor, PreTrainedTokenizerBase):
24
24
  return self.processor
25
25
  else:
26
- self.processor = AutoTokenizer.from_pretrained(
26
+ from_pretrained = AutoTokenizer.from_pretrained(
27
27
  self.processor,
28
28
  **(self.processor_args or {}),
29
29
  )
30
- return self.processor
30
+ self.processor = from_pretrained
31
+ return from_pretrained
@@ -1,10 +1,6 @@
1
1
  from .dataset import DEFAULT_SPLITS, resolve_dataset_split
2
- from .functions import (
3
- text_stats,
4
- )
5
2
 
6
3
  __all__ = [
7
4
  "DEFAULT_SPLITS",
8
5
  "resolve_dataset_split",
9
- "text_stats",
10
6
  ]
@@ -73,7 +73,7 @@ def resolve_dataset_split(
73
73
  dataset: Dataset | IterableDataset | DatasetDict | IterableDatasetDict,
74
74
  split: str | None = None,
75
75
  ) -> Dataset | IterableDataset:
76
- if split is not None and isinstance(dataset, (DatasetDict, IterableDatasetDict)):
76
+ if split is not None and isinstance(dataset, DatasetDict | IterableDatasetDict):
77
77
  if split in dataset:
78
78
  return dataset[split]
79
79
 
@@ -83,7 +83,7 @@ def resolve_dataset_split(
83
83
  f"Requested split '{split}' but dataset has no splits: {dataset}."
84
84
  )
85
85
 
86
- if isinstance(dataset, (Dataset, IterableDataset)):
86
+ if isinstance(dataset, Dataset | IterableDataset):
87
87
  return dataset
88
88
 
89
89
  for _, default_splits in DEFAULT_SPLITS.items():
@@ -1005,9 +1005,7 @@ class RequestsExhaustedConstraint(StandardBaseModel, InfoMixin):
1005
1005
  return self.model_dump()
1006
1006
 
1007
1007
  def __call__(
1008
- self,
1009
- state: SchedulerState,
1010
- request_info: RequestInfo, # noqa: ARG002
1008
+ self, state: SchedulerState, _request: RequestInfo
1011
1009
  ) -> SchedulerUpdateAction:
1012
1010
  create_exceeded = state.created_requests >= self.num_requests
1013
1011
  processed_exceeded = state.processed_requests >= self.num_requests
@@ -84,7 +84,7 @@ class Environment(ABC, Generic[RequestT, ResponseT], InfoMixin):
84
84
  async def update_run_iteration(
85
85
  self,
86
86
  response: ResponseT | None,
87
- request: RequestT,
87
+ request: RequestT | MultiTurnRequestT[RequestT],
88
88
  request_info: RequestInfo,
89
89
  state: SchedulerState,
90
90
  ):
@@ -201,7 +201,7 @@ class NonDistributedEnvironment(Environment[RequestT, ResponseT]):
201
201
  async def update_run_iteration(
202
202
  self,
203
203
  response: ResponseT | None,
204
- request: RequestT,
204
+ request: RequestT | MultiTurnRequestT[RequestT],
205
205
  request_info: RequestInfo,
206
206
  state: SchedulerState,
207
207
  ):
@@ -69,7 +69,7 @@ class Scheduler(
69
69
  ) -> AsyncIterator[
70
70
  tuple[
71
71
  ResponseT | None,
72
- RequestT,
72
+ RequestT | MultiTurnRequestT[RequestT],
73
73
  RequestInfo,
74
74
  SchedulerState,
75
75
  ]
@@ -70,8 +70,8 @@ class SchedulingStrategy(PydanticClassRegistryMixin["SchedulingStrategy"], InfoM
70
70
  description="Number of worker processes to use for this strategy",
71
71
  ge=0,
72
72
  )
73
- max_concurrency: int = Field(
74
- default=0,
73
+ max_concurrency: int | None = Field(
74
+ default=None,
75
75
  description="Maximum number of concurrent requests to allow",
76
76
  ge=0,
77
77
  )
@@ -122,8 +122,8 @@ class SchedulingStrategy(PydanticClassRegistryMixin["SchedulingStrategy"], InfoM
122
122
  self.startup_duration = startup_duration
123
123
 
124
124
  self._processes_request_index = Value("i", 0)
125
- self._processes_lock = Lock()
126
125
  self._processes_start_time = Value("d", -1.0)
126
+ self._processes_lock = Lock()
127
127
 
128
128
  def init_processes_start(self, start_time: float):
129
129
  """
@@ -137,6 +137,10 @@ class SchedulingStrategy(PydanticClassRegistryMixin["SchedulingStrategy"], InfoM
137
137
  "SchedulingStrategy init_processes_start called before "
138
138
  "init_processes_timings"
139
139
  )
140
+ if self._processes_start_time is None:
141
+ raise RuntimeError(
142
+ "_processes_lock is not None but _processes_start_time is None"
143
+ )
140
144
 
141
145
  with self._processes_lock:
142
146
  self._processes_start_time.value = start_time
@@ -153,6 +157,10 @@ class SchedulingStrategy(PydanticClassRegistryMixin["SchedulingStrategy"], InfoM
153
157
  "SchedulingStrategy get_processes_start_time called before "
154
158
  "init_processes_timings"
155
159
  )
160
+ if self._processes_start_time is None:
161
+ raise RuntimeError(
162
+ "_processes_lock is not None but _processes_start_time is None"
163
+ )
156
164
 
157
165
  while self._cached_processes_start_time is None:
158
166
  with self._processes_lock:
@@ -175,6 +183,10 @@ class SchedulingStrategy(PydanticClassRegistryMixin["SchedulingStrategy"], InfoM
175
183
  "SchedulingStrategy next_request_index called before "
176
184
  "init_processes_timings"
177
185
  )
186
+ if self._processes_request_index is None:
187
+ raise RuntimeError(
188
+ "_processes_lock is not None but _processes_request_index is None"
189
+ )
178
190
 
179
191
  with self._processes_lock:
180
192
  self._processes_request_index.value += 1
@@ -369,7 +381,8 @@ class ThroughputStrategy(SchedulingStrategy):
369
381
  start_time = await self.get_processes_start_time()
370
382
 
371
383
  if (
372
- self.startup_duration > 0
384
+ self.max_concurrency is not None
385
+ and self.startup_duration > 0
373
386
  and (time.time() - start_time) < self.startup_duration
374
387
  and (current_index := self.next_request_index()) <= self.max_concurrency
375
388
  ):
@@ -477,6 +490,8 @@ class AsyncPoissonStrategy(ThroughputStrategy):
477
490
  :param startup_duration: Duration in seconds for request startup ramping
478
491
  """
479
492
  super().init_processes_timings(worker_count, max_concurrency, startup_duration)
493
+ if self._processes_lock is None:
494
+ raise RuntimeError("_processes_lock is None in init_processes_timings")
480
495
  with self._processes_lock:
481
496
  self._offset = Value("d", -1.0)
482
497
 
@@ -487,6 +502,12 @@ class AsyncPoissonStrategy(ThroughputStrategy):
487
502
  :param start_time: Unix timestamp when request processing should begin
488
503
  """
489
504
  ThroughputStrategy.init_processes_start(self, start_time)
505
+
506
+ if self._processes_lock is None:
507
+ raise RuntimeError("_processes_lock is None in init_processes_start")
508
+ if self._offset is None:
509
+ raise RuntimeError("_offset is None in init_processes_start; was "
510
+ "init_processes_timings not called?")
490
511
  with self._processes_lock:
491
512
  self._offset.value = start_time
492
513
 
@@ -505,6 +526,12 @@ class AsyncPoissonStrategy(ThroughputStrategy):
505
526
 
506
527
  next_delay = self._random.expovariate(self.rate)
507
528
 
529
+ if self._processes_lock is None:
530
+ raise RuntimeError("_processes_lock is None in next_request_time; was "
531
+ "init_processes_timings not called?")
532
+ if self._offset is None:
533
+ raise RuntimeError("_offset is None in next_request_time; was "
534
+ "init_processes_timings not called?")
508
535
  with self._processes_lock:
509
536
  self._offset.value += next_delay
510
537
 
@@ -23,11 +23,9 @@ try:
23
23
  bool, "Flag indicating uvloop availability for event loop optimization"
24
24
  ] = True
25
25
  except ImportError:
26
- uvloop = None
26
+ uvloop = None # type: ignore[assignment] # Optional dependency
27
27
 
28
- HAS_UVLOOP: Annotated[
29
- bool, "Flag indicating uvloop availability for event loop optimization"
30
- ] = False
28
+ HAS_UVLOOP = False
31
29
 
32
30
 
33
31
  from guidellm.scheduler.schemas import (
@@ -84,6 +82,10 @@ class WorkerProcess(Generic[RequestT, ResponseT]):
84
82
  RequestT | MultiTurnRequestT[RequestT],
85
83
  RequestInfo,
86
84
  ],
85
+ tuple[
86
+ RequestT | MultiTurnRequestT[RequestT],
87
+ RequestInfo,
88
+ ],
87
89
  ],
88
90
  backend: BackendInterface[RequestT, ResponseT],
89
91
  strategy: SchedulingStrategy,
@@ -201,8 +203,11 @@ class WorkerProcess(Generic[RequestT, ResponseT]):
201
203
 
202
204
  async def _stop_monitor(
203
205
  self,
204
- ) -> Literal["error_event", "shutdown_event"]:
205
- """Monitor shutdown and error events for worker termination."""
206
+ ) -> None:
207
+ """
208
+ Monitor shutdown and error events for worker termination.
209
+ :raises RuntimeError if the work process received an error signal.
210
+ """
206
211
  exit_key = await wait_for_sync_objects(
207
212
  {
208
213
  "error_event": self.error_event,
@@ -322,7 +327,7 @@ class WorkerProcess(Generic[RequestT, ResponseT]):
322
327
  """Cancel all remaining queued requests until worker process terminates."""
323
328
  while True:
324
329
  try:
325
- request: RequestT
330
+ request: RequestT | MultiTurnRequestT[RequestT]
326
331
  request_info: RequestInfo
327
332
  request, request_info = await self.messaging.get(
328
333
  timeout=self.messaging.poll_interval
@@ -350,31 +355,19 @@ class WorkerProcess(Generic[RequestT, ResponseT]):
350
355
 
351
356
  try:
352
357
  # Pull request from the queue, update state, and send "pending" update
353
- request, request_info = await self.messaging.get()
354
- request_info.timings.dequeued = time.time()
355
- request_info.scheduler_node_id = self.messaging.worker_index or -1
356
- request_info.timings.targeted_start = target_start
357
- self._send_update("pending", response, request, request_info)
358
-
359
- if request is None or request_info is None:
360
- raise RuntimeError("Received invalid request or request info")
361
- if isinstance(request, list | tuple):
362
- raise NotImplementedError("Multi-turn requests are not yet supported")
363
-
364
- # Schedule the request
365
- current_time = time.time()
366
- request_info.timings.scheduled_at = current_time
367
- if target_start > current_time:
368
- await asyncio.sleep(target_start - current_time)
369
- # Adapt delay so that scheduled at reflects the sleep time
370
- request_info.timings.scheduled_at = target_start
371
-
372
- # Process the request with the backend
373
- request_info.timings.resolve_start = time.time()
374
- self._send_update("in_progress", response, request, request_info)
375
- async for resp, info in self.backend.resolve(request, request_info, None):
358
+ request, request_info = await self._dequeue_next_request(target_start)
359
+
360
+ # Schedule the request and send "in_progress" update
361
+ await self._schedule_request(request, request_info, target_start)
362
+
363
+ async for resp, info in self.backend.resolve( # type: ignore[attr-defined]
364
+ request, request_info, None
365
+ ):
366
+
376
367
  response = resp
377
368
  request_info = info
369
+ if request_info is None:
370
+ raise RuntimeError("Received invalid request info from backend")
378
371
 
379
372
  # Complete the request
380
373
  request_info.timings.resolve_end = time.time()
@@ -397,6 +390,39 @@ class WorkerProcess(Generic[RequestT, ResponseT]):
397
390
  if request_info is not None:
398
391
  self.strategy.request_completed(request_info)
399
392
 
393
+ async def _dequeue_next_request(
394
+ self, target_start: float
395
+ ) -> tuple[RequestT, RequestInfo]:
396
+ request, request_info = await self.messaging.get()
397
+ dequeued_time = time.time() # Ensure accurate dequeue timing
398
+ if request is None or request_info is None:
399
+ raise RuntimeError("Received invalid request or request info")
400
+ if isinstance(request, list | tuple):
401
+ raise NotImplementedError("Multi-turn requests are not yet supported")
402
+
403
+ request_info.timings.dequeued = dequeued_time
404
+ request_info.scheduler_node_id = self.messaging.worker_index or -1
405
+ request_info.timings.targeted_start = target_start
406
+ self._send_update("pending", None, request, request_info)
407
+ return request, request_info
408
+
409
+ async def _schedule_request(
410
+ self,
411
+ request: RequestT,
412
+ request_info: RequestInfo,
413
+ target_start: float
414
+ ):
415
+ current_time = time.time()
416
+ request_info.timings.scheduled_at = current_time
417
+ if target_start > current_time:
418
+ await asyncio.sleep(target_start - current_time)
419
+ # Adapt delay so that scheduled at reflects the sleep time
420
+ request_info.timings.scheduled_at = target_start
421
+
422
+ # Process the request with the backend
423
+ request_info.timings.resolve_start = time.time()
424
+ self._send_update("in_progress", None, request, request_info)
425
+
400
426
  def _send_update(
401
427
  self,
402
428
  new_status: Literal[
@@ -84,7 +84,7 @@ class WorkerProcessGroup(Generic[RequestT, ResponseT]):
84
84
  backend: BackendInterface[RequestT, ResponseT],
85
85
  strategy: SchedulingStrategy,
86
86
  startup_duration: float,
87
- **constraints: dict[str, Constraint],
87
+ **constraints: Constraint,
88
88
  ):
89
89
  """
90
90
  Initialize a worker process group for distributed request processing.
@@ -232,7 +232,7 @@ class WorkerProcessGroup(Generic[RequestT, ResponseT]):
232
232
  worker_index=rank,
233
233
  max_buffer_send_size=None,
234
234
  max_buffer_receive_size=per_proc_max_buffer_size,
235
- ),
235
+ ), # The non-group worker lacks the SchedulerState type. Type err.
236
236
  backend=self.backend,
237
237
  strategy=self.strategy,
238
238
  async_limit=async_limit,
@@ -478,9 +478,9 @@ class WorkerGroupState(Generic[RequestT, ResponseT]):
478
478
  num_processes=len(processes),
479
479
  start_time=start_time,
480
480
  )
481
- self._queued_requests: set[RequestT | MultiTurnRequestT[RequestT]] = set()
482
- self._pending_requests: set[RequestT | MultiTurnRequestT[RequestT]] = set()
483
- self._processing_requests: set[RequestT | MultiTurnRequestT[RequestT]] = set()
481
+ self._queued_request_ids: set[str] = set()
482
+ self._pending_request_ids: set[str] = set()
483
+ self._processing_request_ids: set[str] = set()
484
484
 
485
485
  def requests_generator(
486
486
  self, requests: Iterable[RequestT | MultiTurnRequestT[RequestT]]
@@ -517,11 +517,13 @@ class WorkerGroupState(Generic[RequestT, ResponseT]):
517
517
  )
518
518
  state_update = self._locked_update(request_info)
519
519
  request_info.timings.queued = time.time()
520
+ if self.messaging.buffer_receive_queue is None:
521
+ raise RuntimeError("buffer receive queue is None")
520
522
  self.messaging.buffer_receive_queue.sync_put(
521
523
  (None, request, request_info, state_update.state)
522
524
  )
523
525
 
524
- yield (request, request_info)
526
+ yield request, request_info
525
527
 
526
528
  if state_update.stop_queueing:
527
529
  self.stop_send_requests_event.set()
@@ -530,8 +532,8 @@ class WorkerGroupState(Generic[RequestT, ResponseT]):
530
532
  # Reached the end, inject a RequestsExhaustedConstraint to record
531
533
  self._locked_update(
532
534
  info=None,
533
- requests_exhausted={
534
- "requests_exhausted": RequestsExhaustedConstraint(
535
+ add_constraints={
536
+ "requests_exhausted": RequestsExhaustedConstraint( # type: ignore[dict-item]
535
537
  num_requests=count
536
538
  )
537
539
  },
@@ -610,10 +612,10 @@ class WorkerGroupState(Generic[RequestT, ResponseT]):
610
612
  def _locked_update(
611
613
  self,
612
614
  info: RequestInfo | None = None,
613
- **add_constraints: dict[str, Constraint],
615
+ add_constraints: dict[str, Constraint] | None = None,
614
616
  ) -> _StateUpdate:
615
617
  with self._update_lock:
616
- if add_constraints:
618
+ if add_constraints is not None:
617
619
  self.constraints.update(add_constraints)
618
620
 
619
621
  if info is not None:
@@ -631,34 +633,34 @@ class WorkerGroupState(Generic[RequestT, ResponseT]):
631
633
 
632
634
  def _update_state_request_counts(self, info: RequestInfo):
633
635
  if info.status == "queued":
634
- self._queued_requests.add(info.request_id)
635
- self._state.queued_requests = len(self._queued_requests)
636
+ self._queued_request_ids.add(info.request_id)
637
+ self._state.queued_requests = len(self._queued_request_ids)
636
638
  self._state.created_requests += 1
637
639
  elif info.status == "pending":
638
- self._queued_requests.remove(info.request_id)
639
- self._state.queued_requests = len(self._queued_requests)
640
- self._pending_requests.add(info.request_id)
641
- self._state.pending_requests = len(self._pending_requests)
640
+ self._queued_request_ids.remove(info.request_id)
641
+ self._state.queued_requests = len(self._queued_request_ids)
642
+ self._pending_request_ids.add(info.request_id)
643
+ self._state.pending_requests = len(self._pending_request_ids)
642
644
  elif info.status == "in_progress":
643
- self._pending_requests.remove(info.request_id)
644
- self._state.pending_requests = len(self._pending_requests)
645
- self._processing_requests.add(info.request_id)
646
- self._state.processing_requests = len(self._processing_requests)
645
+ self._pending_request_ids.remove(info.request_id)
646
+ self._state.pending_requests = len(self._pending_request_ids)
647
+ self._processing_request_ids.add(info.request_id)
648
+ self._state.processing_requests = len(self._processing_request_ids)
647
649
  elif info.status == "completed":
648
- self._processing_requests.remove(info.request_id)
649
- self._state.processing_requests = len(self._processing_requests)
650
+ self._processing_request_ids.remove(info.request_id)
651
+ self._state.processing_requests = len(self._processing_request_ids)
650
652
  self._state.processed_requests += 1
651
653
  self._state.successful_requests += 1
652
654
  elif info.status in ("errored", "cancelled"):
653
- if info.request_id in self._queued_requests:
654
- self._queued_requests.remove(info.request_id)
655
- self._state.queued_requests = len(self._queued_requests)
656
- elif info.request_id in self._pending_requests:
657
- self._pending_requests.remove(info.request_id)
658
- self._state.pending_requests = len(self._pending_requests)
659
- elif info.request_id in self._processing_requests:
660
- self._processing_requests.remove(info.request_id)
661
- self._state.processing_requests = len(self._processing_requests)
655
+ if info.request_id in self._queued_request_ids:
656
+ self._queued_request_ids.remove(info.request_id)
657
+ self._state.queued_requests = len(self._queued_request_ids)
658
+ elif info.request_id in self._pending_request_ids:
659
+ self._pending_request_ids.remove(info.request_id)
660
+ self._state.pending_requests = len(self._pending_request_ids)
661
+ elif info.request_id in self._processing_request_ids:
662
+ self._processing_request_ids.remove(info.request_id)
663
+ self._state.processing_requests = len(self._processing_request_ids)
662
664
 
663
665
  self._state.processed_requests += 1
664
666
  self._state.errored_requests += 1 if info.status == "errored" else 0
@@ -169,6 +169,16 @@ class UsageMetrics(StandardBaseDict):
169
169
  self.video_tokens or 0
170
170
  ) + (self.audio_tokens or 0) or None
171
171
 
172
+ def add_text_metrics(self, text):
173
+ """
174
+ Adds the metrics from the given text to the fields
175
+ `text_characters` and `text_words`.
176
+
177
+ :param text: Text to add metrics from
178
+ """
179
+ self.text_characters = (self.text_characters or 0) + len(text)
180
+ self.text_words = (self.text_words or 0) + len(text.split())
181
+
172
182
 
173
183
  class GenerationRequest(StandardBaseModel):
174
184
  """
guidellm/utils/cli.py CHANGED
@@ -3,9 +3,34 @@ from typing import Any
3
3
 
4
4
  import click
5
5
 
6
- __all__ = ["Union", "format_list_arg", "parse_json", "set_if_not_default"]
6
+ __all__ = [
7
+ "Union",
8
+ "format_list_arg",
9
+ "parse_json",
10
+ "parse_list_floats",
11
+ "set_if_not_default",
12
+ ]
7
13
 
8
14
 
15
+ def parse_list_floats(ctx, param, value): # noqa: ARG001
16
+ """
17
+ Callback to parse a comma-separated string into a list of floats.
18
+ """
19
+ # This callback only runs if the --rate option is provided by the user.
20
+ # If it's not, 'value' will be None, and Click will use the 'default'.
21
+ if value is None:
22
+ return None # Keep the default
23
+
24
+ try:
25
+ # Split by comma, strip any whitespace, and convert to float
26
+ return [float(item.strip()) for item in value.split(",")]
27
+ except ValueError as e:
28
+ # Raise a Click error if any part isn't a valid float
29
+ raise click.BadParameter(
30
+ f"Value '{value}' is not a valid comma-separated list "
31
+ f"of floats/ints. Error: {e}"
32
+ ) from e
33
+
9
34
  def parse_json(ctx, param, value): # noqa: ARG001
10
35
  if value is None or value == [None]:
11
36
  return None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: guidellm
3
- Version: 0.4.0a155
3
+ Version: 0.4.0a173
4
4
  Summary: Guidance platform for deploying and managing large language models.
5
5
  Author: Red Hat
6
6
  License: Apache-2.0
@@ -1,5 +1,5 @@
1
1
  guidellm/__init__.py,sha256=1zl-PT9IZJvDfdLSMviPLzhVE3_ZXpizmc9s7UWa6kQ,1206
2
- guidellm/__main__.py,sha256=bzKBbZP4qXx9u5QhYZTp6tWqqf5NpcGfTxAUV3giKbA,20512
2
+ guidellm/__main__.py,sha256=uU5K-QV7rHBARdSTwsNRAPLVoTVT5NQ6DoHx7jssZyc,20554
3
3
  guidellm/logger.py,sha256=6qGOeff8hOJF6p57Zietq6qr64N7E40CJSQSQcUFgKc,2912
4
4
  guidellm/settings.py,sha256=C4miDtWaI5lJ4NBXxfuUitt5-6_FCzZPzM1Bjie9XoA,7283
5
5
  guidellm/version.py,sha256=NIzyWA7lNdSpf2MtPJuOjvW5h6E9nGDea2G4nGFDbgY,127
@@ -8,33 +8,32 @@ guidellm/backends/backend.py,sha256=Qz7z3s6rWwdYGVC-CbPvSFKWSsPiJsSFKBFyfvBG6rY,
8
8
  guidellm/backends/openai.py,sha256=0At-DMt3_kD0Qa788j_HJ1BcFiUh-SFReoYyaTy8k1g,13007
9
9
  guidellm/backends/response_handlers.py,sha256=jwoAGzy-BXxYzMroq08dEUA5ooWoscRpGdU5I4cCEUw,17136
10
10
  guidellm/benchmark/__init__.py,sha256=E9g3x0Peopsapw6Pkk9AAthUv3tIOaH59Ai6_92xnzM,2337
11
- guidellm/benchmark/benchmarker.py,sha256=I3pgAUQSnp1TFrDDGqZ5El-ZQRYyLQer4cfEyI16NVQ,6444
12
- guidellm/benchmark/entrypoints.py,sha256=hbuluEL4Hlj5IOgaagANyzWauMp8kxIv83UytXQYLDI,17938
13
- guidellm/benchmark/output.py,sha256=dLlZFCOaAvNZmdsMGzXFdPFKIvM_H63uurxWh4dj69U,27243
11
+ guidellm/benchmark/benchmarker.py,sha256=wZNStFlAcygFBPPA5aee9RipXr-jq-GiH8dUUrijwvo,6500
12
+ guidellm/benchmark/entrypoints.py,sha256=U_0JvZYCkQ1Z-qkF8tN_JeuhuGn8mINScwi819bdzl8,17962
13
+ guidellm/benchmark/output.py,sha256=jHa7u6wTh_YSBdO2oDo47079KReXHZ-AKB6zgo8SMvg,27308
14
14
  guidellm/benchmark/profile.py,sha256=RvQdmVLNLU-V8U8xIXm3vH8tY7Hp_4jNPQe5dombj8g,24007
15
15
  guidellm/benchmark/progress.py,sha256=oZqZZ_vInmifBNd490ZTgcCjaGy2_slViEABSWDJgHI,25976
16
- guidellm/benchmark/schemas.py,sha256=pULZ0F0HbubtRDQBrU9XhJqY0me2GK8IdAXjcqLaRk0,81246
16
+ guidellm/benchmark/schemas.py,sha256=eC6WXTXoY3-6Q4SSBLs7VtgfLSilwP9FhsHdRH-6fr4,81249
17
17
  guidellm/benchmark/scenarios/__init__.py,sha256=SmaYf8hfByJU4LVJ7pZKNxJPYBObl7UKpoaJEmLPdTI,1276
18
18
  guidellm/benchmark/scenarios/chat.json,sha256=4H_ByPCv_9azHn6iTxCY3FfpoUtlbShDPdNyzDwHJVQ,226
19
19
  guidellm/benchmark/scenarios/rag.json,sha256=BIpifJoAtWgB3NRRYK51ZuCH4Zvh1OeBFanB7vcxS-E,231
20
20
  guidellm/data/__init__.py,sha256=0-7B9vBgY6QHT4svxnBhGN4DoA4jE_9HZxOgndyOiUM,743
21
21
  guidellm/data/collators.py,sha256=j4OLGqwbt4sNnTqE8iSbe11qmgJBHnyWjipNeEz1SCk,445
22
- guidellm/data/loaders.py,sha256=OcZh2-Mlt4ZQ6xAI5tpRp3THiYMBGT0DZEQrYupjBdA,5105
23
- guidellm/data/processor.py,sha256=8IzCiAr3n-6RAFZrr5dMfsh-7hpK2xF9s_xSQVRK0Mo,807
22
+ guidellm/data/loaders.py,sha256=4JxpnOXD-Oju9TcyBumW2JRLFqMo8-EIHN5DpKZJmCA,5413
23
+ guidellm/data/processor.py,sha256=HjSdXKJ67Y2tqcxNoP4LWkefNDLzuN_5nM8MXW_RVec,854
24
24
  guidellm/data/schemas.py,sha256=6KUv2OPW_f369SzEhyPBXPDMGoQtJzEPJalBCpp3f84,280
25
25
  guidellm/data/deserializers/__init__.py,sha256=NhfbgebOun2FgWTNPydto2f3LNSTlOqdaxeFpDvQUhY,1608
26
- guidellm/data/deserializers/deserializer.py,sha256=tNmMFUbCsIFnaWJ6vEXrie0jAUKlVA4Gyl7hhnetiEk,3805
27
- guidellm/data/deserializers/file.py,sha256=6XVc8MrHnHAc06hVXb_X0TH2DAL9S-oIXcK2HPMj9SI,7501
28
- guidellm/data/deserializers/huggingface.py,sha256=p4eN_jjuM_ChtyLcQy4QrTEW-tpsf3mIfYm3GGK-1JQ,2908
29
- guidellm/data/deserializers/memory.py,sha256=4u--QmbxyKgU2asNFo-a7DjKLSlH0ggSb6T8axVsevM,6621
30
- guidellm/data/deserializers/synthetic.py,sha256=tFs3Z6_ZwY-UdScJWXbswAZ31fVW8p9ISuOQM1qCvZM,12069
26
+ guidellm/data/deserializers/deserializer.py,sha256=SOCtXik1fVeS7yHgNFMihkq3RmqA-klgmLJULVmSjFU,5007
27
+ guidellm/data/deserializers/file.py,sha256=PzFOJcPuXrUM-OK6EbDIKhzEXDPp7X4xfe_wHZxXqKw,7445
28
+ guidellm/data/deserializers/huggingface.py,sha256=uk2WBH9WJ8csaRrw6wXQ5Xiz_pj_fN5c4jDV9pWf1F0,2954
29
+ guidellm/data/deserializers/memory.py,sha256=F6o2JwIUgcZHdeRkT051AS76i6wWlIw-XGH09_pOqDs,6670
30
+ guidellm/data/deserializers/synthetic.py,sha256=Gcx39gwW0ZvFQSplMfT4ULXfGAVp2v7YiByKmRH5C7Y,12188
31
31
  guidellm/data/preprocessors/__init__.py,sha256=khp1-m5EqJ6I40qFAYVv71LncrEXzKBmRocxQG5-ZuE,757
32
- guidellm/data/preprocessors/formatters.py,sha256=DV_-29rFuSqV7yyQETJ92FBPG9yrkOacEE1nhxXRVyc,14764
33
- guidellm/data/preprocessors/mappers.py,sha256=nn2zXkabgv0NVT4Iods-cJ3UGsdCpCsqDi81b7K9M_k,6769
34
- guidellm/data/preprocessors/preprocessor.py,sha256=7_9qezg820_JqEPizoIlGtMxZgEox17V2MxUHfkz5J8,747
35
- guidellm/data/utils/__init__.py,sha256=lsVIrDXiZgLXdGDeNqm4y5Ilai6jiMOpxVhyFap5ocA,186
36
- guidellm/data/utils/dataset.py,sha256=8VO7n_6F4ARSXitvzOCngtO-WokNfFb25lA-mijS7UE,2325
37
- guidellm/data/utils/functions.py,sha256=cuNCTzhiqFUCBpvwwVMgBqQORGz4q1XS3FUfXdxw-gQ,390
32
+ guidellm/data/preprocessors/formatters.py,sha256=F5BHtJZ6PdmevS9LI6e9TJPwUKnuSsZbt7qS8n2H_eM,14078
33
+ guidellm/data/preprocessors/mappers.py,sha256=7UBdRF2cdADqPbsri_1Mv3FhsQLJtUoIe_lSBV9owEQ,6715
34
+ guidellm/data/preprocessors/preprocessor.py,sha256=OqzAaabEG0v3VSxBfKEVda3ZnTmVjn4OesB2AOC9rhA,812
35
+ guidellm/data/utils/__init__.py,sha256=A8cJqUC7UVSr3upkkD6PzONGQUkjSV9HhMp31NJbI7E,125
36
+ guidellm/data/utils/dataset.py,sha256=ZaKlgGM_L6gcHghAfo5vG0NaHzPtpDWHddD1KX7E7_c,2323
38
37
  guidellm/extras/__init__.py,sha256=bNtt6CNDhwMM5XlL1q74j_df-1xoXavTShB05LjDYMw,96
39
38
  guidellm/extras/audio.py,sha256=ECDK5IFFBhfae1UQrOGGQCE_7wSCuTySo-TThpm4WfU,6421
40
39
  guidellm/extras/vision.py,sha256=hU8e7ryUnMZOT6_utR9GKhayvCXYPljeSwCx8S4-nIQ,7691
@@ -54,21 +53,21 @@ guidellm/presentation/builder.py,sha256=eSKsUUx7RbmsqgMRKxcaMuYqquanzS5moc8Uv9TI
54
53
  guidellm/presentation/data_models.py,sha256=1CjeHaevj2r4oHLcBQeDY0BQ9VXTol46KULr3F-ps48,7424
55
54
  guidellm/presentation/injector.py,sha256=mDo0hvrh4NE2c4RZK5GoegtzWVcvw_zEpvm5sRy7xGE,1834
56
55
  guidellm/scheduler/__init__.py,sha256=dj-RZDd5B6H6mt7dPAYGDayqa2k6ngqwfKgRUeCkwDw,2511
57
- guidellm/scheduler/constraints.py,sha256=a_cDJtmREU1rpbJ1UYxsdH9xW5fnOYd_uLj3VbfStb0,39590
58
- guidellm/scheduler/environments.py,sha256=W_kp2dICO7Z-NWoji9MwgDu0HjPDhUJNGXyll_LL45Y,8829
59
- guidellm/scheduler/scheduler.py,sha256=7y3PMAmkNwEYQt-I5ZPkilZZIaw1ad3Hs7z6ZgtQVBw,6986
56
+ guidellm/scheduler/constraints.py,sha256=MtuqMTtsiyDTzC7kEZzheqqvfgnsY45bhvaOmGDOrL8,39553
57
+ guidellm/scheduler/environments.py,sha256=eRQ9eLvQ61Yyo3nN_leYHTerla9mPuhr8tJD6_30XtM,8889
58
+ guidellm/scheduler/scheduler.py,sha256=_svO8hMJt4aNFHOjndi5Ac-GW7R7J1AVO9k3x_vzehY,7016
60
59
  guidellm/scheduler/schemas.py,sha256=iVxT0GaDK8q8ruuCL5D4046WrCf7X4v2KeZWfP8gdng,9461
61
- guidellm/scheduler/strategies.py,sha256=LfYogQCubT3eCKKEUQBWcsBh19kg8J5_9NZz77YAlGc,17796
62
- guidellm/scheduler/worker.py,sha256=4-RBw1znsR_Fv4O4agT1XEHETp7Fz9TOaOMBRIBm7-U,17157
63
- guidellm/scheduler/worker_group.py,sha256=czOxgbWJ0BKBqa0HOdqt09-n-xR4VUkiatJHsOCbJio,28597
60
+ guidellm/scheduler/strategies.py,sha256=hZbZfeUCTqIBnY30J7m7QwBMPiyLQ6LlqmRVNyrI_Vs,19182
61
+ guidellm/scheduler/worker.py,sha256=hXM0TpbwDakduRdGaSRShPnHDj-ZQJw2eVlRnagk8FM,17979
62
+ guidellm/scheduler/worker_group.py,sha256=l8T-IoitC--bBqhNhWhPM9rKofNfRlJX6HyzS2hBtBA,28799
64
63
  guidellm/schemas/__init__.py,sha256=4odN5dEqgRQaxsPpYLnyls0JAlDoEhbWnbYTnPZspN8,879
65
64
  guidellm/schemas/info.py,sha256=A_LVqpQteCQvf6XXPi02m1pYX2-vOtCEswLLTZfa9_Y,5678
66
- guidellm/schemas/request.py,sha256=K25Ph56nyhFMA9pzzX52uo4dG0K_Agb7paUvKsafAaI,7567
65
+ guidellm/schemas/request.py,sha256=YoOV6zmHFF9LoNZdF8810H9H3d9BEoNXufUzqTteH7g,7920
67
66
  guidellm/schemas/response.py,sha256=ROesx1rDI7g1jRoGbZjmGROazBxcT-3NVwQTIhwI2O4,4578
68
67
  guidellm/schemas/stats.py,sha256=4FPdMtoAVv-vQMLTaWYgIcqof7z6_nLHxYGV_lD1L9g,7507
69
68
  guidellm/utils/__init__.py,sha256=XGBV3fdETLihLn97_Sd0KM1B4hneoe3d1Oh0nMKObv8,3040
70
69
  guidellm/utils/auto_importer.py,sha256=rkraMx815TasixoFn0bwtp--7V7TxuEvfZUVFB8V5L0,3658
71
- guidellm/utils/cli.py,sha256=kw7A0HSTZaZDdAElHczo1WLCcL9DVlt13HG3a9mu_00,3545
70
+ guidellm/utils/cli.py,sha256=oMqLEw2pk9aCYQEuwmSsTO349Mil5Q6CKwocW3akmLo,4343
72
71
  guidellm/utils/colors.py,sha256=D0IGz8A346-Pt5qgnP3S5uV-VgngJoXbfToVCOna41k,175
73
72
  guidellm/utils/console.py,sha256=IC9vZ0PpwW9SxReZA3BGXyNRK20tdV0FDNuUmbMwUlE,4382
74
73
  guidellm/utils/default_group.py,sha256=iZ47bwRcUCxkX04Zdg0qpmqKtFg4P7lt5_hpw1CnKkA,4167
@@ -88,9 +87,9 @@ guidellm/utils/statistics.py,sha256=KzUYm4fVNVtDd6FRCRBnqYmFcea-9n0JKCAZyqeZLM8,
88
87
  guidellm/utils/synchronous.py,sha256=rRkWwbDf1ty607KUhDKsqV4HcdKU5o0-1s5hwdG-Hak,5209
89
88
  guidellm/utils/text.py,sha256=0K8yUEB4gzztevxzuiMXossSoHhvzcHoKqRhQYQdOrg,11644
90
89
  guidellm/utils/typing.py,sha256=jt0o7SRbDhnvrifR3l4hN8oL3uJNxl8aMnvaoABb-MU,1235
91
- guidellm-0.4.0a155.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
92
- guidellm-0.4.0a155.dist-info/METADATA,sha256=AEyyL1EHQ5t9SiABaf8IN5VxX3nB710-2pguCKQXw4o,21923
93
- guidellm-0.4.0a155.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
94
- guidellm-0.4.0a155.dist-info/entry_points.txt,sha256=DzLFEg47fF7qY1b-9laPz9jg0KSKJ1_D9TbF93kLz_E,51
95
- guidellm-0.4.0a155.dist-info/top_level.txt,sha256=EXRGjnvFtL6MeZTe0tnHRMYcEWUW3vEqoG2zO7vFOtk,9
96
- guidellm-0.4.0a155.dist-info/RECORD,,
90
+ guidellm-0.4.0a173.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
91
+ guidellm-0.4.0a173.dist-info/METADATA,sha256=tgoCM_1UQSUh_WYNTeUKrZehj6HJUVJnwDzkmCRhEMY,21923
92
+ guidellm-0.4.0a173.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
93
+ guidellm-0.4.0a173.dist-info/entry_points.txt,sha256=DzLFEg47fF7qY1b-9laPz9jg0KSKJ1_D9TbF93kLz_E,51
94
+ guidellm-0.4.0a173.dist-info/top_level.txt,sha256=EXRGjnvFtL6MeZTe0tnHRMYcEWUW3vEqoG2zO7vFOtk,9
95
+ guidellm-0.4.0a173.dist-info/RECORD,,