guidellm 0.5.2__tar.gz → 0.5.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. {guidellm-0.5.2/src/guidellm.egg-info → guidellm-0.5.4}/PKG-INFO +7 -6
  2. {guidellm-0.5.2 → guidellm-0.5.4}/pyproject.toml +3 -3
  3. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/__main__.py +1 -1
  4. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/benchmarker.py +1 -1
  5. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/profiles.py +3 -1
  6. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/schemas/base.py +1 -1
  7. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/schemas/generative/accumulator.py +15 -0
  8. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/schemas/generative/entrypoints.py +1 -1
  9. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/deserializers/synthetic.py +1 -1
  10. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/processor.py +1 -1
  11. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/mock_server/handlers/tokenizer.py +6 -3
  12. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/mock_server/utils.py +8 -4
  13. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/scheduler/strategies.py +40 -4
  14. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/settings.py +3 -3
  15. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/hf_transformers.py +3 -1
  16. guidellm-0.5.4/src/guidellm/version.py +6 -0
  17. {guidellm-0.5.2 → guidellm-0.5.4/src/guidellm.egg-info}/PKG-INFO +7 -6
  18. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm.egg-info/requires.txt +7 -6
  19. guidellm-0.5.2/src/guidellm/version.py +0 -6
  20. {guidellm-0.5.2 → guidellm-0.5.4}/LICENSE +0 -0
  21. {guidellm-0.5.2 → guidellm-0.5.4}/MANIFEST.in +0 -0
  22. {guidellm-0.5.2 → guidellm-0.5.4}/README.md +0 -0
  23. {guidellm-0.5.2 → guidellm-0.5.4}/setup.cfg +0 -0
  24. {guidellm-0.5.2 → guidellm-0.5.4}/setup.py +0 -0
  25. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/__init__.py +0 -0
  26. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/backends/__init__.py +0 -0
  27. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/backends/backend.py +0 -0
  28. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/backends/openai.py +0 -0
  29. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/backends/response_handlers.py +0 -0
  30. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/__init__.py +0 -0
  31. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/entrypoints.py +0 -0
  32. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/outputs/__init__.py +0 -0
  33. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/outputs/console.py +0 -0
  34. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/outputs/csv.py +0 -0
  35. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/outputs/html.py +0 -0
  36. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/outputs/output.py +0 -0
  37. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/outputs/serialized.py +0 -0
  38. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/progress.py +0 -0
  39. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/scenarios/__init__.py +0 -0
  40. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/scenarios/chat.json +0 -0
  41. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/scenarios/rag.json +0 -0
  42. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/schemas/__init__.py +0 -0
  43. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/schemas/generative/__init__.py +0 -0
  44. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/schemas/generative/benchmark.py +0 -0
  45. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/schemas/generative/metrics.py +0 -0
  46. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/schemas/generative/report.py +0 -0
  47. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/__init__.py +0 -0
  48. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/builders.py +0 -0
  49. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/collators.py +0 -0
  50. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/config.py +0 -0
  51. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/deserializers/__init__.py +0 -0
  52. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/deserializers/deserializer.py +0 -0
  53. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/deserializers/file.py +0 -0
  54. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/deserializers/huggingface.py +0 -0
  55. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/deserializers/memory.py +0 -0
  56. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/entrypoints.py +0 -0
  57. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/loaders.py +0 -0
  58. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/preprocessors/__init__.py +0 -0
  59. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/preprocessors/formatters.py +0 -0
  60. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/preprocessors/mappers.py +0 -0
  61. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/preprocessors/preprocessor.py +0 -0
  62. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/schemas.py +0 -0
  63. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/utils/__init__.py +0 -0
  64. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/utils/dataset.py +0 -0
  65. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/extras/__init__.py +0 -0
  66. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/extras/audio.py +0 -0
  67. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/extras/vision.py +0 -0
  68. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/logger.py +0 -0
  69. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/mock_server/__init__.py +0 -0
  70. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/mock_server/config.py +0 -0
  71. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/mock_server/handlers/__init__.py +0 -0
  72. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/mock_server/handlers/chat_completions.py +0 -0
  73. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/mock_server/handlers/completions.py +0 -0
  74. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/mock_server/models.py +0 -0
  75. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/mock_server/server.py +0 -0
  76. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/scheduler/__init__.py +0 -0
  77. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/scheduler/constraints/__init__.py +0 -0
  78. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/scheduler/constraints/constraint.py +0 -0
  79. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/scheduler/constraints/error.py +0 -0
  80. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/scheduler/constraints/factory.py +0 -0
  81. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/scheduler/constraints/request.py +0 -0
  82. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/scheduler/constraints/saturation.py +0 -0
  83. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/scheduler/environments.py +0 -0
  84. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/scheduler/scheduler.py +0 -0
  85. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/scheduler/schemas.py +0 -0
  86. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/scheduler/worker.py +0 -0
  87. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/scheduler/worker_group.py +0 -0
  88. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/schemas/__init__.py +0 -0
  89. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/schemas/base.py +0 -0
  90. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/schemas/info.py +0 -0
  91. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/schemas/request.py +0 -0
  92. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/schemas/request_stats.py +0 -0
  93. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/schemas/response.py +0 -0
  94. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/schemas/statistics.py +0 -0
  95. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/__init__.py +0 -0
  96. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/auto_importer.py +0 -0
  97. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/cli.py +0 -0
  98. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/colors.py +0 -0
  99. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/console.py +0 -0
  100. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/default_group.py +0 -0
  101. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/dict.py +0 -0
  102. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/encoding.py +0 -0
  103. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/functions.py +0 -0
  104. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/hf_datasets.py +0 -0
  105. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/imports.py +0 -0
  106. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/messaging.py +0 -0
  107. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/mixins.py +0 -0
  108. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/random.py +0 -0
  109. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/registry.py +0 -0
  110. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/singleton.py +0 -0
  111. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/synchronous.py +0 -0
  112. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/text.py +0 -0
  113. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/typing.py +0 -0
  114. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm.egg-info/SOURCES.txt +0 -0
  115. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm.egg-info/dependency_links.txt +0 -0
  116. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm.egg-info/entry_points.txt +0 -0
  117. {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: guidellm
3
- Version: 0.5.2
3
+ Version: 0.5.4
4
4
  Summary: Guidance platform for deploying and managing large language models.
5
5
  Author: Red Hat
6
6
  License: Apache-2.0
@@ -33,17 +33,18 @@ Requires-Dist: transformers
33
33
  Requires-Dist: uvloop>=0.18
34
34
  Requires-Dist: torch
35
35
  Provides-Extra: all
36
- Requires-Dist: guidellm[audio,openai,perf,vision]; extra == "all"
36
+ Requires-Dist: guidellm[audio,perf,tokenizers,vision]; extra == "all"
37
37
  Provides-Extra: recommended
38
- Requires-Dist: guidellm[openai,perf]; extra == "recommended"
38
+ Requires-Dist: guidellm[perf,tokenizers]; extra == "recommended"
39
39
  Provides-Extra: perf
40
40
  Requires-Dist: orjson; extra == "perf"
41
41
  Requires-Dist: msgpack; extra == "perf"
42
42
  Requires-Dist: msgspec; extra == "perf"
43
43
  Requires-Dist: uvloop; extra == "perf"
44
- Provides-Extra: openai
45
- Requires-Dist: tiktoken>=0.11.0; extra == "openai"
46
- Requires-Dist: blobfile>=3.1.0; extra == "openai"
44
+ Provides-Extra: tokenizers
45
+ Requires-Dist: tiktoken; extra == "tokenizers"
46
+ Requires-Dist: blobfile; extra == "tokenizers"
47
+ Requires-Dist: mistral-common; extra == "tokenizers"
47
48
  Provides-Extra: audio
48
49
  Requires-Dist: datasets[audio]>=4.1.0; extra == "audio"
49
50
  Requires-Dist: torch==2.9.*; extra == "audio"
@@ -74,11 +74,11 @@ dependencies = [
74
74
 
75
75
  [project.optional-dependencies]
76
76
  # Meta Extras
77
- all = ["guidellm[perf,openai,audio,vision]"]
78
- recommended = ["guidellm[perf,openai]"]
77
+ all = ["guidellm[perf,tokenizers,audio,vision]"]
78
+ recommended = ["guidellm[perf,tokenizers]"]
79
79
  # Feature Extras
80
80
  perf = ["orjson", "msgpack", "msgspec", "uvloop"]
81
- openai = ["tiktoken>=0.11.0", "blobfile>=3.1.0"]
81
+ tokenizers = ["tiktoken", "blobfile", "mistral-common"]
82
82
  audio = [
83
83
  # Lowest version with full torchcodec support
84
84
  "datasets[audio]>=4.1.0",
@@ -334,7 +334,7 @@ def benchmark():
334
334
  default=BenchmarkGenerativeTextArgs.get_default("rampup"),
335
335
  help=(
336
336
  "The time, in seconds, to ramp up the request rate over. "
337
- "Only applicable for Throughput/Concurrent strategies"
337
+ "Applicable for Throughput, Concurrent, and Constant strategies"
338
338
  ),
339
339
  )
340
340
  @click.option(
@@ -64,7 +64,7 @@ class Benchmarker(
64
64
  environment: Environment,
65
65
  warmup: TransientPhaseConfig,
66
66
  cooldown: TransientPhaseConfig,
67
- sample_requests: int | None = 20,
67
+ sample_requests: int | None = None,
68
68
  prefer_response_metrics: bool = True,
69
69
  progress: (
70
70
  BenchmarkerProgress[BenchmarkAccumulatorT, BenchmarkT] | None
@@ -557,7 +557,9 @@ class AsyncProfile(Profile):
557
557
 
558
558
  if self.strategy_type == "constant":
559
559
  return AsyncConstantStrategy(
560
- rate=current_rate, max_concurrency=self.max_concurrency
560
+ rate=current_rate,
561
+ max_concurrency=self.max_concurrency,
562
+ rampup_duration=self.rampup_duration,
561
563
  )
562
564
  elif self.strategy_type == "poisson":
563
565
  return AsyncPoissonStrategy(
@@ -273,7 +273,7 @@ class BenchmarkConfig(StandardBaseDict):
273
273
  description="Constraint definitions applied to scheduler strategy execution",
274
274
  )
275
275
  sample_requests: int | None = Field(
276
- default=20,
276
+ default=None,
277
277
  description="Request count for statistical sampling in final metrics",
278
278
  )
279
279
  warmup: TransientPhaseConfig = Field(
@@ -787,6 +787,21 @@ class GenerativeBenchmarkAccumulator(
787
787
  description="Running metrics for incomplete requests",
788
788
  )
789
789
 
790
+ def model_post_init(self, __context):
791
+ """
792
+ Initialize child accumulators with config values after model construction.
793
+
794
+ Propagates sample_requests from config to child request accumulators to ensure
795
+ consistent sampling behavior across completed, errored, and incomplete request
796
+ collections. This ensures the --sample-requests option functions correctly.
797
+ """
798
+ super().model_post_init(__context)
799
+
800
+ # Propagate sample_requests from config to child accumulators
801
+ self.completed.sample_requests = self.config.sample_requests
802
+ self.errored.sample_requests = self.config.sample_requests
803
+ self.incomplete.sample_requests = self.config.sample_requests
804
+
790
805
  def update_estimate(
791
806
  self,
792
807
  response: GenerationResponse | None,
@@ -239,7 +239,7 @@ class BenchmarkGenerativeTextArgs(StandardBaseModel):
239
239
  )
240
240
  # Benchmarker configuration
241
241
  sample_requests: int | None = Field(
242
- default=10,
242
+ default=None,
243
243
  description="Number of requests to sample for detailed metrics (None for all)",
244
244
  )
245
245
  warmup: int | float | dict | TransientPhaseConfig | None = Field(
@@ -150,7 +150,7 @@ class _SyntheticTextExamplesIterable(_BaseExamplesIterable):
150
150
  text = unique + faker.text(max_nb_chars=num_chars)
151
151
  prompt_token_ids = self.processor.encode(text)
152
152
 
153
- return self.processor.decode(
153
+ return self.processor.decode( # type: ignore[return-value]
154
154
  prompt_token_ids[:prompt_tokens_count], skip_special_tokens=True
155
155
  )
156
156
 
@@ -3,7 +3,7 @@ from __future__ import annotations
3
3
  from pathlib import Path
4
4
  from typing import Any
5
5
 
6
- from transformers import AutoTokenizer, PreTrainedTokenizerBase # type: ignore[import]
6
+ from transformers import AutoTokenizer, PreTrainedTokenizerBase
7
7
 
8
8
  __all__ = ["ProcessorFactory"]
9
9
 
@@ -13,7 +13,7 @@ from pydantic import ValidationError
13
13
  from sanic import response
14
14
  from sanic.request import Request
15
15
  from sanic.response import HTTPResponse
16
- from transformers.tokenization_utils import PreTrainedTokenizer
16
+ from transformers import AutoTokenizer
17
17
 
18
18
  from guidellm.mock_server.config import MockServerConfig
19
19
  from guidellm.mock_server.models import (
@@ -55,7 +55,7 @@ class TokenizerHandler:
55
55
  self.tokenizer = (
56
56
  MockTokenizer()
57
57
  if config.processor is None
58
- else PreTrainedTokenizer.from_pretrained(config.processor)
58
+ else AutoTokenizer.from_pretrained(config.processor)
59
59
  )
60
60
 
61
61
  async def tokenize(self, request: Request) -> HTTPResponse:
@@ -96,6 +96,9 @@ class TokenizerHandler:
96
96
 
97
97
  tokens = self.tokenizer.tokenize(req_data.text)
98
98
  token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
99
+ # This should never happen in our mock tokenizer, but matches function sig
100
+ if not isinstance(token_ids, list):
101
+ token_ids = [token_ids]
99
102
 
100
103
  return response.json(
101
104
  TokenizeResponse(tokens=token_ids, count=len(token_ids)).model_dump()
@@ -139,4 +142,4 @@ class TokenizerHandler:
139
142
 
140
143
  text = self.tokenizer.decode(req_data.tokens, skip_special_tokens=False)
141
144
 
142
- return response.json(DetokenizeResponse(text=text).model_dump())
145
+ return response.json(DetokenizeResponse(text=text).model_dump()) # type: ignore[arg-type]
@@ -14,7 +14,11 @@ import re
14
14
  from collections.abc import Generator
15
15
 
16
16
  from faker import Faker
17
- from transformers.tokenization_utils import AddedToken, PreTrainedTokenizer, TextInput
17
+ from transformers.tokenization_utils_base import (
18
+ AddedToken,
19
+ PreTrainedTokenizerBase,
20
+ TextInput,
21
+ )
18
22
 
19
23
  __all__ = [
20
24
  "MockTokenizer",
@@ -25,7 +29,7 @@ __all__ = [
25
29
  ]
26
30
 
27
31
 
28
- class MockTokenizer(PreTrainedTokenizer):
32
+ class MockTokenizer(PreTrainedTokenizerBase):
29
33
  """
30
34
  Mock tokenizer implementation for testing text processing workflows.
31
35
 
@@ -207,7 +211,7 @@ class MockTokenizer(PreTrainedTokenizer):
207
211
 
208
212
  def create_fake_text(
209
213
  num_tokens: int,
210
- processor: PreTrainedTokenizer,
214
+ processor: PreTrainedTokenizerBase,
211
215
  seed: int = 42,
212
216
  fake: Faker | None = None,
213
217
  ) -> str:
@@ -229,7 +233,7 @@ def create_fake_text(
229
233
 
230
234
  def create_fake_tokens_str(
231
235
  num_tokens: int,
232
- processor: PreTrainedTokenizer,
236
+ processor: PreTrainedTokenizerBase,
233
237
  seed: int = 42,
234
238
  fake: Faker | None = None,
235
239
  ) -> list[str]:
@@ -16,6 +16,7 @@ throughput (maximum load), constant-rate (steady intervals), and Poisson-distrib
16
16
  from __future__ import annotations
17
17
 
18
18
  import asyncio
19
+ import math
19
20
  import random
20
21
  from abc import abstractmethod
21
22
  from multiprocessing import Event, Value, synchronize
@@ -453,6 +454,13 @@ class AsyncConstantStrategy(SchedulingStrategy):
453
454
  default=None,
454
455
  description="Maximum number of concurrent requests to schedule",
455
456
  )
457
+ rampup_duration: NonNegativeFloat = Field(
458
+ default=0.0,
459
+ description=(
460
+ "Duration in seconds to linearly ramp up from 0 to target rate "
461
+ "at the beginning of each strategy run"
462
+ ),
463
+ )
456
464
 
457
465
  def __str__(self) -> str:
458
466
  """
@@ -476,19 +484,47 @@ class AsyncConstantStrategy(SchedulingStrategy):
476
484
 
477
485
  async def next_request_time(self, worker_index: PositiveInt) -> float:
478
486
  """
479
- Calculate next request time at fixed intervals.
487
+ Calculate next request time at fixed intervals with optional linear rampup.
480
488
 
481
489
  Schedules requests at uniform intervals determined by the configured rate,
482
- independent of request completion times.
490
+ independent of request completion times. If rampup_duration is set, the rate
491
+ increases linearly from 0 to the target rate during the rampup period, then
492
+ continues at the constant rate.
483
493
 
484
494
  :param worker_index: Unused for constant strategy
485
- :return: Start time plus constant interval based on request index
495
+ :return: Start time plus interval based on request index and
496
+ rampup configuration
486
497
  """
487
498
  _ = worker_index # unused
488
499
  current_index = self.next_request_index()
489
500
  start_time = await self.get_processes_start_time()
490
501
 
491
- return start_time + current_index / self.rate
502
+ if self.rampup_duration > 0:
503
+ # Calculate number of requests that would be sent during rampup
504
+ # Cumulative requests by time t during rampup:
505
+ # n = rate * t² / (2 * rampup_duration)
506
+ # At end of rampup (t = rampup_duration), n_rampup is calculated below
507
+ n_rampup = self.rate * self.rampup_duration / 2.0
508
+
509
+ if current_index == 1:
510
+ # First request at start_time
511
+ return start_time
512
+ elif current_index <= n_rampup:
513
+ # During rampup: solve for t where
514
+ # n = rate * t² / (2 * rampup_duration)
515
+ time_offset = math.sqrt(
516
+ 2.0 * current_index * self.rampup_duration / self.rate
517
+ )
518
+ return start_time + time_offset
519
+ else:
520
+ # After rampup: continue at constant rate
521
+ time_offset = (
522
+ self.rampup_duration + (current_index - n_rampup) / self.rate
523
+ )
524
+ return start_time + time_offset
525
+ else:
526
+ # No rampup: uniform intervals
527
+ return start_time + current_index / self.rate
492
528
 
493
529
  def request_completed(self, request_info: RequestInfo):
494
530
  """
@@ -31,9 +31,9 @@ class Environment(str, Enum):
31
31
 
32
32
 
33
33
  ENV_REPORT_MAPPING = {
34
- Environment.PROD: "https://blog.vllm.ai/guidellm/ui/v0.5.0/index.html",
35
- Environment.STAGING: "https://blog.vllm.ai/guidellm/ui/release/v0.4.0/index.html",
36
- Environment.DEV: "https://blog.vllm.ai/guidellm/ui/dev/index.html",
34
+ Environment.PROD: "https://raw.githubusercontent.com/vllm-project/guidellm/refs/heads/gh-pages/ui/v0.5.3/index.html",
35
+ Environment.STAGING: "https://raw.githubusercontent.com/vllm-project/guidellm/refs/heads/gh-pages/ui/release/v0.4.0/index.html",
36
+ Environment.DEV: "https://raw.githubusercontent.com/vllm-project/guidellm/refs/heads/gh-pages/ui/dev/index.html",
37
37
  Environment.LOCAL: "http://localhost:3000/index.html",
38
38
  }
39
39
 
@@ -1,7 +1,8 @@
1
1
  from pathlib import Path
2
2
  from typing import Any
3
3
 
4
- from transformers import AutoTokenizer, PreTrainedTokenizerBase # type: ignore[import]
4
+ from transformers import AutoTokenizer
5
+ from transformers.tokenization_utils_base import PreTrainedTokenizerBase
5
6
 
6
7
  __all__ = [
7
8
  "check_load_processor",
@@ -16,6 +17,7 @@ def check_load_processor(
16
17
  if processor is None:
17
18
  raise ValueError(f"Processor/Tokenizer is required for {error_msg}.")
18
19
 
20
+ loaded: PreTrainedTokenizerBase
19
21
  try:
20
22
  if isinstance(processor, str | Path):
21
23
  loaded = AutoTokenizer.from_pretrained(
@@ -0,0 +1,6 @@
1
+ version = "0.5.4"
2
+ build_type = "release"
3
+ build_iteration = "0"
4
+ git_commit = "fc31a9ebfaef291d141cb1fbfd64ad82dec9ecbf"
5
+ git_branch = "HEAD"
6
+ git_last_tag = "v0.5.4"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: guidellm
3
- Version: 0.5.2
3
+ Version: 0.5.4
4
4
  Summary: Guidance platform for deploying and managing large language models.
5
5
  Author: Red Hat
6
6
  License: Apache-2.0
@@ -33,17 +33,18 @@ Requires-Dist: transformers
33
33
  Requires-Dist: uvloop>=0.18
34
34
  Requires-Dist: torch
35
35
  Provides-Extra: all
36
- Requires-Dist: guidellm[audio,openai,perf,vision]; extra == "all"
36
+ Requires-Dist: guidellm[audio,perf,tokenizers,vision]; extra == "all"
37
37
  Provides-Extra: recommended
38
- Requires-Dist: guidellm[openai,perf]; extra == "recommended"
38
+ Requires-Dist: guidellm[perf,tokenizers]; extra == "recommended"
39
39
  Provides-Extra: perf
40
40
  Requires-Dist: orjson; extra == "perf"
41
41
  Requires-Dist: msgpack; extra == "perf"
42
42
  Requires-Dist: msgspec; extra == "perf"
43
43
  Requires-Dist: uvloop; extra == "perf"
44
- Provides-Extra: openai
45
- Requires-Dist: tiktoken>=0.11.0; extra == "openai"
46
- Requires-Dist: blobfile>=3.1.0; extra == "openai"
44
+ Provides-Extra: tokenizers
45
+ Requires-Dist: tiktoken; extra == "tokenizers"
46
+ Requires-Dist: blobfile; extra == "tokenizers"
47
+ Requires-Dist: mistral-common; extra == "tokenizers"
47
48
  Provides-Extra: audio
48
49
  Requires-Dist: datasets[audio]>=4.1.0; extra == "audio"
49
50
  Requires-Dist: torch==2.9.*; extra == "audio"
@@ -20,7 +20,7 @@ uvloop>=0.18
20
20
  torch
21
21
 
22
22
  [all]
23
- guidellm[audio,openai,perf,vision]
23
+ guidellm[audio,perf,tokenizers,vision]
24
24
 
25
25
  [audio]
26
26
  datasets[audio]>=4.1.0
@@ -56,10 +56,6 @@ types-requests~=2.32.0
56
56
  types-toml
57
57
  mkdocs-linkcheck~=1.0.6
58
58
 
59
- [openai]
60
- tiktoken>=0.11.0
61
- blobfile>=3.1.0
62
-
63
59
  [perf]
64
60
  orjson
65
61
  msgpack
@@ -67,7 +63,12 @@ msgspec
67
63
  uvloop
68
64
 
69
65
  [recommended]
70
- guidellm[openai,perf]
66
+ guidellm[perf,tokenizers]
67
+
68
+ [tokenizers]
69
+ tiktoken
70
+ blobfile
71
+ mistral-common
71
72
 
72
73
  [vision]
73
74
  datasets[vision]
@@ -1,6 +0,0 @@
1
- version = "0.5.2"
2
- build_type = "release"
3
- build_iteration = "0"
4
- git_commit = "26dfc10677055a08fdaac470839983222f1ee7ff"
5
- git_branch = "HEAD"
6
- git_last_tag = "v0.5.2"
File without changes
File without changes
File without changes
File without changes
File without changes