guidellm 0.5.2__tar.gz → 0.5.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {guidellm-0.5.2/src/guidellm.egg-info → guidellm-0.5.4}/PKG-INFO +7 -6
- {guidellm-0.5.2 → guidellm-0.5.4}/pyproject.toml +3 -3
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/__main__.py +1 -1
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/benchmarker.py +1 -1
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/profiles.py +3 -1
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/schemas/base.py +1 -1
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/schemas/generative/accumulator.py +15 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/schemas/generative/entrypoints.py +1 -1
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/deserializers/synthetic.py +1 -1
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/processor.py +1 -1
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/mock_server/handlers/tokenizer.py +6 -3
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/mock_server/utils.py +8 -4
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/scheduler/strategies.py +40 -4
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/settings.py +3 -3
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/hf_transformers.py +3 -1
- guidellm-0.5.4/src/guidellm/version.py +6 -0
- {guidellm-0.5.2 → guidellm-0.5.4/src/guidellm.egg-info}/PKG-INFO +7 -6
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm.egg-info/requires.txt +7 -6
- guidellm-0.5.2/src/guidellm/version.py +0 -6
- {guidellm-0.5.2 → guidellm-0.5.4}/LICENSE +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/MANIFEST.in +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/README.md +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/setup.cfg +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/setup.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/__init__.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/backends/__init__.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/backends/backend.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/backends/openai.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/backends/response_handlers.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/__init__.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/entrypoints.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/outputs/__init__.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/outputs/console.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/outputs/csv.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/outputs/html.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/outputs/output.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/outputs/serialized.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/progress.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/scenarios/__init__.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/scenarios/chat.json +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/scenarios/rag.json +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/schemas/__init__.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/schemas/generative/__init__.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/schemas/generative/benchmark.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/schemas/generative/metrics.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/benchmark/schemas/generative/report.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/__init__.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/builders.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/collators.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/config.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/deserializers/__init__.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/deserializers/deserializer.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/deserializers/file.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/deserializers/huggingface.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/deserializers/memory.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/entrypoints.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/loaders.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/preprocessors/__init__.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/preprocessors/formatters.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/preprocessors/mappers.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/preprocessors/preprocessor.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/schemas.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/utils/__init__.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/data/utils/dataset.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/extras/__init__.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/extras/audio.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/extras/vision.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/logger.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/mock_server/__init__.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/mock_server/config.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/mock_server/handlers/__init__.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/mock_server/handlers/chat_completions.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/mock_server/handlers/completions.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/mock_server/models.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/mock_server/server.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/scheduler/__init__.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/scheduler/constraints/__init__.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/scheduler/constraints/constraint.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/scheduler/constraints/error.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/scheduler/constraints/factory.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/scheduler/constraints/request.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/scheduler/constraints/saturation.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/scheduler/environments.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/scheduler/scheduler.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/scheduler/schemas.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/scheduler/worker.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/scheduler/worker_group.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/schemas/__init__.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/schemas/base.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/schemas/info.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/schemas/request.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/schemas/request_stats.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/schemas/response.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/schemas/statistics.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/__init__.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/auto_importer.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/cli.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/colors.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/console.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/default_group.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/dict.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/encoding.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/functions.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/hf_datasets.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/imports.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/messaging.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/mixins.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/random.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/registry.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/singleton.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/synchronous.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/text.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm/utils/typing.py +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm.egg-info/SOURCES.txt +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm.egg-info/dependency_links.txt +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm.egg-info/entry_points.txt +0 -0
- {guidellm-0.5.2 → guidellm-0.5.4}/src/guidellm.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: guidellm
|
|
3
|
-
Version: 0.5.2
|
|
3
|
+
Version: 0.5.4
|
|
4
4
|
Summary: Guidance platform for deploying and managing large language models.
|
|
5
5
|
Author: Red Hat
|
|
6
6
|
License: Apache-2.0
|
|
@@ -33,17 +33,18 @@ Requires-Dist: transformers
|
|
|
33
33
|
Requires-Dist: uvloop>=0.18
|
|
34
34
|
Requires-Dist: torch
|
|
35
35
|
Provides-Extra: all
|
|
36
|
-
Requires-Dist: guidellm[audio,openai,perf,vision]; extra == "all"
|
|
36
|
+
Requires-Dist: guidellm[audio,perf,tokenizers,vision]; extra == "all"
|
|
37
37
|
Provides-Extra: recommended
|
|
38
|
-
Requires-Dist: guidellm[openai,perf]; extra == "recommended"
|
|
38
|
+
Requires-Dist: guidellm[perf,tokenizers]; extra == "recommended"
|
|
39
39
|
Provides-Extra: perf
|
|
40
40
|
Requires-Dist: orjson; extra == "perf"
|
|
41
41
|
Requires-Dist: msgpack; extra == "perf"
|
|
42
42
|
Requires-Dist: msgspec; extra == "perf"
|
|
43
43
|
Requires-Dist: uvloop; extra == "perf"
|
|
44
|
-
Provides-Extra: openai
|
|
45
|
-
Requires-Dist: tiktoken>=0.11.0; extra == "openai"
|
|
46
|
-
Requires-Dist: blobfile>=3.1.0; extra == "openai"
|
|
44
|
+
Provides-Extra: tokenizers
|
|
45
|
+
Requires-Dist: tiktoken; extra == "tokenizers"
|
|
46
|
+
Requires-Dist: blobfile; extra == "tokenizers"
|
|
47
|
+
Requires-Dist: mistral-common; extra == "tokenizers"
|
|
47
48
|
Provides-Extra: audio
|
|
48
49
|
Requires-Dist: datasets[audio]>=4.1.0; extra == "audio"
|
|
49
50
|
Requires-Dist: torch==2.9.*; extra == "audio"
|
|
@@ -74,11 +74,11 @@ dependencies = [
|
|
|
74
74
|
|
|
75
75
|
[project.optional-dependencies]
|
|
76
76
|
# Meta Extras
|
|
77
|
-
all = ["guidellm[perf,
|
|
78
|
-
recommended = ["guidellm[perf,
|
|
77
|
+
all = ["guidellm[perf,tokenizers,audio,vision]"]
|
|
78
|
+
recommended = ["guidellm[perf,tokenizers]"]
|
|
79
79
|
# Feature Extras
|
|
80
80
|
perf = ["orjson", "msgpack", "msgspec", "uvloop"]
|
|
81
|
-
|
|
81
|
+
tokenizers = ["tiktoken", "blobfile", "mistral-common"]
|
|
82
82
|
audio = [
|
|
83
83
|
# Lowest version with full torchcodec support
|
|
84
84
|
"datasets[audio]>=4.1.0",
|
|
@@ -334,7 +334,7 @@ def benchmark():
|
|
|
334
334
|
default=BenchmarkGenerativeTextArgs.get_default("rampup"),
|
|
335
335
|
help=(
|
|
336
336
|
"The time, in seconds, to ramp up the request rate over. "
|
|
337
|
-
"
|
|
337
|
+
"Applicable for Throughput, Concurrent, and Constant strategies"
|
|
338
338
|
),
|
|
339
339
|
)
|
|
340
340
|
@click.option(
|
|
@@ -64,7 +64,7 @@ class Benchmarker(
|
|
|
64
64
|
environment: Environment,
|
|
65
65
|
warmup: TransientPhaseConfig,
|
|
66
66
|
cooldown: TransientPhaseConfig,
|
|
67
|
-
sample_requests: int | None =
|
|
67
|
+
sample_requests: int | None = None,
|
|
68
68
|
prefer_response_metrics: bool = True,
|
|
69
69
|
progress: (
|
|
70
70
|
BenchmarkerProgress[BenchmarkAccumulatorT, BenchmarkT] | None
|
|
@@ -557,7 +557,9 @@ class AsyncProfile(Profile):
|
|
|
557
557
|
|
|
558
558
|
if self.strategy_type == "constant":
|
|
559
559
|
return AsyncConstantStrategy(
|
|
560
|
-
rate=current_rate,
|
|
560
|
+
rate=current_rate,
|
|
561
|
+
max_concurrency=self.max_concurrency,
|
|
562
|
+
rampup_duration=self.rampup_duration,
|
|
561
563
|
)
|
|
562
564
|
elif self.strategy_type == "poisson":
|
|
563
565
|
return AsyncPoissonStrategy(
|
|
@@ -273,7 +273,7 @@ class BenchmarkConfig(StandardBaseDict):
|
|
|
273
273
|
description="Constraint definitions applied to scheduler strategy execution",
|
|
274
274
|
)
|
|
275
275
|
sample_requests: int | None = Field(
|
|
276
|
-
default=
|
|
276
|
+
default=None,
|
|
277
277
|
description="Request count for statistical sampling in final metrics",
|
|
278
278
|
)
|
|
279
279
|
warmup: TransientPhaseConfig = Field(
|
|
@@ -787,6 +787,21 @@ class GenerativeBenchmarkAccumulator(
|
|
|
787
787
|
description="Running metrics for incomplete requests",
|
|
788
788
|
)
|
|
789
789
|
|
|
790
|
+
def model_post_init(self, __context):
|
|
791
|
+
"""
|
|
792
|
+
Initialize child accumulators with config values after model construction.
|
|
793
|
+
|
|
794
|
+
Propagates sample_requests from config to child request accumulators to ensure
|
|
795
|
+
consistent sampling behavior across completed, errored, and incomplete request
|
|
796
|
+
collections. This ensures the --sample-requests option functions correctly.
|
|
797
|
+
"""
|
|
798
|
+
super().model_post_init(__context)
|
|
799
|
+
|
|
800
|
+
# Propagate sample_requests from config to child accumulators
|
|
801
|
+
self.completed.sample_requests = self.config.sample_requests
|
|
802
|
+
self.errored.sample_requests = self.config.sample_requests
|
|
803
|
+
self.incomplete.sample_requests = self.config.sample_requests
|
|
804
|
+
|
|
790
805
|
def update_estimate(
|
|
791
806
|
self,
|
|
792
807
|
response: GenerationResponse | None,
|
|
@@ -239,7 +239,7 @@ class BenchmarkGenerativeTextArgs(StandardBaseModel):
|
|
|
239
239
|
)
|
|
240
240
|
# Benchmarker configuration
|
|
241
241
|
sample_requests: int | None = Field(
|
|
242
|
-
default=
|
|
242
|
+
default=None,
|
|
243
243
|
description="Number of requests to sample for detailed metrics (None for all)",
|
|
244
244
|
)
|
|
245
245
|
warmup: int | float | dict | TransientPhaseConfig | None = Field(
|
|
@@ -150,7 +150,7 @@ class _SyntheticTextExamplesIterable(_BaseExamplesIterable):
|
|
|
150
150
|
text = unique + faker.text(max_nb_chars=num_chars)
|
|
151
151
|
prompt_token_ids = self.processor.encode(text)
|
|
152
152
|
|
|
153
|
-
return self.processor.decode(
|
|
153
|
+
return self.processor.decode( # type: ignore[return-value]
|
|
154
154
|
prompt_token_ids[:prompt_tokens_count], skip_special_tokens=True
|
|
155
155
|
)
|
|
156
156
|
|
|
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
from typing import Any
|
|
5
5
|
|
|
6
|
-
from transformers import AutoTokenizer, PreTrainedTokenizerBase
|
|
6
|
+
from transformers import AutoTokenizer, PreTrainedTokenizerBase
|
|
7
7
|
|
|
8
8
|
__all__ = ["ProcessorFactory"]
|
|
9
9
|
|
|
@@ -13,7 +13,7 @@ from pydantic import ValidationError
|
|
|
13
13
|
from sanic import response
|
|
14
14
|
from sanic.request import Request
|
|
15
15
|
from sanic.response import HTTPResponse
|
|
16
|
-
from transformers
|
|
16
|
+
from transformers import AutoTokenizer
|
|
17
17
|
|
|
18
18
|
from guidellm.mock_server.config import MockServerConfig
|
|
19
19
|
from guidellm.mock_server.models import (
|
|
@@ -55,7 +55,7 @@ class TokenizerHandler:
|
|
|
55
55
|
self.tokenizer = (
|
|
56
56
|
MockTokenizer()
|
|
57
57
|
if config.processor is None
|
|
58
|
-
else
|
|
58
|
+
else AutoTokenizer.from_pretrained(config.processor)
|
|
59
59
|
)
|
|
60
60
|
|
|
61
61
|
async def tokenize(self, request: Request) -> HTTPResponse:
|
|
@@ -96,6 +96,9 @@ class TokenizerHandler:
|
|
|
96
96
|
|
|
97
97
|
tokens = self.tokenizer.tokenize(req_data.text)
|
|
98
98
|
token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
|
|
99
|
+
# This should never happen in our mock tokenizer, but matches function sig
|
|
100
|
+
if not isinstance(token_ids, list):
|
|
101
|
+
token_ids = [token_ids]
|
|
99
102
|
|
|
100
103
|
return response.json(
|
|
101
104
|
TokenizeResponse(tokens=token_ids, count=len(token_ids)).model_dump()
|
|
@@ -139,4 +142,4 @@ class TokenizerHandler:
|
|
|
139
142
|
|
|
140
143
|
text = self.tokenizer.decode(req_data.tokens, skip_special_tokens=False)
|
|
141
144
|
|
|
142
|
-
return response.json(DetokenizeResponse(text=text).model_dump())
|
|
145
|
+
return response.json(DetokenizeResponse(text=text).model_dump()) # type: ignore[arg-type]
|
|
@@ -14,7 +14,11 @@ import re
|
|
|
14
14
|
from collections.abc import Generator
|
|
15
15
|
|
|
16
16
|
from faker import Faker
|
|
17
|
-
from transformers.
|
|
17
|
+
from transformers.tokenization_utils_base import (
|
|
18
|
+
AddedToken,
|
|
19
|
+
PreTrainedTokenizerBase,
|
|
20
|
+
TextInput,
|
|
21
|
+
)
|
|
18
22
|
|
|
19
23
|
__all__ = [
|
|
20
24
|
"MockTokenizer",
|
|
@@ -25,7 +29,7 @@ __all__ = [
|
|
|
25
29
|
]
|
|
26
30
|
|
|
27
31
|
|
|
28
|
-
class MockTokenizer(
|
|
32
|
+
class MockTokenizer(PreTrainedTokenizerBase):
|
|
29
33
|
"""
|
|
30
34
|
Mock tokenizer implementation for testing text processing workflows.
|
|
31
35
|
|
|
@@ -207,7 +211,7 @@ class MockTokenizer(PreTrainedTokenizer):
|
|
|
207
211
|
|
|
208
212
|
def create_fake_text(
|
|
209
213
|
num_tokens: int,
|
|
210
|
-
processor:
|
|
214
|
+
processor: PreTrainedTokenizerBase,
|
|
211
215
|
seed: int = 42,
|
|
212
216
|
fake: Faker | None = None,
|
|
213
217
|
) -> str:
|
|
@@ -229,7 +233,7 @@ def create_fake_text(
|
|
|
229
233
|
|
|
230
234
|
def create_fake_tokens_str(
|
|
231
235
|
num_tokens: int,
|
|
232
|
-
processor:
|
|
236
|
+
processor: PreTrainedTokenizerBase,
|
|
233
237
|
seed: int = 42,
|
|
234
238
|
fake: Faker | None = None,
|
|
235
239
|
) -> list[str]:
|
|
@@ -16,6 +16,7 @@ throughput (maximum load), constant-rate (steady intervals), and Poisson-distrib
|
|
|
16
16
|
from __future__ import annotations
|
|
17
17
|
|
|
18
18
|
import asyncio
|
|
19
|
+
import math
|
|
19
20
|
import random
|
|
20
21
|
from abc import abstractmethod
|
|
21
22
|
from multiprocessing import Event, Value, synchronize
|
|
@@ -453,6 +454,13 @@ class AsyncConstantStrategy(SchedulingStrategy):
|
|
|
453
454
|
default=None,
|
|
454
455
|
description="Maximum number of concurrent requests to schedule",
|
|
455
456
|
)
|
|
457
|
+
rampup_duration: NonNegativeFloat = Field(
|
|
458
|
+
default=0.0,
|
|
459
|
+
description=(
|
|
460
|
+
"Duration in seconds to linearly ramp up from 0 to target rate "
|
|
461
|
+
"at the beginning of each strategy run"
|
|
462
|
+
),
|
|
463
|
+
)
|
|
456
464
|
|
|
457
465
|
def __str__(self) -> str:
|
|
458
466
|
"""
|
|
@@ -476,19 +484,47 @@ class AsyncConstantStrategy(SchedulingStrategy):
|
|
|
476
484
|
|
|
477
485
|
async def next_request_time(self, worker_index: PositiveInt) -> float:
|
|
478
486
|
"""
|
|
479
|
-
Calculate next request time at fixed intervals.
|
|
487
|
+
Calculate next request time at fixed intervals with optional linear rampup.
|
|
480
488
|
|
|
481
489
|
Schedules requests at uniform intervals determined by the configured rate,
|
|
482
|
-
independent of request completion times.
|
|
490
|
+
independent of request completion times. If rampup_duration is set, the rate
|
|
491
|
+
increases linearly from 0 to the target rate during the rampup period, then
|
|
492
|
+
continues at the constant rate.
|
|
483
493
|
|
|
484
494
|
:param worker_index: Unused for constant strategy
|
|
485
|
-
:return: Start time plus
|
|
495
|
+
:return: Start time plus interval based on request index and
|
|
496
|
+
rampup configuration
|
|
486
497
|
"""
|
|
487
498
|
_ = worker_index # unused
|
|
488
499
|
current_index = self.next_request_index()
|
|
489
500
|
start_time = await self.get_processes_start_time()
|
|
490
501
|
|
|
491
|
-
|
|
502
|
+
if self.rampup_duration > 0:
|
|
503
|
+
# Calculate number of requests that would be sent during rampup
|
|
504
|
+
# Cumulative requests by time t during rampup:
|
|
505
|
+
# n = rate * t² / (2 * rampup_duration)
|
|
506
|
+
# At end of rampup (t = rampup_duration), n_rampup is calculated below
|
|
507
|
+
n_rampup = self.rate * self.rampup_duration / 2.0
|
|
508
|
+
|
|
509
|
+
if current_index == 1:
|
|
510
|
+
# First request at start_time
|
|
511
|
+
return start_time
|
|
512
|
+
elif current_index <= n_rampup:
|
|
513
|
+
# During rampup: solve for t where
|
|
514
|
+
# n = rate * t² / (2 * rampup_duration)
|
|
515
|
+
time_offset = math.sqrt(
|
|
516
|
+
2.0 * current_index * self.rampup_duration / self.rate
|
|
517
|
+
)
|
|
518
|
+
return start_time + time_offset
|
|
519
|
+
else:
|
|
520
|
+
# After rampup: continue at constant rate
|
|
521
|
+
time_offset = (
|
|
522
|
+
self.rampup_duration + (current_index - n_rampup) / self.rate
|
|
523
|
+
)
|
|
524
|
+
return start_time + time_offset
|
|
525
|
+
else:
|
|
526
|
+
# No rampup: uniform intervals
|
|
527
|
+
return start_time + current_index / self.rate
|
|
492
528
|
|
|
493
529
|
def request_completed(self, request_info: RequestInfo):
|
|
494
530
|
"""
|
|
@@ -31,9 +31,9 @@ class Environment(str, Enum):
|
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
ENV_REPORT_MAPPING = {
|
|
34
|
-
Environment.PROD: "https://
|
|
35
|
-
Environment.STAGING: "https://
|
|
36
|
-
Environment.DEV: "https://
|
|
34
|
+
Environment.PROD: "https://raw.githubusercontent.com/vllm-project/guidellm/refs/heads/gh-pages/ui/v0.5.3/index.html",
|
|
35
|
+
Environment.STAGING: "https://raw.githubusercontent.com/vllm-project/guidellm/refs/heads/gh-pages/ui/release/v0.4.0/index.html",
|
|
36
|
+
Environment.DEV: "https://raw.githubusercontent.com/vllm-project/guidellm/refs/heads/gh-pages/ui/dev/index.html",
|
|
37
37
|
Environment.LOCAL: "http://localhost:3000/index.html",
|
|
38
38
|
}
|
|
39
39
|
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
2
|
from typing import Any
|
|
3
3
|
|
|
4
|
-
from transformers import AutoTokenizer
|
|
4
|
+
from transformers import AutoTokenizer
|
|
5
|
+
from transformers.tokenization_utils_base import PreTrainedTokenizerBase
|
|
5
6
|
|
|
6
7
|
__all__ = [
|
|
7
8
|
"check_load_processor",
|
|
@@ -16,6 +17,7 @@ def check_load_processor(
|
|
|
16
17
|
if processor is None:
|
|
17
18
|
raise ValueError(f"Processor/Tokenizer is required for {error_msg}.")
|
|
18
19
|
|
|
20
|
+
loaded: PreTrainedTokenizerBase
|
|
19
21
|
try:
|
|
20
22
|
if isinstance(processor, str | Path):
|
|
21
23
|
loaded = AutoTokenizer.from_pretrained(
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: guidellm
|
|
3
|
-
Version: 0.5.2
|
|
3
|
+
Version: 0.5.4
|
|
4
4
|
Summary: Guidance platform for deploying and managing large language models.
|
|
5
5
|
Author: Red Hat
|
|
6
6
|
License: Apache-2.0
|
|
@@ -33,17 +33,18 @@ Requires-Dist: transformers
|
|
|
33
33
|
Requires-Dist: uvloop>=0.18
|
|
34
34
|
Requires-Dist: torch
|
|
35
35
|
Provides-Extra: all
|
|
36
|
-
Requires-Dist: guidellm[audio,openai,perf,vision]; extra == "all"
|
|
36
|
+
Requires-Dist: guidellm[audio,perf,tokenizers,vision]; extra == "all"
|
|
37
37
|
Provides-Extra: recommended
|
|
38
|
-
Requires-Dist: guidellm[openai,perf]; extra == "recommended"
|
|
38
|
+
Requires-Dist: guidellm[perf,tokenizers]; extra == "recommended"
|
|
39
39
|
Provides-Extra: perf
|
|
40
40
|
Requires-Dist: orjson; extra == "perf"
|
|
41
41
|
Requires-Dist: msgpack; extra == "perf"
|
|
42
42
|
Requires-Dist: msgspec; extra == "perf"
|
|
43
43
|
Requires-Dist: uvloop; extra == "perf"
|
|
44
|
-
Provides-Extra: openai
|
|
45
|
-
Requires-Dist: tiktoken>=0.11.0; extra == "openai"
|
|
46
|
-
Requires-Dist: blobfile>=3.1.0; extra == "openai"
|
|
44
|
+
Provides-Extra: tokenizers
|
|
45
|
+
Requires-Dist: tiktoken; extra == "tokenizers"
|
|
46
|
+
Requires-Dist: blobfile; extra == "tokenizers"
|
|
47
|
+
Requires-Dist: mistral-common; extra == "tokenizers"
|
|
47
48
|
Provides-Extra: audio
|
|
48
49
|
Requires-Dist: datasets[audio]>=4.1.0; extra == "audio"
|
|
49
50
|
Requires-Dist: torch==2.9.*; extra == "audio"
|
|
@@ -20,7 +20,7 @@ uvloop>=0.18
|
|
|
20
20
|
torch
|
|
21
21
|
|
|
22
22
|
[all]
|
|
23
|
-
guidellm[audio,openai,perf,vision]
|
|
23
|
+
guidellm[audio,perf,tokenizers,vision]
|
|
24
24
|
|
|
25
25
|
[audio]
|
|
26
26
|
datasets[audio]>=4.1.0
|
|
@@ -56,10 +56,6 @@ types-requests~=2.32.0
|
|
|
56
56
|
types-toml
|
|
57
57
|
mkdocs-linkcheck~=1.0.6
|
|
58
58
|
|
|
59
|
-
[openai]
|
|
60
|
-
tiktoken>=0.11.0
|
|
61
|
-
blobfile>=3.1.0
|
|
62
|
-
|
|
63
59
|
[perf]
|
|
64
60
|
orjson
|
|
65
61
|
msgpack
|
|
@@ -67,7 +63,12 @@ msgspec
|
|
|
67
63
|
uvloop
|
|
68
64
|
|
|
69
65
|
[recommended]
|
|
70
|
-
guidellm[openai,perf]
|
|
66
|
+
guidellm[perf,tokenizers]
|
|
67
|
+
|
|
68
|
+
[tokenizers]
|
|
69
|
+
tiktoken
|
|
70
|
+
blobfile
|
|
71
|
+
mistral-common
|
|
71
72
|
|
|
72
73
|
[vision]
|
|
73
74
|
datasets[vision]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|