guidellm 0.4.0a21__py3-none-any.whl → 0.4.0a155__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of guidellm might be problematic.
- guidellm/__init__.py +5 -2
- guidellm/__main__.py +451 -252
- guidellm/backends/__init__.py +33 -0
- guidellm/backends/backend.py +110 -0
- guidellm/backends/openai.py +355 -0
- guidellm/backends/response_handlers.py +455 -0
- guidellm/benchmark/__init__.py +53 -39
- guidellm/benchmark/benchmarker.py +148 -317
- guidellm/benchmark/entrypoints.py +466 -128
- guidellm/benchmark/output.py +517 -771
- guidellm/benchmark/profile.py +580 -280
- guidellm/benchmark/progress.py +568 -549
- guidellm/benchmark/scenarios/__init__.py +40 -0
- guidellm/benchmark/scenarios/chat.json +6 -0
- guidellm/benchmark/scenarios/rag.json +6 -0
- guidellm/benchmark/schemas.py +2085 -0
- guidellm/data/__init__.py +28 -4
- guidellm/data/collators.py +16 -0
- guidellm/data/deserializers/__init__.py +53 -0
- guidellm/data/deserializers/deserializer.py +109 -0
- guidellm/data/deserializers/file.py +222 -0
- guidellm/data/deserializers/huggingface.py +94 -0
- guidellm/data/deserializers/memory.py +192 -0
- guidellm/data/deserializers/synthetic.py +346 -0
- guidellm/data/loaders.py +145 -0
- guidellm/data/preprocessors/__init__.py +25 -0
- guidellm/data/preprocessors/formatters.py +412 -0
- guidellm/data/preprocessors/mappers.py +198 -0
- guidellm/data/preprocessors/preprocessor.py +29 -0
- guidellm/data/processor.py +30 -0
- guidellm/data/schemas.py +13 -0
- guidellm/data/utils/__init__.py +10 -0
- guidellm/data/utils/dataset.py +94 -0
- guidellm/data/utils/functions.py +18 -0
- guidellm/extras/__init__.py +4 -0
- guidellm/extras/audio.py +215 -0
- guidellm/extras/vision.py +242 -0
- guidellm/logger.py +2 -2
- guidellm/mock_server/__init__.py +8 -0
- guidellm/mock_server/config.py +84 -0
- guidellm/mock_server/handlers/__init__.py +17 -0
- guidellm/mock_server/handlers/chat_completions.py +280 -0
- guidellm/mock_server/handlers/completions.py +280 -0
- guidellm/mock_server/handlers/tokenizer.py +142 -0
- guidellm/mock_server/models.py +510 -0
- guidellm/mock_server/server.py +168 -0
- guidellm/mock_server/utils.py +302 -0
- guidellm/preprocess/dataset.py +23 -26
- guidellm/presentation/builder.py +2 -2
- guidellm/presentation/data_models.py +25 -21
- guidellm/presentation/injector.py +2 -3
- guidellm/scheduler/__init__.py +65 -26
- guidellm/scheduler/constraints.py +1035 -0
- guidellm/scheduler/environments.py +252 -0
- guidellm/scheduler/scheduler.py +140 -368
- guidellm/scheduler/schemas.py +272 -0
- guidellm/scheduler/strategies.py +519 -0
- guidellm/scheduler/worker.py +391 -420
- guidellm/scheduler/worker_group.py +707 -0
- guidellm/schemas/__init__.py +31 -0
- guidellm/schemas/info.py +159 -0
- guidellm/schemas/request.py +216 -0
- guidellm/schemas/response.py +119 -0
- guidellm/schemas/stats.py +228 -0
- guidellm/{config.py → settings.py} +32 -21
- guidellm/utils/__init__.py +95 -8
- guidellm/utils/auto_importer.py +98 -0
- guidellm/utils/cli.py +46 -2
- guidellm/utils/console.py +183 -0
- guidellm/utils/encoding.py +778 -0
- guidellm/utils/functions.py +134 -0
- guidellm/utils/hf_datasets.py +1 -2
- guidellm/utils/hf_transformers.py +4 -4
- guidellm/utils/imports.py +9 -0
- guidellm/utils/messaging.py +1118 -0
- guidellm/utils/mixins.py +115 -0
- guidellm/utils/pydantic_utils.py +411 -0
- guidellm/utils/random.py +3 -4
- guidellm/utils/registry.py +220 -0
- guidellm/utils/singleton.py +133 -0
- guidellm/{objects → utils}/statistics.py +341 -247
- guidellm/utils/synchronous.py +159 -0
- guidellm/utils/text.py +163 -50
- guidellm/utils/typing.py +41 -0
- guidellm/version.py +1 -1
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/METADATA +33 -10
- guidellm-0.4.0a155.dist-info/RECORD +96 -0
- guidellm/backend/__init__.py +0 -23
- guidellm/backend/backend.py +0 -259
- guidellm/backend/openai.py +0 -705
- guidellm/backend/response.py +0 -136
- guidellm/benchmark/aggregator.py +0 -760
- guidellm/benchmark/benchmark.py +0 -837
- guidellm/benchmark/scenario.py +0 -104
- guidellm/data/prideandprejudice.txt.gz +0 -0
- guidellm/dataset/__init__.py +0 -22
- guidellm/dataset/creator.py +0 -213
- guidellm/dataset/entrypoints.py +0 -42
- guidellm/dataset/file.py +0 -92
- guidellm/dataset/hf_datasets.py +0 -62
- guidellm/dataset/in_memory.py +0 -132
- guidellm/dataset/synthetic.py +0 -287
- guidellm/objects/__init__.py +0 -18
- guidellm/objects/pydantic.py +0 -89
- guidellm/request/__init__.py +0 -18
- guidellm/request/loader.py +0 -284
- guidellm/request/request.py +0 -79
- guidellm/request/types.py +0 -10
- guidellm/scheduler/queues.py +0 -25
- guidellm/scheduler/result.py +0 -155
- guidellm/scheduler/strategy.py +0 -495
- guidellm-0.4.0a21.dist-info/RECORD +0 -62
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/WHEEL +0 -0
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/entry_points.txt +0 -0
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/top_level.txt +0 -0
guidellm/mock_server/server.py
ADDED
@@ -0,0 +1,168 @@
+"""
+High-performance mock server for OpenAI and vLLM API compatibility testing.
+
+This module provides a Sanic-based mock server that simulates OpenAI and vLLM APIs
+with configurable latency, token generation patterns, and response characteristics.
+The server supports both streaming and non-streaming endpoints, enabling realistic
+performance testing and validation of GuideLLM benchmarking workflows without
+requiring actual model deployments.
+"""
+
+from __future__ import annotations
+
+import time
+
+from sanic import Sanic, response
+from sanic.exceptions import NotFound
+from sanic.log import logger
+from sanic.request import Request
+from sanic.response import HTTPResponse
+
+from guidellm.mock_server.config import MockServerConfig
+from guidellm.mock_server.handlers import (
+    ChatCompletionsHandler,
+    CompletionsHandler,
+    TokenizerHandler,
+)
+
+__all__ = ["MockServer"]
+
+
+class MockServer:
+    """
+    High-performance mock server implementing OpenAI and vLLM API endpoints.
+
+    Provides a Sanic-based web server that simulates API responses with configurable
+    timing characteristics for testing and benchmarking purposes. Supports chat
+    completions, text completions, tokenization endpoints, and model listing with
+    realistic latency patterns to enable comprehensive performance validation.
+
+    Example:
+    ::
+        config = ServerConfig(model="test-model", port=8080)
+        server = MockServer(config)
+        server.run()
+    """
+
+    def __init__(self, config: MockServerConfig) -> None:
+        """
+        Initialize the mock server with configuration.
+
+        :param config: Server configuration containing network settings and response
+            timing parameters
+        """
+        self.config = config
+        self.app = Sanic("guidellm-mock-server")
+        self.chat_handler = ChatCompletionsHandler(config)
+        self.completions_handler = CompletionsHandler(config)
+        self.tokenizer_handler = TokenizerHandler(config)
+
+        self._setup_middleware()
+        self._setup_routes()
+        self._setup_error_handlers()
+
+    def _setup_middleware(self):
+        """Setup middleware for CORS, logging, etc."""
+
+        @self.app.middleware("request")
+        async def add_cors_headers(_request: Request):
+            """Add CORS headers to all requests."""
+
+        @self.app.middleware("response")
+        async def add_response_headers(_request: Request, resp: HTTPResponse):
+            """Add standard response headers."""
+            resp.headers["Access-Control-Allow-Origin"] = "*"
+            resp.headers["Access-Control-Allow-Methods"] = "GET, POST, OPTIONS"
+            resp.headers["Access-Control-Allow-Headers"] = "Content-Type, Authorization"
+            resp.headers["Server"] = "guidellm-mock-server"
+
+    def _setup_routes(self):  # noqa: C901
+        @self.app.get("/health")
+        async def health_check(_request: Request):
+            return response.json({"status": "healthy", "timestamp": time.time()})
+
+        @self.app.get("/v1/models")
+        async def list_models(_request: Request):
+            return response.json(
+                {
+                    "object": "list",
+                    "data": [
+                        {
+                            "id": self.config.model,
+                            "object": "model",
+                            "created": int(time.time()),
+                            "owned_by": "guidellm-mock",
+                        }
+                    ],
+                }
+            )
+
+        @self.app.route("/v1/chat/completions", methods=["POST", "OPTIONS"])
+        async def chat_completions(request: Request):
+            if request.method == "OPTIONS":
+                return response.text("", status=204)
+            return await self.chat_handler.handle(request)
+
+        @self.app.route("/v1/completions", methods=["POST", "OPTIONS"])
+        async def completions(request: Request):
+            if request.method == "OPTIONS":
+                return response.text("", status=204)
+            return await self.completions_handler.handle(request)
+
+        @self.app.route("/tokenize", methods=["POST", "OPTIONS"])
+        async def tokenize(request: Request):
+            if request.method == "OPTIONS":
+                return response.text("", status=204)
+            return await self.tokenizer_handler.tokenize(request)
+
+        @self.app.route("/detokenize", methods=["POST", "OPTIONS"])
+        async def detokenize(request: Request):
+            if request.method == "OPTIONS":
+                return response.text("", status=204)
+            return await self.tokenizer_handler.detokenize(request)
+
+    def _setup_error_handlers(self):
+        """Setup error handlers."""
+
+        @self.app.exception(Exception)
+        async def generic_error_handler(_request: Request, exception: Exception):
+            logger.error(f"Unhandled exception: {exception}")
+            return response.json(
+                {
+                    "error": {
+                        "message": "Internal server error",
+                        "type": type(exception).__name__,
+                        "error": str(exception),
+                    }
+                },
+                status=500,
+            )
+
+        @self.app.exception(NotFound)
+        async def not_found_handler(_request: Request, _exception):
+            return response.json(
+                {
+                    "error": {
+                        "message": "Not Found",
+                        "type": "not_found_error",
+                        "code": "not_found",
+                    }
+                },
+                status=404,
+            )
+
+    def run(self) -> None:
+        """
+        Start the mock server with configured settings.
+
+        Runs the Sanic application in single-process mode with access logging enabled
+        for debugging and monitoring request patterns during testing.
+        """
+        self.app.run(
+            host=self.config.host,
+            port=self.config.port,
+            debug=False,
+            single_process=True,
+            access_log=True,
+            register_sys_signals=False,  # Disable signal handlers for threading
+        )
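For orientation, here is a minimal launch sketch for the new mock server. It assumes MockServerConfig accepts host, port, and model fields, which is inferred from the self.config.host, self.config.port, and self.config.model accesses above; the config class lives in guidellm/mock_server/config.py and its constructor is not shown in this diff.

from guidellm.mock_server.config import MockServerConfig
from guidellm.mock_server.server import MockServer

# Field names inferred from the attribute accesses above; treat as a sketch.
config = MockServerConfig(host="127.0.0.1", port=8080, model="test-model")
server = MockServer(config)
server.run()  # blocking; serves /health, /v1/models, /v1/chat/completions,
              # /v1/completions, /tokenize, and /detokenize

Because run() passes register_sys_signals=False, the server can also be started on a background thread in tests, which is what the inline comment about threading suggests.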
guidellm/mock_server/utils.py
ADDED
@@ -0,0 +1,302 @@
+"""
+Mock server utilities for text generation and tokenization testing.
+
+This module provides mock tokenization and text generation utilities for testing
+guidellm's mock server functionality. It includes a mock tokenizer that simulates
+tokenization processes, functions to generate reproducible fake text with specific
+token counts, and timing generators for realistic benchmarking scenarios.
+"""
+
+from __future__ import annotations
+
+import random
+import re
+from collections.abc import Generator
+
+from faker import Faker
+from transformers.tokenization_utils import AddedToken, PreTrainedTokenizer, TextInput
+
+__all__ = [
+    "MockTokenizer",
+    "create_fake_text",
+    "create_fake_tokens_str",
+    "sample_number",
+    "times_generator",
+]
+
+
+class MockTokenizer(PreTrainedTokenizer):
+    """
+    Mock tokenizer implementation for testing text processing workflows.
+
+    Provides a simplified tokenizer that splits text using regex patterns and
+    generates deterministic token IDs based on string hashing. Used for testing
+    guidellm components without requiring actual model tokenizers.
+
+    :cvar VocabSize: Fixed vocabulary size for the mock tokenizer
+    """
+
+    VocabSize = 100000007
+
+    def __len__(self) -> int:
+        """
+        Get the vocabulary size of the tokenizer.
+
+        :return: The total number of tokens in the vocabulary
+        """
+        return self.VocabSize
+
+    def __call__(self, text: str | list[str], **kwargs) -> list[int]:  # noqa: ARG002
+        """
+        Tokenize text and return token IDs (callable interface).
+
+        :param text: Input text to tokenize
+        :return: List of token IDs
+        """
+        if isinstance(text, str):
+            tokens = self.tokenize(text)
+            return self.convert_tokens_to_ids(tokens)
+        elif isinstance(text, list):
+            # Handle batch processing
+            result = []
+            for t in text:
+                result.extend(self.__call__(t))
+            return result
+        else:
+            msg = f"text input must be of type `str` or `list[str]`, got {type(text)}"
+            raise ValueError(msg)
+
+    def tokenize(self, text: TextInput, **_kwargs) -> list[str]:  # type: ignore[override]
+        """
+        Tokenize input text into a list of token strings.
+
+        Splits text using regex to separate words, punctuation, and whitespace
+        into individual tokens for processing.
+
+        :param text: Input text to tokenize
+        :return: List of token strings from the input text
+        """
+        # Split text into tokens: words, spaces, and punctuation
+        return re.findall(r"\w+|[^\w\s]|\s+", text)
+
+    def convert_tokens_to_ids(self, tokens: str | list[str]) -> list[int]:
+        """
+        Convert token strings to numeric token IDs.
+
+        Uses deterministic hashing to generate consistent token IDs for
+        reproducible testing scenarios.
+
+        :param tokens: Single token string or list of token strings
+        :return: Single token ID or list of token IDs
+        """
+        if isinstance(tokens, str):
+            return [hash(tokens) % self.VocabSize]
+        return [hash(token) % self.VocabSize for token in tokens]
+
+    def convert_ids_to_tokens(  # type: ignore[override]
+        self, ids: list[int], _skip_special_tokens: bool = False
+    ) -> list[str]:
+        """
+        Convert numeric token IDs back to token strings.
+
+        Generates fake text tokens using Faker library seeded with token IDs
+        for deterministic and reproducible token generation.
+
+        :param ids: Single token ID or list of token IDs to convert
+        :return: Single token string or list of token strings
+        """
+        if not ids:
+            return [""]
+
+        fake = Faker()
+        fake.seed_instance(sum(ids) % self.VocabSize)
+
+        target_count = len(ids)
+        current_count = 0
+        tokens = []
+
+        while current_count < target_count:
+            text = fake.text(
+                max_nb_chars=(target_count - current_count) * 10  # oversample
+            )
+            new_tokens = self.tokenize(text)
+
+            if current_count > 0:
+                new_tokens = [".", " "] + new_tokens
+
+            new_tokens = (
+                new_tokens[: target_count - current_count]
+                if len(new_tokens) > (target_count - current_count)
+                else new_tokens
+            )
+            tokens += new_tokens
+            current_count += len(new_tokens)
+
+        return tokens
+
+    def convert_tokens_to_string(self, tokens: list[str]) -> str:
+        """
+        Convert a list of token strings back to a single text string.
+
+        :param tokens: List of token strings to concatenate
+        :return: Concatenated string from all tokens
+        """
+        return "".join(tokens)
+
+    def _add_tokens(
+        self,
+        new_tokens: list[str] | list[AddedToken],  # noqa: ARG002
+        special_tokens: bool = False,  # noqa: ARG002
+    ) -> int:
+        """
+        Add new tokens to the tokenizer vocabulary (mock implementation).
+
+        :param new_tokens: List of tokens to add to the vocabulary
+        :param special_tokens: Whether the tokens are special tokens
+        :return: Number of tokens actually added (always 0 for mock)
+        """
+        return 0
+
+    def apply_chat_template(  # type: ignore[override]
+        self,
+        conversation: list,
+        tokenize: bool = False,  # Changed default to False to match transformers
+        add_generation_prompt: bool = False,  # noqa: ARG002
+        **kwargs,  # noqa: ARG002
+    ) -> str | list[int]:
+        """
+        Apply a chat template to format conversation messages.
+
+        Mock implementation that concatenates all message content for testing.
+
+        :param conversation: List of chat messages
+        :param tokenize: Whether to return tokens or string
+        :param add_generation_prompt: Whether to add generation prompt
+        :return: Formatted text string or token IDs
+        """
+        # Simple concatenation of all message content
+        texts = []
+        for message in conversation:
+            if isinstance(message, dict) and "content" in message:
+                texts.append(message["content"])
+            elif hasattr(message, "content"):
+                texts.append(message.content)
+
+        formatted_text = " ".join(texts)
+
+        if tokenize:
+            return self.convert_tokens_to_ids(self.tokenize(formatted_text))
+        return formatted_text
+
+    def decode(  # type: ignore[override]
+        self,
+        token_ids: list[int],
+        skip_special_tokens: bool = True,
+        **kwargs,  # noqa: ARG002
+    ) -> str:
+        """
+        Decode token IDs back to text string.
+
+        :param token_ids: List of token IDs to decode
+        :param skip_special_tokens: Whether to skip special tokens
+        :return: Decoded text string
+        """
+        tokens = self.convert_ids_to_tokens(token_ids, skip_special_tokens)
+        return self.convert_tokens_to_string(tokens)
+
+
+def create_fake_text(
+    num_tokens: int,
+    processor: PreTrainedTokenizer,
+    seed: int = 42,
+    fake: Faker | None = None,
+) -> str:
+    """
+    Generate fake text using a tokenizer processor with specified token count.
+
+    Creates text by generating fake tokens and joining them into a string,
+    ensuring the result has the exact number of tokens when processed by
+    the given tokenizer.
+
+    :param num_tokens: Target number of tokens in the generated text
+    :param processor: Tokenizer to use for token generation and validation
+    :param seed: Random seed for reproducible text generation
+    :param fake: Optional Faker instance for text generation
+    :return: Generated text string with the specified token count
+    """
+    return "".join(create_fake_tokens_str(num_tokens, processor, seed, fake))
+
+
+def create_fake_tokens_str(
+    num_tokens: int,
+    processor: PreTrainedTokenizer,
+    seed: int = 42,
+    fake: Faker | None = None,
+) -> list[str]:
+    """
+    Generate fake token strings using a tokenizer processor.
+
+    Creates a list of token strings by generating fake text and tokenizing it
+    until the desired token count is reached. Uses the provided tokenizer
+    for accurate token boundary detection.
+
+    :param num_tokens: Target number of tokens to generate
+    :param processor: Tokenizer to use for token generation and validation
+    :param seed: Random seed for reproducible token generation
+    :param fake: Optional Faker instance for text generation
+    :return: List of token strings with the specified count
+    """
+    if not fake:
+        fake = Faker()
+        fake.seed_instance(seed)
+
+    tokens: list[str] = []
+
+    while len(tokens) < num_tokens:
+        text = fake.text(
+            max_nb_chars=(num_tokens - len(tokens)) * 30  # oversample
+        )
+        new_tokens = processor.tokenize(text)
+
+        if len(tokens) > 0:
+            new_tokens = [".", " "] + new_tokens
+
+        new_tokens = (
+            new_tokens[: num_tokens - len(tokens)]
+            if len(new_tokens) > (num_tokens - len(tokens))
+            else new_tokens
+        )
+        tokens += new_tokens
+
+    return tokens
+
+
+def times_generator(mean: float, standard_dev: float) -> Generator[float]:
+    """
+    Generate infinite timing values from a normal distribution.
+
+    Creates a generator that yields timing values sampled from a normal
+    distribution, useful for simulating realistic request timing patterns
+    in benchmarking scenarios.
+
+    :param mean: Mean value for the normal distribution
+    :param standard_dev: Standard deviation for the normal distribution
+    :return: Generator yielding positive timing values from the distribution
+    """
+    while True:
+        yield sample_number(mean, standard_dev)
+
+
+def sample_number(mean: float, standard_dev: float) -> float:
+    """
+    Generate a single timing value from a normal distribution.
+
+    Samples one timing value from a normal distribution with the specified
+    parameters, ensuring the result is non-negative for realistic timing
+    simulation in benchmarking scenarios.
+
+    :param mean: Mean value for the normal distribution
+    :param standard_dev: Standard deviation for the normal distribution
+    :return: Non-negative timing value from the distribution
+    """
+    return max(0.0, random.gauss(mean, standard_dev))
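A short usage sketch of these utilities, based only on the definitions above. Note that decode() does not recover the original input: IDs are produced by one-way hashing, so decoding yields deterministic fake text with the same token count.

from guidellm.mock_server.utils import (
    MockTokenizer,
    create_fake_text,
    times_generator,
)

tok = MockTokenizer()  # assumes no extra constructor args are required
ids = tok("Hello, world!")        # one hash(token) % VocabSize per token
text = tok.decode(ids)            # reproducible fake text with len(ids) tokens

prompt = create_fake_text(128, tok, seed=7)  # exactly 128 tokens under `tok`

delays = times_generator(mean=0.05, standard_dev=0.01)
ttft = next(delays)               # non-negative draw from N(0.05, 0.01**2)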
guidellm/preprocess/dataset.py
CHANGED
@@ -1,9 +1,9 @@
 import json
 import os
-from collections.abc import Iterator
+from collections.abc import Callable, Iterator
 from enum import Enum
 from pathlib import Path
-from typing import Any, Optional, Union
+from typing import Any

 import yaml
 from datasets import Dataset
@@ -11,7 +11,6 @@ from loguru import logger
 from pydantic import BaseModel, Field
 from transformers import PreTrainedTokenizerBase

-from guidellm.dataset import load_dataset as guidellm_load_dataset
 from guidellm.utils import IntegerRangeSampler, check_load_processor
 from guidellm.utils.hf_datasets import SUPPORTED_TYPES, save_dataset_to_file

@@ -32,7 +31,7 @@ def handle_ignore_strategy(
     min_prompt_tokens: int,
     tokenizer: PreTrainedTokenizerBase,
     **_kwargs,
-) -> Optional[str]:
+) -> str | None:
     """
     Ignores prompts that are shorter than the required minimum token length.

@@ -56,7 +55,7 @@ def handle_concatenate_strategy(
     tokenizer: PreTrainedTokenizerBase,
     concat_delimiter: str,
     **_kwargs,
-) -> Optional[str]:
+) -> str | None:
     """
     Concatenates prompts until the minimum token requirement is met.

@@ -117,7 +116,7 @@ def handle_error_strategy(
     min_prompt_tokens: int,
     tokenizer: PreTrainedTokenizerBase,
     **_kwargs,
-) -> Optional[str]:
+) -> str | None:
     """
     Raises an error if the prompt is too short.

@@ -150,24 +149,24 @@ class TokensConfig(BaseModel):
         description="The average number of tokens.",
         gt=0,
     )
-    stdev: Optional[int] = Field(
+    stdev: int | None = Field(
         description="The standard deviation of the tokens.",
         gt=0,
         default=None,
     )
-    min: Optional[int] = Field(
+    min: int | None = Field(
         description="The minimum number of tokens.",
         gt=0,
         default=None,
     )
-    max: Optional[int] = Field(
+    max: int | None = Field(
         description="The maximum number of tokens.",
         gt=0,
         default=None,
     )

     @staticmethod
-    def parse_str(data: Union[str, Path]) -> "TokensConfig":
+    def parse_str(data: str | Path) -> "TokensConfig":
         """
         Parses a string or path into a TokensConfig object. Supports:
         - JSON string
@@ -215,14 +214,14 @@ class TokensConfig(BaseModel):
         return TokensConfig(**config_dict)  # type: ignore[arg-type]

     @staticmethod
-    def parse_config_file(data: Union[str, Path]) -> "TokensConfig":
+    def parse_config_file(data: str | Path) -> "TokensConfig":
         with Path(data).open("r") as file:
             config_dict = yaml.safe_load(file)

         return TokensConfig(**config_dict)


-def _validate_output_suffix(output_path: Union[str, Path]) -> None:
+def _validate_output_suffix(output_path: str | Path) -> None:
     output_path = Path(output_path)
     suffix = output_path.suffix.lower()
     if suffix not in SUPPORTED_TYPES:
@@ -233,18 +232,18 @@ def _validate_output_suffix(output_path: Union[str, Path]) -> None:


 def process_dataset(
-    data: Union[str, Path],
-    output_path: Union[str, Path],
-    processor: Union[str, Path, PreTrainedTokenizerBase],
-    prompt_tokens: Union[str, Path],
-    output_tokens: Union[str, Path],
-    processor_args: Optional[dict[str, Any]] = None,
-    data_args: Optional[dict[str, Any]] = None,
+    data: str | Path,
+    output_path: str | Path,
+    processor: str | Path | PreTrainedTokenizerBase,
+    prompt_tokens: str | Path,
+    output_tokens: str | Path,
+    processor_args: dict[str, Any] | None = None,
+    data_args: dict[str, Any] | None = None,
     short_prompt_strategy: ShortPromptStrategy = ShortPromptStrategy.IGNORE,
-    pad_char: Optional[str] = None,
-    concat_delimiter: Optional[str] = None,
+    pad_char: str | None = None,
+    concat_delimiter: str | None = None,
     push_to_hub: bool = False,
-    hub_dataset_id: Optional[str] = None,
+    hub_dataset_id: str | None = None,
     random_seed: int = 42,
 ) -> None:
     """
@@ -271,9 +270,7 @@ def process_dataset(
         f"Starting dataset conversion | Input: {data} | Output directory: {output_path}"
     )

-    dataset, column_mappings = guidellm_load_dataset(
-        data, data_args, processor, processor_args
-    )
+    dataset, column_mappings = None, None
     tokenizer = check_load_processor(
         processor,
         processor_args,
@@ -354,7 +351,7 @@ def process_dataset(


 def push_dataset_to_hub(
-    hub_dataset_id: Optional[str],
+    hub_dataset_id: str | None,
     processed_dataset: Dataset,
 ) -> None:
     """
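The dataset.py changes above are almost entirely a mechanical migration from typing.Optional/Union annotations to PEP 604 unions (str | None, str | Path), plus removal of the old guidellm.dataset loader. For reference, a small sketch of the TokensConfig.parse_str entry point documented in the diff; the "average" field name is an assumption, since only the stdev/min/max fields are visible in this hunk.

from guidellm.preprocess.dataset import TokensConfig

# JSON string form, per the parse_str docstring ("average" is assumed).
cfg = TokensConfig.parse_str('{"average": 256, "stdev": 32, "max": 1024}')
assert cfg.stdev == 32   # stdev/min/max are now typed `int | None`
assert cfg.min is None   # optional fields default to None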
guidellm/presentation/builder.py
CHANGED
@@ -1,9 +1,9 @@
 from typing import TYPE_CHECKING, Any

 if TYPE_CHECKING:
-    from guidellm.benchmark
+    from guidellm.benchmark import GenerativeBenchmark

-from .data_models import BenchmarkDatum, RunInfo, WorkloadDetails
+from guidellm.presentation.data_models import BenchmarkDatum, RunInfo, WorkloadDetails


 class UIDataBuilder:
|