guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +524 -255
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +109 -0
  5. guidellm/backends/openai.py +340 -0
  6. guidellm/backends/response_handlers.py +428 -0
  7. guidellm/benchmark/__init__.py +69 -39
  8. guidellm/benchmark/benchmarker.py +160 -316
  9. guidellm/benchmark/entrypoints.py +560 -127
  10. guidellm/benchmark/outputs/__init__.py +24 -0
  11. guidellm/benchmark/outputs/console.py +633 -0
  12. guidellm/benchmark/outputs/csv.py +721 -0
  13. guidellm/benchmark/outputs/html.py +473 -0
  14. guidellm/benchmark/outputs/output.py +169 -0
  15. guidellm/benchmark/outputs/serialized.py +69 -0
  16. guidellm/benchmark/profiles.py +718 -0
  17. guidellm/benchmark/progress.py +553 -556
  18. guidellm/benchmark/scenarios/__init__.py +40 -0
  19. guidellm/benchmark/scenarios/chat.json +6 -0
  20. guidellm/benchmark/scenarios/rag.json +6 -0
  21. guidellm/benchmark/schemas/__init__.py +66 -0
  22. guidellm/benchmark/schemas/base.py +402 -0
  23. guidellm/benchmark/schemas/generative/__init__.py +55 -0
  24. guidellm/benchmark/schemas/generative/accumulator.py +841 -0
  25. guidellm/benchmark/schemas/generative/benchmark.py +163 -0
  26. guidellm/benchmark/schemas/generative/entrypoints.py +381 -0
  27. guidellm/benchmark/schemas/generative/metrics.py +927 -0
  28. guidellm/benchmark/schemas/generative/report.py +158 -0
  29. guidellm/data/__init__.py +34 -4
  30. guidellm/data/builders.py +541 -0
  31. guidellm/data/collators.py +16 -0
  32. guidellm/data/config.py +120 -0
  33. guidellm/data/deserializers/__init__.py +49 -0
  34. guidellm/data/deserializers/deserializer.py +141 -0
  35. guidellm/data/deserializers/file.py +223 -0
  36. guidellm/data/deserializers/huggingface.py +94 -0
  37. guidellm/data/deserializers/memory.py +194 -0
  38. guidellm/data/deserializers/synthetic.py +246 -0
  39. guidellm/data/entrypoints.py +52 -0
  40. guidellm/data/loaders.py +190 -0
  41. guidellm/data/preprocessors/__init__.py +27 -0
  42. guidellm/data/preprocessors/formatters.py +410 -0
  43. guidellm/data/preprocessors/mappers.py +196 -0
  44. guidellm/data/preprocessors/preprocessor.py +30 -0
  45. guidellm/data/processor.py +29 -0
  46. guidellm/data/schemas.py +175 -0
  47. guidellm/data/utils/__init__.py +6 -0
  48. guidellm/data/utils/dataset.py +94 -0
  49. guidellm/extras/__init__.py +4 -0
  50. guidellm/extras/audio.py +220 -0
  51. guidellm/extras/vision.py +242 -0
  52. guidellm/logger.py +2 -2
  53. guidellm/mock_server/__init__.py +8 -0
  54. guidellm/mock_server/config.py +84 -0
  55. guidellm/mock_server/handlers/__init__.py +17 -0
  56. guidellm/mock_server/handlers/chat_completions.py +280 -0
  57. guidellm/mock_server/handlers/completions.py +280 -0
  58. guidellm/mock_server/handlers/tokenizer.py +142 -0
  59. guidellm/mock_server/models.py +510 -0
  60. guidellm/mock_server/server.py +238 -0
  61. guidellm/mock_server/utils.py +302 -0
  62. guidellm/scheduler/__init__.py +69 -26
  63. guidellm/scheduler/constraints/__init__.py +49 -0
  64. guidellm/scheduler/constraints/constraint.py +325 -0
  65. guidellm/scheduler/constraints/error.py +411 -0
  66. guidellm/scheduler/constraints/factory.py +182 -0
  67. guidellm/scheduler/constraints/request.py +312 -0
  68. guidellm/scheduler/constraints/saturation.py +722 -0
  69. guidellm/scheduler/environments.py +252 -0
  70. guidellm/scheduler/scheduler.py +137 -368
  71. guidellm/scheduler/schemas.py +358 -0
  72. guidellm/scheduler/strategies.py +617 -0
  73. guidellm/scheduler/worker.py +413 -419
  74. guidellm/scheduler/worker_group.py +712 -0
  75. guidellm/schemas/__init__.py +65 -0
  76. guidellm/schemas/base.py +417 -0
  77. guidellm/schemas/info.py +188 -0
  78. guidellm/schemas/request.py +235 -0
  79. guidellm/schemas/request_stats.py +349 -0
  80. guidellm/schemas/response.py +124 -0
  81. guidellm/schemas/statistics.py +1018 -0
  82. guidellm/{config.py → settings.py} +31 -24
  83. guidellm/utils/__init__.py +71 -8
  84. guidellm/utils/auto_importer.py +98 -0
  85. guidellm/utils/cli.py +132 -5
  86. guidellm/utils/console.py +566 -0
  87. guidellm/utils/encoding.py +778 -0
  88. guidellm/utils/functions.py +159 -0
  89. guidellm/utils/hf_datasets.py +1 -2
  90. guidellm/utils/hf_transformers.py +4 -4
  91. guidellm/utils/imports.py +9 -0
  92. guidellm/utils/messaging.py +1118 -0
  93. guidellm/utils/mixins.py +115 -0
  94. guidellm/utils/random.py +3 -4
  95. guidellm/utils/registry.py +220 -0
  96. guidellm/utils/singleton.py +133 -0
  97. guidellm/utils/synchronous.py +159 -0
  98. guidellm/utils/text.py +163 -50
  99. guidellm/utils/typing.py +41 -0
  100. guidellm/version.py +2 -2
  101. guidellm-0.6.0a5.dist-info/METADATA +364 -0
  102. guidellm-0.6.0a5.dist-info/RECORD +109 -0
  103. guidellm/backend/__init__.py +0 -23
  104. guidellm/backend/backend.py +0 -259
  105. guidellm/backend/openai.py +0 -708
  106. guidellm/backend/response.py +0 -136
  107. guidellm/benchmark/aggregator.py +0 -760
  108. guidellm/benchmark/benchmark.py +0 -837
  109. guidellm/benchmark/output.py +0 -997
  110. guidellm/benchmark/profile.py +0 -409
  111. guidellm/benchmark/scenario.py +0 -104
  112. guidellm/data/prideandprejudice.txt.gz +0 -0
  113. guidellm/dataset/__init__.py +0 -22
  114. guidellm/dataset/creator.py +0 -213
  115. guidellm/dataset/entrypoints.py +0 -42
  116. guidellm/dataset/file.py +0 -92
  117. guidellm/dataset/hf_datasets.py +0 -62
  118. guidellm/dataset/in_memory.py +0 -132
  119. guidellm/dataset/synthetic.py +0 -287
  120. guidellm/objects/__init__.py +0 -18
  121. guidellm/objects/pydantic.py +0 -89
  122. guidellm/objects/statistics.py +0 -953
  123. guidellm/preprocess/__init__.py +0 -3
  124. guidellm/preprocess/dataset.py +0 -374
  125. guidellm/presentation/__init__.py +0 -28
  126. guidellm/presentation/builder.py +0 -27
  127. guidellm/presentation/data_models.py +0 -232
  128. guidellm/presentation/injector.py +0 -66
  129. guidellm/request/__init__.py +0 -18
  130. guidellm/request/loader.py +0 -284
  131. guidellm/request/request.py +0 -79
  132. guidellm/request/types.py +0 -10
  133. guidellm/scheduler/queues.py +0 -25
  134. guidellm/scheduler/result.py +0 -155
  135. guidellm/scheduler/strategy.py +0 -495
  136. guidellm-0.3.1.dist-info/METADATA +0 -329
  137. guidellm-0.3.1.dist-info/RECORD +0 -62
  138. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/WHEEL +0 -0
  139. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/entry_points.txt +0 -0
  140. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/licenses/LICENSE +0 -0
  141. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,238 @@
1
+ """
2
+ High-performance mock server for OpenAI and vLLM API compatibility testing.
3
+
4
+ This module provides a Sanic-based mock server that simulates OpenAI and vLLM APIs
5
+ with configurable latency, token generation patterns, and response characteristics.
6
+ The server supports both streaming and non-streaming endpoints, enabling realistic
7
+ performance testing and validation of GuideLLM benchmarking workflows without
8
+ requiring actual model deployments.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import time
14
+ from typing import Any, cast
15
+
16
+ from sanic import Sanic, response
17
+ from sanic.exceptions import NotFound
18
+ from sanic.log import logger
19
+ from sanic.request import File, Request
20
+ from sanic.response import BaseHTTPResponse, HTTPResponse
21
+
22
+ from guidellm.mock_server.config import MockServerConfig
23
+ from guidellm.mock_server.handlers import (
24
+ ChatCompletionsHandler,
25
+ CompletionsHandler,
26
+ TokenizerHandler,
27
+ )
28
+
29
+ __all__ = ["MockServer"]
30
+
31
+
32
class MockServer:
    """
    High-performance mock server implementing OpenAI and vLLM API endpoints.

    Provides a Sanic-based web server that simulates API responses with configurable
    timing characteristics for testing and benchmarking purposes. Supports chat
    completions, text completions, tokenization endpoints, audio endpoints, and
    model listing.

    Example:
    ::
        config = MockServerConfig(model="test-model", port=8080)
        server = MockServer(config)
        server.run()
    """

    def __init__(self, config: MockServerConfig) -> None:
        """
        Initialize the mock server with configuration.

        Creates the Sanic application and the request handlers, then registers
        middleware, routes, and error handlers on the application.

        :param config: Server configuration containing network settings and response
            timing parameters
        """
        self.config = config
        self.app = Sanic("guidellm-mock-server")
        self.chat_handler = ChatCompletionsHandler(config)
        self.completions_handler = CompletionsHandler(config)
        self.tokenizer_handler = TokenizerHandler(config)

        self._setup_middleware()
        self._setup_routes()
        self._setup_error_handlers()

    def _setup_middleware(self):
        """Register the request and response middleware on the Sanic app."""

        @self.app.middleware("request")
        async def add_cors_headers(_request: Request) -> None:
            """
            Request-side hook that intentionally does nothing.

            The CORS headers themselves are attached by the response middleware
            below; returning ``None`` lets request processing continue.
            """
            return None  # noqa: RET501

        @self.app.middleware("response")
        async def add_response_headers(
            _request: Any, resp: BaseHTTPResponse
        ) -> HTTPResponse:
            """Attach permissive CORS headers and the server name to a response."""
            resp.headers["Access-Control-Allow-Origin"] = "*"
            resp.headers["Access-Control-Allow-Methods"] = "GET, POST, OPTIONS"
            resp.headers["Access-Control-Allow-Headers"] = "Content-Type, Authorization"
            resp.headers["Server"] = "guidellm-mock-server"
            return resp  # type: ignore[return-value]

    def _setup_routes(self):  # noqa: C901
        """Register every HTTP route served by the mock server."""

        @self.app.get("/health")
        async def health_check(_request: Request):
            """Report a static healthy status with the current timestamp."""
            return response.json({"status": "healthy", "timestamp": time.time()})

        @self.app.get("/v1/models")
        async def list_models(_request: Request):
            """List the single configured model in an OpenAI-style payload."""
            return response.json(
                {
                    "object": "list",
                    "data": [
                        {
                            "id": self.config.model,
                            "object": "model",
                            "created": int(time.time()),
                            "owned_by": "guidellm-mock",
                        }
                    ],
                }
            )

        @self.app.route("/v1/chat/completions", methods=["POST", "OPTIONS"])
        async def chat_completions(request: Request):
            """Answer CORS preflight directly; delegate POST to the chat handler."""
            if request.method == "OPTIONS":
                return response.text("", status=204)
            return await self.chat_handler.handle(request)

        @self.app.route("/v1/completions", methods=["POST", "OPTIONS"])
        async def completions(request: Request):
            """Answer CORS preflight directly; delegate POST to the completions handler."""
            if request.method == "OPTIONS":
                return response.text("", status=204)
            return await self.completions_handler.handle(request)

        @self.app.route("/tokenize", methods=["POST", "OPTIONS"])
        async def tokenize(request: Request):
            """Answer CORS preflight directly; delegate POST to the tokenizer handler."""
            if request.method == "OPTIONS":
                return response.text("", status=204)
            return await self.tokenizer_handler.tokenize(request)

        @self.app.route("/detokenize", methods=["POST", "OPTIONS"])
        async def detokenize(request: Request):
            """Answer CORS preflight directly; delegate POST to the tokenizer handler."""
            if request.method == "OPTIONS":
                return response.text("", status=204)
            return await self.tokenizer_handler.detokenize(request)

        @self.app.route("/v1/audio/transcriptions", methods=["POST", "OPTIONS"])
        async def audio_transcriptions(request: Request) -> HTTPResponse:
            """
            Mock OpenAI audio transcription endpoint:
            - receives multipart/form-data
            - file field contains audio file (required)
            - model field is optional, default to "mock-model"
            - returns a canned transcription payload describing the upload
            """
            if request.method == "OPTIONS":
                return response.text("", status=204)
            if request.files is None or request.form is None:
                return response.json({"error": "No form data provided"}, status=400)
            # Only the upload is mandatory; the model falls back to its default.
            if "file" not in request.files:
                return response.json(
                    {"error": "Missing 'file' in form-data"}, status=400
                )

            file = cast("File", request.files.get("file"))
            model = request.form.get("model", "mock-model")

            return response.json(
                {
                    "text": f"Mock transcription for {file.name}",
                    "file_size": len(file.body),
                    "model_used": model,
                    "transcription": f"Transcribed({file.name}) using {model}",
                }
            )

        @self.app.route("/v1/audio/translations", methods=["POST", "OPTIONS"])
        async def audio_translations(request: Request) -> HTTPResponse:
            """
            Mock OpenAI audio translation endpoint:
            - receives multipart/form-data
            - file field contains audio file (required)
            - model field is optional, default to "mock-model"
            - returns a canned translation payload describing the upload
            """
            if request.method == "OPTIONS":
                return response.text("", status=204)
            if request.files is None or request.form is None:
                return response.json({"error": "No form data provided"}, status=400)
            # Only the upload is mandatory; the model falls back to its default.
            if "file" not in request.files:
                return response.json(
                    {"error": "Missing 'file' in form-data"}, status=400
                )

            file = cast("File", request.files.get("file"))
            decoded_text = (
                "This is a mock translation result."  # mock output translated text
            )

            return response.json(
                {
                    "text": decoded_text,
                    "file_size": len(file.body),
                    # Plain string: a set literal here is not a valid JSON value.
                    "filename": file.name,
                    "model_used": request.form.get("model", "mock-model"),
                    "mimetype": file.type,
                }
            )

    def _setup_error_handlers(self):
        """Register JSON error responses for unhandled exceptions and 404s."""

        @self.app.exception(Exception)
        async def generic_error_handler(_request: Request, exception: Exception):
            """Log the exception and return a 500 JSON error payload."""
            logger.error("Unhandled exception: %s", exception)
            return response.json(
                {
                    "error": {
                        "message": "Internal server error",
                        "type": type(exception).__name__,
                        "error": str(exception),
                    }
                },
                status=500,
            )

        @self.app.exception(NotFound)
        async def not_found_handler(_request: Request, _exception):
            """Return a 404 JSON error payload for unknown routes."""
            return response.json(
                {
                    "error": {
                        "message": "Not Found",
                        "type": "not_found_error",
                        "code": "not_found",
                    }
                },
                status=404,
            )

    def run(self) -> None:
        """
        Start the mock server with configured settings.

        Runs the Sanic application in single-process mode with access logging
        enabled, binding to the host and port from the configuration.
        """
        self.app.run(
            host=self.config.host,
            port=self.config.port,
            debug=False,
            single_process=True,
            access_log=True,
            register_sys_signals=False,  # Disable signal handlers for threading
        )
@@ -0,0 +1,302 @@
1
+ """
2
+ Mock server utilities for text generation and tokenization testing.
3
+
4
+ This module provides mock tokenization and text generation utilities for testing
5
+ guidellm's mock server functionality. It includes a mock tokenizer that simulates
6
+ tokenization processes, functions to generate reproducible fake text with specific
7
+ token counts, and timing generators for realistic benchmarking scenarios.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import random
13
+ import re
14
+ from collections.abc import Generator
15
+
16
+ from faker import Faker
17
+ from transformers.tokenization_utils import AddedToken, PreTrainedTokenizer, TextInput
18
+
19
+ __all__ = [
20
+ "MockTokenizer",
21
+ "create_fake_text",
22
+ "create_fake_tokens_str",
23
+ "sample_number",
24
+ "times_generator",
25
+ ]
26
+
27
+
28
class MockTokenizer(PreTrainedTokenizer):
    """
    Mock tokenizer implementation for testing text processing workflows.

    Provides a simplified tokenizer that splits text using regex patterns and
    derives token IDs from a checksum of each token string. Used for testing
    guidellm components without requiring actual model tokenizers.

    :cvar VocabSize: Fixed vocabulary size for the mock tokenizer
    """

    VocabSize = 100000007

    def __len__(self) -> int:
        """
        Get the vocabulary size of the tokenizer.

        :return: The total number of tokens in the vocabulary
        """
        return self.VocabSize

    def __call__(self, text: str | list[str], **kwargs) -> list[int]:  # noqa: ARG002
        """
        Tokenize text and return token IDs (callable interface).

        :param text: Input text to tokenize; a list of strings is tokenized item
            by item and the IDs are concatenated into one flat list
        :return: List of token IDs
        :raises ValueError: If ``text`` is neither a ``str`` nor a ``list``
        """
        if isinstance(text, str):
            return self.convert_tokens_to_ids(self.tokenize(text))
        if isinstance(text, list):
            # Handle batch processing: flatten the IDs of every item
            flat_ids: list[int] = []
            for item in text:
                flat_ids.extend(self.__call__(item))
            return flat_ids

        msg = f"text input must be of type `str` or `list[str]`, got {type(text)}"
        raise ValueError(msg)

    def tokenize(self, text: TextInput, **_kwargs) -> list[str]:  # type: ignore[override]
        """
        Tokenize input text into a list of token strings.

        Splits text using regex to separate words, punctuation, and whitespace
        runs into individual tokens.

        :param text: Input text to tokenize
        :return: List of token strings from the input text
        """
        # Split text into tokens: words, spaces, and punctuation
        return re.findall(r"\w+|[^\w\s]|\s+", text)

    def convert_tokens_to_ids(self, tokens: str | list[str]) -> list[int]:
        """
        Convert token strings to numeric token IDs.

        Each ID is the CRC-32 of the token's UTF-8 bytes reduced modulo
        ``VocabSize``. The builtin ``hash()`` is deliberately avoided: string
        hashes are salted per interpreter process, so IDs would differ between
        runs and between worker processes.

        :param tokens: Single token string or list of token strings
        :return: List of token IDs (a one-element list for a single token string)
        """
        # Function-scope import keeps this block self-contained (stdlib only).
        from zlib import crc32

        if isinstance(tokens, str):
            tokens = [tokens]

        vocab_size = self.VocabSize
        return [
            # surrogatepass: never fail on lone surrogates, like hash() did not
            crc32(token.encode("utf-8", "surrogatepass")) % vocab_size
            for token in tokens
        ]

    def convert_ids_to_tokens(  # type: ignore[override]
        self, ids: list[int], _skip_special_tokens: bool = False
    ) -> list[str]:
        """
        Convert numeric token IDs to fake token strings.

        Generates fake text with a Faker instance seeded from the sum of the IDs
        and tokenizes it until ``len(ids)`` tokens are collected. The result is
        not the inverse of ``convert_tokens_to_ids``; only the token count and
        the seed are derived from ``ids``.

        :param ids: List of token IDs to convert
        :return: List of token strings with one entry per ID, or ``[""]`` when
            ``ids`` is empty
        """
        if not ids:
            return [""]

        fake = Faker()
        fake.seed_instance(sum(ids) % self.VocabSize)

        target_count = len(ids)
        tokens: list[str] = []

        while len(tokens) < target_count:
            remaining = target_count - len(tokens)
            text = fake.text(max_nb_chars=remaining * 10)  # oversample
            new_tokens = self.tokenize(text)

            if tokens:
                # Separate consecutive generated passages
                new_tokens = [".", " "] + new_tokens

            # Slicing already caps at the list length, no size check needed
            tokens += new_tokens[:remaining]

        return tokens

    def convert_tokens_to_string(self, tokens: list[str]) -> str:
        """
        Convert a list of token strings back to a single text string.

        :param tokens: List of token strings to concatenate
        :return: Concatenated string from all tokens
        """
        return "".join(tokens)

    def _add_tokens(
        self,
        new_tokens: list[str] | list[AddedToken],  # noqa: ARG002
        special_tokens: bool = False,  # noqa: ARG002
    ) -> int:
        """
        Add new tokens to the tokenizer vocabulary (mock implementation).

        :param new_tokens: List of tokens to add to the vocabulary
        :param special_tokens: Whether the tokens are special tokens
        :return: Number of tokens actually added (always 0 for mock)
        """
        return 0

    def apply_chat_template(  # type: ignore[override]
        self,
        conversation: list,
        tokenize: bool = False,  # Changed default to False to match transformers
        add_generation_prompt: bool = False,  # noqa: ARG002
        **kwargs,  # noqa: ARG002
    ) -> str | list[int]:
        """
        Apply a chat template to format conversation messages.

        Mock implementation that joins the ``content`` of every message with a
        single space. Messages without a ``content`` key/attribute are skipped.

        :param conversation: List of chat messages (dicts or objects)
        :param tokenize: Whether to return token IDs instead of the string
        :param add_generation_prompt: Accepted for compatibility; unused
        :return: Formatted text string or token IDs
        """
        # Simple concatenation of all message content
        texts = []
        for message in conversation:
            if isinstance(message, dict) and "content" in message:
                texts.append(message["content"])
            elif hasattr(message, "content"):
                texts.append(message.content)

        formatted_text = " ".join(texts)

        if tokenize:
            return self.convert_tokens_to_ids(self.tokenize(formatted_text))
        return formatted_text

    def decode(  # type: ignore[override]
        self,
        token_ids: list[int],
        skip_special_tokens: bool = True,
        **kwargs,  # noqa: ARG002
    ) -> str:
        """
        Decode token IDs to a text string.

        :param token_ids: List of token IDs to decode
        :param skip_special_tokens: Forwarded to ``convert_ids_to_tokens``
        :return: Concatenation of the token strings produced for ``token_ids``
        """
        tokens = self.convert_ids_to_tokens(token_ids, skip_special_tokens)
        return self.convert_tokens_to_string(tokens)
206
+
207
+
208
def create_fake_text(
    num_tokens: int,
    processor: PreTrainedTokenizer,
    seed: int = 42,
    fake: Faker | None = None,
) -> str:
    """
    Build a fake text string from generated token strings.

    Delegates token generation to ``create_fake_tokens_str`` and concatenates
    the returned token strings without any separator.

    :param num_tokens: Number of token strings to generate and join
    :param processor: Tokenizer used to split the generated text into tokens
    :param seed: Random seed forwarded to the token generator
    :param fake: Optional Faker instance forwarded to the token generator
    :return: The generated token strings joined into one string
    """
    token_strings = create_fake_tokens_str(
        num_tokens=num_tokens,
        processor=processor,
        seed=seed,
        fake=fake,
    )
    return "".join(token_strings)
228
+
229
+
230
def create_fake_tokens_str(
    num_tokens: int,
    processor: PreTrainedTokenizer,
    seed: int = 42,
    fake: Faker | None = None,
) -> list[str]:
    """
    Generate fake token strings using a tokenizer processor.

    Repeatedly generates fake text, tokenizes it with ``processor`` and collects
    the tokens until exactly ``num_tokens`` have been gathered. Passages after
    the first are prefixed with ``"."`` and ``" "`` tokens as a separator.

    :param num_tokens: Target number of tokens to generate; values <= 0 yield
        an empty list
    :param processor: Tokenizer whose ``tokenize`` method splits generated text
    :param seed: Random seed applied to the Faker instance created here when
        ``fake`` is not supplied
    :param fake: Optional Faker instance for text generation
    :return: List of token strings with the specified count
    :raises ValueError: If the tokenizer yields no tokens for generated text,
        which would otherwise make the loop spin forever
    """
    if not fake:
        fake = Faker()
        # NOTE(review): the diff view drops indentation, so it is assumed that
        # only a locally created Faker is seeded and a caller-supplied instance
        # keeps its own state - confirm against the original source.
        fake.seed_instance(seed)

    tokens: list[str] = []

    while len(tokens) < num_tokens:
        remaining = num_tokens - len(tokens)
        text = fake.text(max_nb_chars=remaining * 30)  # oversample
        new_tokens = processor.tokenize(text)

        if tokens:
            # Separate consecutive generated passages
            new_tokens = [".", " "] + new_tokens

        if not new_tokens:
            msg = (
                "processor.tokenize() returned no tokens for generated text; "
                "cannot reach the requested token count"
            )
            raise ValueError(msg)

        # Slicing already caps at the list length, no size check needed
        tokens += new_tokens[:remaining]

    return tokens
272
+
273
+
274
def times_generator(mean: float, standard_dev: float) -> Generator[float]:
    """
    Yield an endless stream of sampled timing values.

    Every value is drawn independently through ``sample_number`` using the
    same distribution parameters; the generator never terminates on its own.

    :param mean: Mean value for the normal distribution
    :param standard_dev: Standard deviation for the normal distribution
    :return: Generator yielding non-negative timing values from the distribution
    """
    while True:
        next_time = sample_number(mean, standard_dev)
        yield next_time
288
+
289
+
290
def sample_number(mean: float, standard_dev: float) -> float:
    """
    Draw one timing value from a normal distribution, floored at zero.

    Samples ``random.gauss(mean, standard_dev)`` and replaces any draw that is
    not greater than zero with ``0.0`` so the result is never negative.

    :param mean: Mean value for the normal distribution
    :param standard_dev: Standard deviation for the normal distribution
    :return: Non-negative timing value from the distribution
    """
    drawn = random.gauss(mean, standard_dev)
    if drawn > 0.0:
        return drawn
    return 0.0
@@ -1,47 +1,90 @@
1
- from .result import (
2
- SchedulerRequestInfo,
3
- SchedulerRequestResult,
4
- SchedulerResult,
5
- SchedulerRunInfo,
1
+ """
2
+ Scheduler subsystem for orchestrating benchmark workloads and managing worker processes.
3
+
4
+ This module provides the core scheduling infrastructure for guidellm, including
5
+ strategies for controlling request timing patterns (synchronous, asynchronous,
6
+ constant rate, Poisson), constraints for limiting benchmark execution (duration,
7
+ error rates, request counts), and distributed execution through worker processes.
8
+ The scheduler coordinates between backend interfaces, manages benchmark state
9
+ transitions, and handles multi-turn request sequences with customizable timing
10
+ strategies and resource constraints.
11
+ """
12
+
13
+ from .constraints import (
14
+ Constraint,
15
+ ConstraintInitializer,
16
+ ConstraintsInitializerFactory,
17
+ MaxDurationConstraint,
18
+ MaxErrorRateConstraint,
19
+ MaxErrorsConstraint,
20
+ MaxGlobalErrorRateConstraint,
21
+ MaxNumberConstraint,
22
+ OverSaturationConstraint,
23
+ OverSaturationConstraintInitializer,
24
+ PydanticConstraintInitializer,
25
+ SerializableConstraintInitializer,
26
+ UnserializableConstraintInitializer,
6
27
  )
28
+ from .environments import Environment, NonDistributedEnvironment
7
29
  from .scheduler import Scheduler
8
- from .strategy import (
30
+ from .schemas import (
31
+ BackendInterface,
32
+ BackendT,
33
+ MultiTurnRequestT,
34
+ RequestT,
35
+ ResponseT,
36
+ SchedulerMessagingPydanticRegistry,
37
+ SchedulerProgress,
38
+ SchedulerState,
39
+ SchedulerUpdateAction,
40
+ )
41
+ from .strategies import (
9
42
  AsyncConstantStrategy,
10
43
  AsyncPoissonStrategy,
11
44
  ConcurrentStrategy,
12
45
  SchedulingStrategy,
46
+ StrategyT,
13
47
  StrategyType,
14
48
  SynchronousStrategy,
15
49
  ThroughputStrategy,
16
- strategy_display_str,
17
- )
18
- from .worker import (
19
- GenerativeRequestsWorker,
20
- GenerativeRequestsWorkerDescription,
21
- RequestsWorker,
22
- ResolveStatus,
23
- WorkerDescription,
24
- WorkerProcessResult,
25
50
  )
51
+ from .worker import WorkerProcess
52
+ from .worker_group import WorkerProcessGroup
26
53
 
27
54
  __all__ = [
28
55
  "AsyncConstantStrategy",
29
56
  "AsyncPoissonStrategy",
57
+ "BackendInterface",
58
+ "BackendT",
30
59
  "ConcurrentStrategy",
31
- "GenerativeRequestsWorker",
32
- "GenerativeRequestsWorkerDescription",
33
- "RequestsWorker",
34
- "ResolveStatus",
60
+ "Constraint",
61
+ "ConstraintInitializer",
62
+ "ConstraintsInitializerFactory",
63
+ "Environment",
64
+ "MaxDurationConstraint",
65
+ "MaxErrorRateConstraint",
66
+ "MaxErrorsConstraint",
67
+ "MaxGlobalErrorRateConstraint",
68
+ "MaxNumberConstraint",
69
+ "MultiTurnRequestT",
70
+ "NonDistributedEnvironment",
71
+ "OverSaturationConstraint",
72
+ "OverSaturationConstraintInitializer",
73
+ "PydanticConstraintInitializer",
74
+ "RequestT",
75
+ "ResponseT",
35
76
  "Scheduler",
36
- "SchedulerRequestInfo",
37
- "SchedulerRequestResult",
38
- "SchedulerResult",
39
- "SchedulerRunInfo",
77
+ "SchedulerMessagingPydanticRegistry",
78
+ "SchedulerProgress",
79
+ "SchedulerState",
80
+ "SchedulerUpdateAction",
40
81
  "SchedulingStrategy",
82
+ "SerializableConstraintInitializer",
83
+ "StrategyT",
41
84
  "StrategyType",
42
85
  "SynchronousStrategy",
43
86
  "ThroughputStrategy",
44
- "WorkerDescription",
45
- "WorkerProcessResult",
46
- "strategy_display_str",
87
+ "UnserializableConstraintInitializer",
88
+ "WorkerProcess",
89
+ "WorkerProcessGroup",
47
90
  ]