guidellm 0.4.0a21__py3-none-any.whl → 0.4.0a169__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in the public registry.

Potentially problematic release.
Files changed (115)
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +452 -252
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +110 -0
  5. guidellm/backends/openai.py +355 -0
  6. guidellm/backends/response_handlers.py +455 -0
  7. guidellm/benchmark/__init__.py +53 -39
  8. guidellm/benchmark/benchmarker.py +150 -317
  9. guidellm/benchmark/entrypoints.py +467 -128
  10. guidellm/benchmark/output.py +519 -771
  11. guidellm/benchmark/profile.py +580 -280
  12. guidellm/benchmark/progress.py +568 -549
  13. guidellm/benchmark/scenarios/__init__.py +40 -0
  14. guidellm/benchmark/scenarios/chat.json +6 -0
  15. guidellm/benchmark/scenarios/rag.json +6 -0
  16. guidellm/benchmark/schemas.py +2086 -0
  17. guidellm/data/__init__.py +28 -4
  18. guidellm/data/collators.py +16 -0
  19. guidellm/data/deserializers/__init__.py +53 -0
  20. guidellm/data/deserializers/deserializer.py +144 -0
  21. guidellm/data/deserializers/file.py +222 -0
  22. guidellm/data/deserializers/huggingface.py +94 -0
  23. guidellm/data/deserializers/memory.py +194 -0
  24. guidellm/data/deserializers/synthetic.py +348 -0
  25. guidellm/data/loaders.py +149 -0
  26. guidellm/data/preprocessors/__init__.py +25 -0
  27. guidellm/data/preprocessors/formatters.py +404 -0
  28. guidellm/data/preprocessors/mappers.py +198 -0
  29. guidellm/data/preprocessors/preprocessor.py +31 -0
  30. guidellm/data/processor.py +31 -0
  31. guidellm/data/schemas.py +13 -0
  32. guidellm/data/utils/__init__.py +6 -0
  33. guidellm/data/utils/dataset.py +94 -0
  34. guidellm/extras/__init__.py +4 -0
  35. guidellm/extras/audio.py +215 -0
  36. guidellm/extras/vision.py +242 -0
  37. guidellm/logger.py +2 -2
  38. guidellm/mock_server/__init__.py +8 -0
  39. guidellm/mock_server/config.py +84 -0
  40. guidellm/mock_server/handlers/__init__.py +17 -0
  41. guidellm/mock_server/handlers/chat_completions.py +280 -0
  42. guidellm/mock_server/handlers/completions.py +280 -0
  43. guidellm/mock_server/handlers/tokenizer.py +142 -0
  44. guidellm/mock_server/models.py +510 -0
  45. guidellm/mock_server/server.py +168 -0
  46. guidellm/mock_server/utils.py +302 -0
  47. guidellm/preprocess/dataset.py +23 -26
  48. guidellm/presentation/builder.py +2 -2
  49. guidellm/presentation/data_models.py +25 -21
  50. guidellm/presentation/injector.py +2 -3
  51. guidellm/scheduler/__init__.py +65 -26
  52. guidellm/scheduler/constraints.py +1035 -0
  53. guidellm/scheduler/environments.py +252 -0
  54. guidellm/scheduler/scheduler.py +140 -368
  55. guidellm/scheduler/schemas.py +272 -0
  56. guidellm/scheduler/strategies.py +519 -0
  57. guidellm/scheduler/worker.py +391 -420
  58. guidellm/scheduler/worker_group.py +707 -0
  59. guidellm/schemas/__init__.py +31 -0
  60. guidellm/schemas/info.py +159 -0
  61. guidellm/schemas/request.py +226 -0
  62. guidellm/schemas/response.py +119 -0
  63. guidellm/schemas/stats.py +228 -0
  64. guidellm/{config.py → settings.py} +32 -21
  65. guidellm/utils/__init__.py +95 -8
  66. guidellm/utils/auto_importer.py +98 -0
  67. guidellm/utils/cli.py +71 -2
  68. guidellm/utils/console.py +183 -0
  69. guidellm/utils/encoding.py +778 -0
  70. guidellm/utils/functions.py +134 -0
  71. guidellm/utils/hf_datasets.py +1 -2
  72. guidellm/utils/hf_transformers.py +4 -4
  73. guidellm/utils/imports.py +9 -0
  74. guidellm/utils/messaging.py +1118 -0
  75. guidellm/utils/mixins.py +115 -0
  76. guidellm/utils/pydantic_utils.py +411 -0
  77. guidellm/utils/random.py +3 -4
  78. guidellm/utils/registry.py +220 -0
  79. guidellm/utils/singleton.py +133 -0
  80. guidellm/{objects → utils}/statistics.py +341 -247
  81. guidellm/utils/synchronous.py +159 -0
  82. guidellm/utils/text.py +163 -50
  83. guidellm/utils/typing.py +41 -0
  84. guidellm/version.py +1 -1
  85. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/METADATA +33 -10
  86. guidellm-0.4.0a169.dist-info/RECORD +95 -0
  87. guidellm/backend/__init__.py +0 -23
  88. guidellm/backend/backend.py +0 -259
  89. guidellm/backend/openai.py +0 -705
  90. guidellm/backend/response.py +0 -136
  91. guidellm/benchmark/aggregator.py +0 -760
  92. guidellm/benchmark/benchmark.py +0 -837
  93. guidellm/benchmark/scenario.py +0 -104
  94. guidellm/data/prideandprejudice.txt.gz +0 -0
  95. guidellm/dataset/__init__.py +0 -22
  96. guidellm/dataset/creator.py +0 -213
  97. guidellm/dataset/entrypoints.py +0 -42
  98. guidellm/dataset/file.py +0 -92
  99. guidellm/dataset/hf_datasets.py +0 -62
  100. guidellm/dataset/in_memory.py +0 -132
  101. guidellm/dataset/synthetic.py +0 -287
  102. guidellm/objects/__init__.py +0 -18
  103. guidellm/objects/pydantic.py +0 -89
  104. guidellm/request/__init__.py +0 -18
  105. guidellm/request/loader.py +0 -284
  106. guidellm/request/request.py +0 -79
  107. guidellm/request/types.py +0 -10
  108. guidellm/scheduler/queues.py +0 -25
  109. guidellm/scheduler/result.py +0 -155
  110. guidellm/scheduler/strategy.py +0 -495
  111. guidellm-0.4.0a21.dist-info/RECORD +0 -62
  112. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/WHEEL +0 -0
  113. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/entry_points.txt +0 -0
  114. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/licenses/LICENSE +0 -0
  115. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/top_level.txt +0 -0
guidellm/mock_server/utils.py (new file)
@@ -0,0 +1,302 @@
+ """
+ Mock server utilities for text generation and tokenization testing.
+
+ This module provides mock tokenization and text generation utilities for testing
+ guidellm's mock server functionality. It includes a mock tokenizer that simulates
+ tokenization processes, functions to generate reproducible fake text with specific
+ token counts, and timing generators for realistic benchmarking scenarios.
+ """
+
+ from __future__ import annotations
+
+ import random
+ import re
+ from collections.abc import Generator
+
+ from faker import Faker
+ from transformers.tokenization_utils import AddedToken, PreTrainedTokenizer, TextInput
+
+ __all__ = [
+     "MockTokenizer",
+     "create_fake_text",
+     "create_fake_tokens_str",
+     "sample_number",
+     "times_generator",
+ ]
+
+
+ class MockTokenizer(PreTrainedTokenizer):
+     """
+     Mock tokenizer implementation for testing text processing workflows.
+
+     Provides a simplified tokenizer that splits text using regex patterns and
+     generates deterministic token IDs based on string hashing. Used for testing
+     guidellm components without requiring actual model tokenizers.
+
+     :cvar VocabSize: Fixed vocabulary size for the mock tokenizer
+     """
+
+     VocabSize = 100000007
+
+     def __len__(self) -> int:
+         """
+         Get the vocabulary size of the tokenizer.
+
+         :return: The total number of tokens in the vocabulary
+         """
+         return self.VocabSize
+
+     def __call__(self, text: str | list[str], **kwargs) -> list[int]:  # noqa: ARG002
+         """
+         Tokenize text and return token IDs (callable interface).
+
+         :param text: Input text to tokenize
+         :return: List of token IDs
+         """
+         if isinstance(text, str):
+             tokens = self.tokenize(text)
+             return self.convert_tokens_to_ids(tokens)
+         elif isinstance(text, list):
+             # Handle batch processing
+             result = []
+             for t in text:
+                 result.extend(self.__call__(t))
+             return result
+         else:
+             msg = f"text input must be of type `str` or `list[str]`, got {type(text)}"
+             raise ValueError(msg)
+
+     def tokenize(self, text: TextInput, **_kwargs) -> list[str]:  # type: ignore[override]
+         """
+         Tokenize input text into a list of token strings.
+
+         Splits text using regex to separate words, punctuation, and whitespace
+         into individual tokens for processing.
+
+         :param text: Input text to tokenize
+         :return: List of token strings from the input text
+         """
+         # Split text into tokens: words, spaces, and punctuation
+         return re.findall(r"\w+|[^\w\s]|\s+", text)
+
+     def convert_tokens_to_ids(self, tokens: str | list[str]) -> list[int]:
+         """
+         Convert token strings to numeric token IDs.
+
+         Uses deterministic hashing to generate consistent token IDs for
+         reproducible testing scenarios.
+
+         :param tokens: Single token string or list of token strings
+         :return: Single token ID or list of token IDs
+         """
+         if isinstance(tokens, str):
+             return [hash(tokens) % self.VocabSize]
+         return [hash(token) % self.VocabSize for token in tokens]
+
+     def convert_ids_to_tokens(  # type: ignore[override]
+         self, ids: list[int], _skip_special_tokens: bool = False
+     ) -> list[str]:
+         """
+         Convert numeric token IDs back to token strings.
+
+         Generates fake text tokens using Faker library seeded with token IDs
+         for deterministic and reproducible token generation.
+
+         :param ids: Single token ID or list of token IDs to convert
+         :return: Single token string or list of token strings
+         """
+         if not ids:
+             return [""]
+
+         fake = Faker()
+         fake.seed_instance(sum(ids) % self.VocabSize)
+
+         target_count = len(ids)
+         current_count = 0
+         tokens = []
+
+         while current_count < target_count:
+             text = fake.text(
+                 max_nb_chars=(target_count - current_count) * 10  # oversample
+             )
+             new_tokens = self.tokenize(text)
+
+             if current_count > 0:
+                 new_tokens = [".", " "] + new_tokens
+
+             new_tokens = (
+                 new_tokens[: target_count - current_count]
+                 if len(new_tokens) > (target_count - current_count)
+                 else new_tokens
+             )
+             tokens += new_tokens
+             current_count += len(new_tokens)
+
+         return tokens
+
+     def convert_tokens_to_string(self, tokens: list[str]) -> str:
+         """
+         Convert a list of token strings back to a single text string.
+
+         :param tokens: List of token strings to concatenate
+         :return: Concatenated string from all tokens
+         """
+         return "".join(tokens)
+
+     def _add_tokens(
+         self,
+         new_tokens: list[str] | list[AddedToken],  # noqa: ARG002
+         special_tokens: bool = False,  # noqa: ARG002
+     ) -> int:
+         """
+         Add new tokens to the tokenizer vocabulary (mock implementation).
+
+         :param new_tokens: List of tokens to add to the vocabulary
+         :param special_tokens: Whether the tokens are special tokens
+         :return: Number of tokens actually added (always 0 for mock)
+         """
+         return 0
+
+     def apply_chat_template(  # type: ignore[override]
+         self,
+         conversation: list,
+         tokenize: bool = False,  # Changed default to False to match transformers
+         add_generation_prompt: bool = False,  # noqa: ARG002
+         **kwargs,  # noqa: ARG002
+     ) -> str | list[int]:
+         """
+         Apply a chat template to format conversation messages.
+
+         Mock implementation that concatenates all message content for testing.
+
+         :param conversation: List of chat messages
+         :param tokenize: Whether to return tokens or string
+         :param add_generation_prompt: Whether to add generation prompt
+         :return: Formatted text string or token IDs
+         """
+         # Simple concatenation of all message content
+         texts = []
+         for message in conversation:
+             if isinstance(message, dict) and "content" in message:
+                 texts.append(message["content"])
+             elif hasattr(message, "content"):
+                 texts.append(message.content)
+
+         formatted_text = " ".join(texts)
+
+         if tokenize:
+             return self.convert_tokens_to_ids(self.tokenize(formatted_text))
+         return formatted_text
+
+     def decode(  # type: ignore[override]
+         self,
+         token_ids: list[int],
+         skip_special_tokens: bool = True,
+         **kwargs,  # noqa: ARG002
+     ) -> str:
+         """
+         Decode token IDs back to text string.
+
+         :param token_ids: List of token IDs to decode
+         :param skip_special_tokens: Whether to skip special tokens
+         :return: Decoded text string
+         """
+         tokens = self.convert_ids_to_tokens(token_ids, skip_special_tokens)
+         return self.convert_tokens_to_string(tokens)
+
+
+ def create_fake_text(
+     num_tokens: int,
+     processor: PreTrainedTokenizer,
+     seed: int = 42,
+     fake: Faker | None = None,
+ ) -> str:
+     """
+     Generate fake text using a tokenizer processor with specified token count.
+
+     Creates text by generating fake tokens and joining them into a string,
+     ensuring the result has the exact number of tokens when processed by
+     the given tokenizer.
+
+     :param num_tokens: Target number of tokens in the generated text
+     :param processor: Tokenizer to use for token generation and validation
+     :param seed: Random seed for reproducible text generation
+     :param fake: Optional Faker instance for text generation
+     :return: Generated text string with the specified token count
+     """
+     return "".join(create_fake_tokens_str(num_tokens, processor, seed, fake))
+
+
+ def create_fake_tokens_str(
+     num_tokens: int,
+     processor: PreTrainedTokenizer,
+     seed: int = 42,
+     fake: Faker | None = None,
+ ) -> list[str]:
+     """
+     Generate fake token strings using a tokenizer processor.
+
+     Creates a list of token strings by generating fake text and tokenizing it
+     until the desired token count is reached. Uses the provided tokenizer
+     for accurate token boundary detection.
+
+     :param num_tokens: Target number of tokens to generate
+     :param processor: Tokenizer to use for token generation and validation
+     :param seed: Random seed for reproducible token generation
+     :param fake: Optional Faker instance for text generation
+     :return: List of token strings with the specified count
+     """
+     if not fake:
+         fake = Faker()
+         fake.seed_instance(seed)
+
+     tokens: list[str] = []
+
+     while len(tokens) < num_tokens:
+         text = fake.text(
+             max_nb_chars=(num_tokens - len(tokens)) * 30  # oversample
+         )
+         new_tokens = processor.tokenize(text)
+
+         if len(tokens) > 0:
+             new_tokens = [".", " "] + new_tokens
+
+         new_tokens = (
+             new_tokens[: num_tokens - len(tokens)]
+             if len(new_tokens) > (num_tokens - len(tokens))
+             else new_tokens
+         )
+         tokens += new_tokens
+
+     return tokens
+
+
+ def times_generator(mean: float, standard_dev: float) -> Generator[float]:
+     """
+     Generate infinite timing values from a normal distribution.
+
+     Creates a generator that yields timing values sampled from a normal
+     distribution, useful for simulating realistic request timing patterns
+     in benchmarking scenarios.
+
+     :param mean: Mean value for the normal distribution
+     :param standard_dev: Standard deviation for the normal distribution
+     :return: Generator yielding positive timing values from the distribution
+     """
+     while True:
+         yield sample_number(mean, standard_dev)
+
+
+ def sample_number(mean: float, standard_dev: float) -> float:
+     """
+     Generate a single timing value from a normal distribution.
+
+     Samples one timing value from a normal distribution with the specified
+     parameters, ensuring the result is non-negative for realistic timing
+     simulation in benchmarking scenarios.
+
+     :param mean: Mean value for the normal distribution
+     :param standard_dev: Standard deviation for the normal distribution
+     :return: Non-negative timing value from the distribution
+     """
+     return max(0.0, random.gauss(mean, standard_dev))
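
For orientation, here is a small usage sketch of the new utilities (illustrative only; guidellm's mock server wires these together through its own handlers, and the bare MockTokenizer() construction is a simplification):

from guidellm.mock_server.utils import (
    MockTokenizer,
    create_fake_text,
    times_generator,
)

tokenizer = MockTokenizer()

# Token IDs come from hashing, so identical text yields identical IDs
# within a process.
ids_a = tokenizer("hello world")
ids_b = tokenizer("hello world")
assert ids_a == ids_b

# Build a prompt with a target token count under this tokenizer,
# reproducible via the seed.
prompt = create_fake_text(num_tokens=64, processor=tokenizer, seed=7)

# Simulated inter-token latencies for a streamed response: an infinite,
# non-negative Gaussian stream.
delays = times_generator(mean=0.02, standard_dev=0.005)
first_five = [next(delays) for _ in range(5)]
assert all(d >= 0.0 for d in first_five)

One caveat on hash(): Python salts string hashing per process, so the IDs above are stable within a run but not across runs unless PYTHONHASHSEED is pinned.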
guidellm/preprocess/dataset.py
@@ -1,9 +1,9 @@
  import json
  import os
- from collections.abc import Iterator
+ from collections.abc import Callable, Iterator
  from enum import Enum
  from pathlib import Path
- from typing import Any, Callable, Optional, Union
+ from typing import Any

  import yaml
  from datasets import Dataset
@@ -11,7 +11,6 @@ from loguru import logger
  from pydantic import BaseModel, Field
  from transformers import PreTrainedTokenizerBase

- from guidellm.dataset import load_dataset as guidellm_load_dataset
  from guidellm.utils import IntegerRangeSampler, check_load_processor
  from guidellm.utils.hf_datasets import SUPPORTED_TYPES, save_dataset_to_file

@@ -32,7 +31,7 @@ def handle_ignore_strategy(
      min_prompt_tokens: int,
      tokenizer: PreTrainedTokenizerBase,
      **_kwargs,
- ) -> Optional[str]:
+ ) -> str | None:
      """
      Ignores prompts that are shorter than the required minimum token length.

@@ -56,7 +55,7 @@ def handle_concatenate_strategy(
      tokenizer: PreTrainedTokenizerBase,
      concat_delimiter: str,
      **_kwargs,
- ) -> Optional[str]:
+ ) -> str | None:
      """
      Concatenates prompts until the minimum token requirement is met.

@@ -117,7 +116,7 @@ def handle_error_strategy(
      min_prompt_tokens: int,
      tokenizer: PreTrainedTokenizerBase,
      **_kwargs,
- ) -> Optional[str]:
+ ) -> str | None:
      """
      Raises an error if the prompt is too short.

@@ -150,24 +149,24 @@ class TokensConfig(BaseModel):
          description="The average number of tokens.",
          gt=0,
      )
-     stdev: Optional[int] = Field(
+     stdev: int | None = Field(
          description="The standard deviation of the tokens.",
          gt=0,
          default=None,
      )
-     min: Optional[int] = Field(
+     min: int | None = Field(
          description="The minimum number of tokens.",
          gt=0,
          default=None,
      )
-     max: Optional[int] = Field(
+     max: int | None = Field(
          description="The maximum number of tokens.",
          gt=0,
          default=None,
      )

      @staticmethod
-     def parse_str(data: Union[str, Path]) -> "TokensConfig":
+     def parse_str(data: str | Path) -> "TokensConfig":
          """
          Parses a string or path into a TokensConfig object. Supports:
          - JSON string
@@ -215,14 +214,14 @@ class TokensConfig(BaseModel):
          return TokensConfig(**config_dict)  # type: ignore[arg-type]

      @staticmethod
-     def parse_config_file(data: Union[str, Path]) -> "TokensConfig":
+     def parse_config_file(data: str | Path) -> "TokensConfig":
          with Path(data).open("r") as file:
              config_dict = yaml.safe_load(file)

          return TokensConfig(**config_dict)


- def _validate_output_suffix(output_path: Union[str, Path]) -> None:
+ def _validate_output_suffix(output_path: str | Path) -> None:
      output_path = Path(output_path)
      suffix = output_path.suffix.lower()
      if suffix not in SUPPORTED_TYPES:
@@ -233,18 +232,18 @@ def _validate_output_suffix(output_path: Union[str, Path]) -> None:


  def process_dataset(
-     data: Union[str, Path],
-     output_path: Union[str, Path],
-     processor: Union[str, Path, PreTrainedTokenizerBase],
-     prompt_tokens: Union[str, Path],
-     output_tokens: Union[str, Path],
-     processor_args: Optional[dict[str, Any]] = None,
-     data_args: Optional[dict[str, Any]] = None,
+     data: str | Path,
+     output_path: str | Path,
+     processor: str | Path | PreTrainedTokenizerBase,
+     prompt_tokens: str | Path,
+     output_tokens: str | Path,
+     processor_args: dict[str, Any] | None = None,
+     data_args: dict[str, Any] | None = None,
      short_prompt_strategy: ShortPromptStrategy = ShortPromptStrategy.IGNORE,
-     pad_char: Optional[str] = None,
-     concat_delimiter: Optional[str] = None,
+     pad_char: str | None = None,
+     concat_delimiter: str | None = None,
      push_to_hub: bool = False,
-     hub_dataset_id: Optional[str] = None,
+     hub_dataset_id: str | None = None,
      random_seed: int = 42,
  ) -> None:
      """
@@ -271,9 +270,7 @@ def process_dataset(
          f"Starting dataset conversion | Input: {data} | Output directory: {output_path}"
      )

-     dataset, column_mappings = guidellm_load_dataset(
-         data, data_args, processor, processor_args
-     )
+     dataset, column_mappings = None, None
      tokenizer = check_load_processor(
          processor,
          processor_args,
@@ -354,7 +351,7 @@ def process_dataset(


  def push_dataset_to_hub(
-     hub_dataset_id: Optional[str],
+     hub_dataset_id: str | None,
      processed_dataset: Dataset,
  ) -> None:
      """
guidellm/presentation/builder.py
@@ -1,9 +1,9 @@
  from typing import TYPE_CHECKING, Any

  if TYPE_CHECKING:
-     from guidellm.benchmark.benchmark import GenerativeBenchmark
+     from guidellm.benchmark import GenerativeBenchmark

- from .data_models import BenchmarkDatum, RunInfo, WorkloadDetails
+ from guidellm.presentation.data_models import BenchmarkDatum, RunInfo, WorkloadDetails


  class UIDataBuilder:
guidellm/presentation/data_models.py
@@ -1,25 +1,25 @@
  import random
  from collections import defaultdict
  from math import ceil
- from typing import TYPE_CHECKING, Optional, Union
+ from typing import TYPE_CHECKING

  from pydantic import BaseModel, computed_field

  if TYPE_CHECKING:
-     from guidellm.benchmark.benchmark import GenerativeBenchmark
+     from guidellm.benchmark import GenerativeBenchmark

- from guidellm.objects.statistics import DistributionSummary
+ from guidellm.utils import DistributionSummary


  class Bucket(BaseModel):
-     value: Union[float, int]
+     value: float | int
      count: int

      @staticmethod
      def from_data(
-         data: Union[list[float], list[int]],
-         bucket_width: Optional[float] = None,
-         n_buckets: Optional[int] = None,
+         data: list[float] | list[int],
+         bucket_width: float | None = None,
+         n_buckets: int | None = None,
      ) -> tuple[list["Bucket"], float]:
          if not data:
              return [], 1.0
@@ -35,7 +35,7 @@ class Bucket(BaseModel):
          else:
              n_buckets = ceil(range_v / bucket_width)

-         bucket_counts: defaultdict[Union[float, int], int] = defaultdict(int)
+         bucket_counts: defaultdict[float | int, int] = defaultdict(int)
          for val in data:
              idx = int((val - min_v) // bucket_width)
              if idx >= n_buckets:
@@ -67,12 +67,12 @@ class RunInfo(BaseModel):

      @classmethod
      def from_benchmarks(cls, benchmarks: list["GenerativeBenchmark"]):
-         model = benchmarks[0].worker.backend_model or "N/A"
+         model = benchmarks[0].benchmarker.backend.get("model", "N/A")
          timestamp = max(
              bm.run_stats.start_time for bm in benchmarks if bm.start_time is not None
          )
          return cls(
-             model=Model(name=model, size=0),
+             model=Model(name=model or "", size=0),
              task="N/A",
              timestamp=timestamp,
              dataset=Dataset(name="N/A"),
@@ -80,7 +80,7 @@


  class Distribution(BaseModel):
-     statistics: Optional[DistributionSummary] = None
+     statistics: DistributionSummary | None = None
      buckets: list[Bucket]
      bucket_width: float

@@ -108,8 +108,8 @@ class WorkloadDetails(BaseModel):

      @classmethod
      def from_benchmarks(cls, benchmarks: list["GenerativeBenchmark"]):
-         target = benchmarks[0].worker.backend_target
-         rate_type = benchmarks[0].args.profile.type_
+         target = benchmarks[0].benchmarker.backend.get("target", "N/A")
+         rate_type = benchmarks[0].scheduler.strategy.type_
          successful_requests = [
              req for bm in benchmarks for req in bm.requests.successful
          ]
@@ -117,21 +117,25 @@ class WorkloadDetails(BaseModel):
              range(len(successful_requests)), min(5, len(successful_requests))
          )
          sample_prompts = [
-             successful_requests[i].prompt.replace("\n", " ").replace('"', "'")
+             req.request_args.replace("\n", " ").replace('"', "'")
+             if (req := successful_requests[i]).request_args
+             else ""
              for i in sample_indices
          ]
          sample_outputs = [
-             successful_requests[i].output.replace("\n", " ").replace('"', "'")
+             req.output.replace("\n", " ").replace('"', "'")
+             if (req := successful_requests[i]).output
+             else ""
              for i in sample_indices
          ]

          prompt_tokens = [
-             float(req.prompt_tokens)
+             float(req.prompt_tokens) if req.prompt_tokens is not None else -1
              for bm in benchmarks
              for req in bm.requests.successful
          ]
          output_tokens = [
-             float(req.output_tokens)
+             float(req.output_tokens) if req.output_tokens is not None else -1
              for bm in benchmarks
              for req in bm.requests.successful
          ]
@@ -152,13 +156,13 @@ class WorkloadDetails(BaseModel):
              statistics=output_token_stats, buckets=output_token_buckets, bucket_width=1
          )

-         min_start_time = benchmarks[0].run_stats.start_time
+         min_start_time = benchmarks[0].start_time

          all_req_times = [
-             req.start_time - min_start_time
+             req.info.timings.request_start - min_start_time
              for bm in benchmarks
              for req in bm.requests.successful
-             if req.start_time is not None
+             if req.info.timings.request_start is not None
          ]
          number_of_buckets = len(benchmarks)
          request_over_time_buckets, bucket_width = Bucket.from_data(
@@ -190,7 +194,7 @@ class TabularDistributionSummary(DistributionSummary):
      """

      @computed_field
-     def percentile_rows(self) -> list[dict[str, Union[str, float]]]:
+     def percentile_rows(self) -> list[dict[str, str | float]]:
          rows = [
              {"percentile": name, "value": value}
              for name, value in self.percentiles.model_dump().items()
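
The Bucket.from_data helper above underpins the histograms in the UI report. A quick illustrative call (values invented; the bucket `value` is read here as the bucket's lower edge, which matches the index computation shown above):

from guidellm.presentation.data_models import Bucket

latencies = [0.11, 0.13, 0.18, 0.22, 0.35, 0.41]
buckets, width = Bucket.from_data(latencies, n_buckets=4)
for b in buckets:
    # Each bucket covers roughly [value, value + width) and carries a count.
    print(f"[{b.value:.2f}, {b.value + width:.2f}): {b.count}")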
guidellm/presentation/injector.py
@@ -1,14 +1,13 @@
  import re
  from pathlib import Path
- from typing import Union

  from loguru import logger

- from guidellm.config import settings
+ from guidellm.settings import settings
  from guidellm.utils.text import load_text


- def create_report(js_data: dict, output_path: Union[str, Path]) -> Path:
+ def create_report(js_data: dict, output_path: str | Path) -> Path:
      """
      Creates a report from the dictionary and saves it to the output path.
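
Finally, note the guidellm/config.py → guidellm/settings.py rename (file 64 in the list above), visible here as the change from `from guidellm.config import settings` to `from guidellm.settings import settings`. Downstream code that needs to work across both versions could guard the import; a hypothetical compatibility shim, not part of the package itself:

try:
    from guidellm.settings import settings  # 0.4.0a169 layout
except ImportError:
    from guidellm.config import settings  # 0.4.0a21 layout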