guidellm 0.1.0__py3-none-any.whl → 0.2.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (69)
  1. guidellm/__init__.py +38 -6
  2. guidellm/__main__.py +294 -0
  3. guidellm/backend/__init__.py +19 -6
  4. guidellm/backend/backend.py +238 -0
  5. guidellm/backend/openai.py +532 -122
  6. guidellm/backend/response.py +132 -0
  7. guidellm/benchmark/__init__.py +73 -0
  8. guidellm/benchmark/aggregator.py +760 -0
  9. guidellm/benchmark/benchmark.py +838 -0
  10. guidellm/benchmark/benchmarker.py +334 -0
  11. guidellm/benchmark/entrypoints.py +141 -0
  12. guidellm/benchmark/output.py +946 -0
  13. guidellm/benchmark/profile.py +409 -0
  14. guidellm/benchmark/progress.py +720 -0
  15. guidellm/config.py +34 -56
  16. guidellm/data/__init__.py +4 -0
  17. guidellm/data/prideandprejudice.txt.gz +0 -0
  18. guidellm/dataset/__init__.py +22 -0
  19. guidellm/dataset/creator.py +213 -0
  20. guidellm/dataset/entrypoints.py +42 -0
  21. guidellm/dataset/file.py +90 -0
  22. guidellm/dataset/hf_datasets.py +62 -0
  23. guidellm/dataset/in_memory.py +132 -0
  24. guidellm/dataset/synthetic.py +262 -0
  25. guidellm/objects/__init__.py +18 -0
  26. guidellm/objects/pydantic.py +60 -0
  27. guidellm/objects/statistics.py +947 -0
  28. guidellm/request/__init__.py +12 -10
  29. guidellm/request/loader.py +281 -0
  30. guidellm/request/request.py +79 -0
  31. guidellm/scheduler/__init__.py +51 -3
  32. guidellm/scheduler/result.py +137 -0
  33. guidellm/scheduler/scheduler.py +382 -0
  34. guidellm/scheduler/strategy.py +493 -0
  35. guidellm/scheduler/types.py +7 -0
  36. guidellm/scheduler/worker.py +511 -0
  37. guidellm/utils/__init__.py +16 -29
  38. guidellm/utils/colors.py +8 -0
  39. guidellm/utils/hf_transformers.py +35 -0
  40. guidellm/utils/random.py +43 -0
  41. guidellm/utils/text.py +118 -357
  42. {guidellm-0.1.0.dist-info → guidellm-0.2.0.dev0.dist-info}/METADATA +96 -79
  43. guidellm-0.2.0.dev0.dist-info/RECORD +48 -0
  44. {guidellm-0.1.0.dist-info → guidellm-0.2.0.dev0.dist-info}/WHEEL +1 -1
  45. guidellm-0.2.0.dev0.dist-info/entry_points.txt +2 -0
  46. guidellm/backend/base.py +0 -320
  47. guidellm/core/__init__.py +0 -24
  48. guidellm/core/distribution.py +0 -190
  49. guidellm/core/report.py +0 -321
  50. guidellm/core/request.py +0 -44
  51. guidellm/core/result.py +0 -545
  52. guidellm/core/serializable.py +0 -169
  53. guidellm/executor/__init__.py +0 -10
  54. guidellm/executor/base.py +0 -213
  55. guidellm/executor/profile_generator.py +0 -343
  56. guidellm/main.py +0 -336
  57. guidellm/request/base.py +0 -194
  58. guidellm/request/emulated.py +0 -391
  59. guidellm/request/file.py +0 -76
  60. guidellm/request/transformers.py +0 -100
  61. guidellm/scheduler/base.py +0 -374
  62. guidellm/scheduler/load_generator.py +0 -196
  63. guidellm/utils/injector.py +0 -70
  64. guidellm/utils/progress.py +0 -196
  65. guidellm/utils/transformers.py +0 -151
  66. guidellm-0.1.0.dist-info/RECORD +0 -35
  67. guidellm-0.1.0.dist-info/entry_points.txt +0 -3
  68. {guidellm-0.1.0.dist-info → guidellm-0.2.0.dev0.dist-info/licenses}/LICENSE +0 -0
  69. {guidellm-0.1.0.dist-info → guidellm-0.2.0.dev0.dist-info}/top_level.txt +0 -0
guidellm/request/emulated.py DELETED
@@ -1,391 +0,0 @@
- import json
- import math
- from dataclasses import dataclass
- from pathlib import Path
- from typing import Dict, List, Optional, Tuple, Union
-
- import numpy as np
- from loguru import logger
- from transformers import PreTrainedTokenizer # type: ignore # noqa: PGH003
-
- from guidellm.config import settings
- from guidellm.core.request import TextGenerationRequest
- from guidellm.request.base import GenerationMode, RequestGenerator
- from guidellm.utils import clean_text, filter_text, load_text, split_text
-
- __all__ = ["EmulatedConfig", "EmulatedRequestGenerator", "EndlessTokens"]
-
-
- @dataclass
- class EmulatedConfig:
-     """
-     Configuration for emulated text generation requests.
-
-     Args:
-         prompt_tokens (int): Number of prompt tokens.
-         prompt_tokens_variance (Optional[int]): Variance for prompt tokens.
-         prompt_tokens_min (Optional[int]): Minimum number of prompt tokens.
-         prompt_tokens_max (Optional[int]): Maximum number of prompt tokens.
-         generated_tokens (Optional[int]): Number of generated tokens.
-         generated_tokens_variance (Optional[int]): Variance for generated tokens.
-         generated_tokens_min (Optional[int]): Minimum number of generated tokens.
-         generated_tokens_max (Optional[int]): Maximum number of generated tokens.
-     """
-
-     @staticmethod
-     def create_config(config: Optional[Union[str, Path, Dict]]) -> "EmulatedConfig":
-         """
-         Create an EmulatedConfig instance from a configuration source.
-
-         :param config: Configuration source, can be a dictionary, JSON string,
-             key=value string, or file path.
-         :type config: Union[str, Path, Dict]
-         :return: An instance of EmulatedConfig.
-         :rtype: EmulatedConfig
-         :raises FileNotFoundError: If the configuration file is not found.
-         :raises ValueError: If the configuration format is invalid.
-         """
-         if not config:
-             logger.debug("Creating default configuration")
-             return EmulatedConfig(prompt_tokens=1024, generated_tokens=256)
-
-         if isinstance(config, dict):
-             logger.debug("Loading configuration from dict: {}", config)
-             return EmulatedConfig(**config)
-
-         if isinstance(config, Path) or (
-             isinstance(config, str) and (config.endswith(".json") or "{" in config)
-         ):
-             logger.debug("Loading configuration from json: {}", config)
-
-             if isinstance(config, str) and "{" in config:
-                 json_text = config.strip()
-             else:
-                 if isinstance(config, str):
-                     config = Path(config)
-
-                 if not config.exists():
-                     raise FileNotFoundError(f"Configuration file not found: {config}")
-
-                 json_text = config.read_text(encoding="utf-8")
-
-             json_dict = json.loads(json_text)
-
-             return EmulatedConfig(**json_dict)
-
-         if isinstance(config, str) and "=" in config:
-             logger.debug("Loading configuration from csv string: {}", config)
-             items = config.split(",")
-             config_dict = {}
-             for item in items:
-                 key_value = item.strip().split("=")
-                 if len(key_value) != 2: # noqa: PLR2004
-                     raise ValueError(f"Unexpected format for item: {item}")
-                 key = key_value[0].strip()
-                 value = (
-                     int(key_value[1].strip())
-                     if key_value[1].isnumeric()
-                     else key_value[1]
-                 )
-                 config_dict[key] = value
-
-             return EmulatedConfig(**config_dict) # type: ignore # noqa: PGH003
-
-         raise ValueError(
-             f"Invalid configuration given for creation of EmulatedConfig: {config}"
-         )
-
-     prompt_tokens: int
-     prompt_tokens_variance: Optional[int] = None
-     prompt_tokens_min: Optional[int] = None
-     prompt_tokens_max: Optional[int] = None
-
-     generated_tokens: Optional[int] = None
-     generated_tokens_variance: Optional[int] = None
-     generated_tokens_min: Optional[int] = None
-     generated_tokens_max: Optional[int] = None
-
-     @property
-     def prompt_tokens_range(self) -> Tuple[int, int]:
-         """
-         Get the range (min, max) of prompt tokens to generate.
-
-         :return: The range of prompt tokens.
-         :rtype: Tuple[int, int]
-         """
-         return self._token_range(
-             self.prompt_tokens,
-             self.prompt_tokens_variance,
-             self.prompt_tokens_min,
-             self.prompt_tokens_max,
-         )
-
-     @property
-     def output_tokens_range(self) -> Tuple[int, int]:
-         """
-         Get the range (min, max) of output tokens to generate.
-
-         :return: The range of generated tokens.
-         :rtype: Tuple[int, int]
-         """
-         if not self.generated_tokens:
-             return 0, 0
-
-         return self._token_range(
-             self.generated_tokens,
-             self.generated_tokens_variance,
-             self.generated_tokens_min,
-             self.generated_tokens_max,
-         )
-
-     def sample_prompt_tokens(self, rng: np.random.Generator) -> int:
-         """
-         Sample the number of prompt tokens to generate.
-
-         :param rng: The random number generator to use.
-         :type rng: np.random.Generator
-         :return: The number of prompt tokens to create.
-         :rtype: int
-         """
-         return self._sample_tokens(
-             self.prompt_tokens,
-             self.prompt_tokens_variance,
-             self.prompt_tokens_min,
-             self.prompt_tokens_max,
-             rng,
-         )
-
-     def sample_output_tokens(self, rng: np.random.Generator) -> Optional[int]:
-         """
-         Sample the number of output tokens to generate.
-
-         :param rng: The random number generator to use.
-         :type rng: np.random.Generator
-         :return: The number of output tokens to generate.
-         :rtype: Optional[int]
-         """
-         if not self.generated_tokens:
-             return None
-
-         return self._sample_tokens(
-             self.generated_tokens,
-             self.generated_tokens_variance,
-             self.generated_tokens_min,
-             self.generated_tokens_max,
-             rng,
-         )
-
-     @staticmethod
-     def _sample_tokens(
-         base: int,
-         variance: Optional[int],
-         min_tokens: Optional[int],
-         max_tokens: Optional[int],
-         rng: np.random.Generator,
-     ) -> int:
-         min_tokens, max_tokens = EmulatedConfig._token_range(
-             base, variance, min_tokens, max_tokens
-         )
-
-         if min_tokens == max_tokens:
-             return min_tokens
-
-         if not variance:
-             return rng.integers(min_tokens, max_tokens + 1)
-
-         rand = rng.normal(base, math.sqrt(variance))
-
-         return int(min(max(rand, min_tokens), max_tokens))
-
-     @staticmethod
-     def _token_range(
-         base: int,
-         variance: Optional[int],
-         min_tokens: Optional[int],
-         max_tokens: Optional[int],
-     ) -> Tuple[int, int]:
-         if not variance:
-             return (
-                 min_tokens or base,
-                 max_tokens or base,
-             )
-
-         min_tokens = min_tokens if min_tokens and min_tokens > 0 else 1
-         max_tokens = (
-             max_tokens if max_tokens and max_tokens > base else base + 5 * variance
-         )
-
-         return min_tokens, max_tokens
-
-
- class EndlessTokens(List[str]):
-     """
-     A list subclass that allows for endless data generation.
-     """
-
-     def __init__(
-         self,
-         data: Union[str, Path],
-         filter_start: Optional[Union[str, int]] = None,
-         filter_end: Optional[Union[str, int]] = None,
-         clean_text_args: Optional[Dict[str, bool]] = None,
-     ):
-         """
-         Initialize EndlessDataWords with data.
-
-         :param data: Source text data.
-         :type data: str
-         """
-         logger.debug("Loading data from: {}", data)
-         data = load_text(data)
-         data = filter_text(data, filter_start, filter_end)
-         data = (
-             clean_text(data)
-             if not clean_text_args
-             else clean_text(data, **clean_text_args)
-         )
-         self._tokens, self._token_separators, self._line_indices = split_text(data)
-
-         super().__init__(self._tokens)
-
-     @property
-     def line_indices(self) -> List[int]:
-         """
-         Get the list of start indices for lines.
-
-         :return: List of start indices.
-         :rtype: List[int]
-         """
-         return self._line_indices
-
-     def create_text(self, start: int, length: int) -> str:
-         """
-         Create a text snippet from the specified range.
-
-         :param start: Start index.
-         :type start: int
-         :param length: Length of the snippet.
-         :type length: int
-         :return: Text snippet.
-         :rtype: str
-         """
-         start = start % len(self)
-         text = ""
-         buff_token_sep = ""
-
-         for counter in range(length):
-             index = (start + counter) % len(self)
-             text += buff_token_sep + self[index]
-             buff_token_sep = self._token_separators[index]
-
-         return text
-
-
- class EmulatedRequestGenerator(RequestGenerator):
-     """
-     A request generator that generates emulated requests based on a configuration.
-
-     :param config: The configuration string, file path, or dictionary.
-     :type config: Union[str, Dict, Path]
-     :param random_seed: The random seed to use for generating requests.
-     :type random_seed: Optional[int]
-     :param tokenizer: The tokenizer instance or the name/config to use
-         for tokenizing prompts.
-     :type tokenizer: Optional[Union[str, PreTrainedTokenizer]]
-     :param mode: The generation mode, either 'async' or 'sync'.
-     :type mode: GenerationMode
-     :param async_queue_size: The size of the request queue.
-     :type async_queue_size: int
-     """
-
-     def __init__(
-         self,
-         config: Optional[Union[str, Path, Dict]],
-         random_seed: Optional[int] = None,
-         tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None,
-         mode: GenerationMode = "async",
-         async_queue_size: int = 50,
-     ):
-         """
-         Initialize EmulatedRequestGenerator with configuration and tokenizer.
-
-         :param config: Configuration source, can be a dictionary,
-             JSON string, or file path.
-         :type config: Optional[Union[str, Path, Dict]]
-         :param random_seed: Optional seed for random number generator.
-         :type random_seed: Optional[int]
-         :param tokenizer: Tokenizer instance or configuration for tokenizing prompts.
-         :type tokenizer: Optional[Union[str, PreTrainedTokenizer]]
-         :param mode: Mode of request generation, either 'async' or 'sync'.
-         :type mode: str
-         :param async_queue_size: Size of the asynchronous queue.
-         :type async_queue_size: int
-         """
-         self._config = EmulatedConfig.create_config(config)
-         self._tokens = EndlessTokens(
-             settings.emulated_data.source,
-             settings.emulated_data.filter_start,
-             settings.emulated_data.filter_end,
-         )
-         self._rng = np.random.default_rng(random_seed)
-
-         # NOTE: Must be after all the parameters since the queue population
-         # function requires attributes above
-         super().__init__(
-             type_="emulated",
-             source=str(config),
-             tokenizer=tokenizer,
-             mode=mode,
-             async_queue_size=async_queue_size,
-         )
-
-     def create_item(self) -> TextGenerationRequest:
-         """
-         Create a new text generation request item from the data.
-
-         :return: A new text generation request.
-         :rtype: TextGenerationRequest
-         """
-         logger.debug("Creating new text generation request")
-         target_prompt_token_count = self._config.sample_prompt_tokens(self._rng)
-         prompt = self.sample_prompt(target_prompt_token_count)
-         prompt_token_count = len(self.tokenizer.tokenize(prompt))
-         output_token_count = self._config.sample_output_tokens(self._rng)
-         logger.debug("Generated prompt: {}", prompt)
-
-         return TextGenerationRequest(
-             prompt=prompt,
-             prompt_token_count=prompt_token_count,
-             output_token_count=output_token_count,
-         )
-
-     def sample_prompt(self, tokens: int) -> str:
-         """
-         Sample a prompt with the specified number of tokens.
-
-         :param tokens: Number of tokens for the prompt.
-         :type tokens: int
-         :return: Sampled prompt text.
-         :rtype: str
-         """
-         start_line_index = self._rng.integers(0, len(self._tokens.line_indices))
-
-         # binary search to find the proper number of tokens for the prompt
-         # this is because tokenizers differ in tokenization behavior
-         left = 0
-         right = left + 5 * tokens
-
-         while left < right:
-             mid = (left + right) // 2
-             prompt = self._tokens.create_text(start_line_index, mid)
-             token_count = len(self.tokenizer.tokenize(prompt))
-
-             if token_count == tokens:
-                 return prompt
-
-             if token_count < tokens:
-                 left = mid + 1
-             else:
-                 right = mid
-
-         return self._tokens.create_text(start_line_index, left)
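
Note: the deleted EmulatedConfig above documents that create_config accepts a dict, a JSON string, a key=value CSV string, or a file path. The following sketch is illustrative only and not part of the diff; it assumes guidellm 0.1.0 is installed, and the token counts are arbitrary placeholder values.

# Illustrative sketch of the removed 0.1.0 API; requires guidellm==0.1.0.
from guidellm.request.emulated import EmulatedConfig

# A dict maps directly onto the dataclass fields.
cfg_dict = EmulatedConfig.create_config({"prompt_tokens": 512, "generated_tokens": 128})

# A JSON string is detected via "{" and parsed with json.loads.
cfg_json = EmulatedConfig.create_config('{"prompt_tokens": 512, "generated_tokens": 128}')

# A key=value CSV string is split on "," and "="; numeric values are cast to int.
cfg_csv = EmulatedConfig.create_config("prompt_tokens=512,generated_tokens=128")

assert cfg_dict == cfg_json == cfg_csv  # all three resolve to the same dataclass
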
guidellm/request/file.py DELETED
@@ -1,76 +0,0 @@
- from pathlib import Path
- from typing import Optional, Union
-
- from loguru import logger
- from transformers import PreTrainedTokenizer # type: ignore # noqa: PGH003
-
- from guidellm.config import settings
- from guidellm.core.request import TextGenerationRequest
- from guidellm.request.base import GenerationMode, RequestGenerator
- from guidellm.utils import load_text_lines
-
- __all__ = ["FileRequestGenerator"]
-
-
- class FileRequestGenerator(RequestGenerator):
-     """
-     A request generator implementation for files.
-
-     :param path: The path to the file containing the data.
-     :type path: Optional[Union[str, Path]]
-     :param tokenizer: The tokenizer instance or the name/config to use
-         for tokenizing prompts.
-     :type tokenizer: Union[str, PreTrainedTokenizer]
-     :param mode: The generation mode, either 'async' or 'sync'.
-     :type mode: str
-     :param async_queue_size: The size of the request queue.
-     :type async_queue_size: int
-     """
-
-     def __init__(
-         self,
-         path: Optional[Union[str, Path]],
-         tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None,
-         mode: GenerationMode = "async",
-         async_queue_size: int = 50,
-     ):
-         if not path:
-             raise ValueError("File path must be provided for FileRequestGenerator")
-
-         self._path = path
-         self._data = load_text_lines(
-             path,
-             filters=settings.dataset.preferred_data_columns,
-         )
-         self._iterator = iter(self._data)
-
-         # NOTE: Must be after all the parameters since the queue population
-         # function requires attributes above
-         super().__init__(
-             type_="file",
-             source=str(path),
-             tokenizer=tokenizer,
-             mode=mode,
-             async_queue_size=async_queue_size,
-         )
-
-     def create_item(self) -> TextGenerationRequest:
-         """
-         Create a new result request item from the data.
-
-         :return: A new result request.
-         :rtype: TextGenerationRequest
-         """
-         logger.debug("Creating new request item from file data")
-
-         try:
-             data = next(self._iterator)
-         except StopIteration:
-             self._iterator = iter(self._data)
-             data = next(self._iterator)
-
-         token_count = len(self.tokenizer.tokenize(data))
-         request = TextGenerationRequest(prompt=data, prompt_token_count=token_count)
-         logger.debug("Created new TextGenerationRequest: {}", request)
-
-         return request
guidellm/request/transformers.py DELETED
@@ -1,100 +0,0 @@
- from pathlib import Path
- from typing import Optional, Union
-
- from datasets import ( # type: ignore # noqa: PGH003
-     Dataset,
-     DatasetDict,
-     IterableDataset,
-     IterableDatasetDict,
- )
- from loguru import logger
- from transformers import PreTrainedTokenizer # type: ignore # noqa: PGH003
-
- from guidellm.core.request import TextGenerationRequest
- from guidellm.request.base import GenerationMode, RequestGenerator
- from guidellm.utils import (
-     load_transformers_dataset,
-     resolve_transformers_dataset_column,
- )
-
- __all__ = ["TransformersDatasetRequestGenerator"]
-
-
- class TransformersDatasetRequestGenerator(RequestGenerator):
-     """
-     A request generator implementation for Hugging Face datasets.
-
-     :param dataset: The name of the Hugging Face dataset to use or the path
-         to a local dataset.
-     :type dataset_name: str
-     :param split: The split of the dataset to use (e.g., 'train', 'test').
-     :type split: str
-     :param column: The column/field to use for generating requests.
-     :type column: str
-     :param tokenizer: The tokenizer instance or the name/config to use
-         for tokenizing prompts.
-     :type tokenizer: Union[str, PreTrainedTokenizer]
-     :param mode: The generation mode, either 'async' or 'sync'.
-     :type mode: str
-     :param async_queue_size: The size of the request queue.
-     :type async_queue_size: int
-     """
-
-     def __init__(
-         self,
-         dataset: Union[
-             str, Path, DatasetDict, Dataset, IterableDatasetDict, IterableDataset
-         ],
-         split: Optional[str] = None,
-         column: Optional[str] = None,
-         tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None,
-         mode: GenerationMode = "async",
-         async_queue_size: int = 50,
-         **kwargs,
-     ):
-         self._dataset = dataset
-         self._split = split
-         self._column = column
-         self._kwargs = kwargs
-
-         self._hf_dataset = load_transformers_dataset(dataset, split=split, **kwargs)
-         self._hf_column = resolve_transformers_dataset_column(
-             self._hf_dataset, column=column
-         )
-         self._hf_dataset_iterator = iter(self._hf_dataset)
-
-         # NOTE: Must be after all the parameters since the queue population
-         # function requires attributes above
-         super().__init__(
-             type_="transformers_dataset",
-             source=str(dataset),
-             tokenizer=tokenizer,
-             mode=mode,
-             async_queue_size=async_queue_size,
-         )
-
-     def create_item(self) -> TextGenerationRequest:
-         """
-         Create a new result request item from the dataset.
-
-         :return: A new result request.
-         :rtype: TextGenerationRequest
-         """
-
-         logger.debug("Creating new request item from dataset")
-
-         try:
-             data = next(self._hf_dataset_iterator)
-         except StopIteration:
-             self._hf_dataset_iterator = iter(self._hf_dataset)
-             data = next(self._hf_dataset_iterator)
-
-         prompt = data[self._hf_column]
-         token_count = len(self.tokenizer.tokenize(prompt))
-         request = TextGenerationRequest(
-             prompt=prompt,
-             prompt_token_count=token_count,
-         )
-         logger.debug(f"Created new TextGenerationRequest: {request}")
-
-         return request
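
Note: as a reference for the removed 0.1.0 request generators, the constructor calls below follow the __init__ signatures in the deleted file.py and transformers.py hunks above. This is an illustrative sketch only, not part of the diff: it assumes guidellm 0.1.0 is installed, and the file path, dataset id, and tokenizer name are placeholders.

# Illustrative sketch of the removed 0.1.0 generators; requires guidellm==0.1.0.
from guidellm.request.file import FileRequestGenerator
from guidellm.request.transformers import TransformersDatasetRequestGenerator

# Reads prompts line by line from a local text file (path is a placeholder).
file_gen = FileRequestGenerator(
    path="prompts.txt",
    tokenizer="gpt2",  # placeholder tokenizer name/config
    mode="sync",
)

# Pulls prompts from a Hugging Face dataset column (dataset id is a placeholder).
hf_gen = TransformersDatasetRequestGenerator(
    dataset="gsm8k",
    split="train",
    column="question",
    tokenizer="gpt2",
    mode="sync",
)
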