guidellm 0.1.0__py3-none-any.whl → 0.2.0rc20250418__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of guidellm might be problematic. Click here for more details.

Files changed (69)
  1. guidellm/__init__.py +38 -6
  2. guidellm/__main__.py +294 -0
  3. guidellm/backend/__init__.py +19 -6
  4. guidellm/backend/backend.py +238 -0
  5. guidellm/backend/openai.py +532 -122
  6. guidellm/backend/response.py +132 -0
  7. guidellm/benchmark/__init__.py +73 -0
  8. guidellm/benchmark/aggregator.py +760 -0
  9. guidellm/benchmark/benchmark.py +838 -0
  10. guidellm/benchmark/benchmarker.py +334 -0
  11. guidellm/benchmark/entrypoints.py +141 -0
  12. guidellm/benchmark/output.py +946 -0
  13. guidellm/benchmark/profile.py +409 -0
  14. guidellm/benchmark/progress.py +720 -0
  15. guidellm/config.py +34 -56
  16. guidellm/data/__init__.py +4 -0
  17. guidellm/data/prideandprejudice.txt.gz +0 -0
  18. guidellm/dataset/__init__.py +22 -0
  19. guidellm/dataset/creator.py +213 -0
  20. guidellm/dataset/entrypoints.py +42 -0
  21. guidellm/dataset/file.py +90 -0
  22. guidellm/dataset/hf_datasets.py +62 -0
  23. guidellm/dataset/in_memory.py +132 -0
  24. guidellm/dataset/synthetic.py +262 -0
  25. guidellm/objects/__init__.py +18 -0
  26. guidellm/objects/pydantic.py +60 -0
  27. guidellm/objects/statistics.py +947 -0
  28. guidellm/request/__init__.py +12 -10
  29. guidellm/request/loader.py +281 -0
  30. guidellm/request/request.py +79 -0
  31. guidellm/scheduler/__init__.py +51 -3
  32. guidellm/scheduler/result.py +137 -0
  33. guidellm/scheduler/scheduler.py +382 -0
  34. guidellm/scheduler/strategy.py +493 -0
  35. guidellm/scheduler/types.py +7 -0
  36. guidellm/scheduler/worker.py +511 -0
  37. guidellm/utils/__init__.py +16 -29
  38. guidellm/utils/colors.py +8 -0
  39. guidellm/utils/hf_transformers.py +35 -0
  40. guidellm/utils/random.py +43 -0
  41. guidellm/utils/text.py +118 -357
  42. {guidellm-0.1.0.dist-info → guidellm-0.2.0rc20250418.dist-info}/METADATA +96 -79
  43. guidellm-0.2.0rc20250418.dist-info/RECORD +48 -0
  44. {guidellm-0.1.0.dist-info → guidellm-0.2.0rc20250418.dist-info}/WHEEL +1 -1
  45. guidellm-0.2.0rc20250418.dist-info/entry_points.txt +2 -0
  46. guidellm/backend/base.py +0 -320
  47. guidellm/core/__init__.py +0 -24
  48. guidellm/core/distribution.py +0 -190
  49. guidellm/core/report.py +0 -321
  50. guidellm/core/request.py +0 -44
  51. guidellm/core/result.py +0 -545
  52. guidellm/core/serializable.py +0 -169
  53. guidellm/executor/__init__.py +0 -10
  54. guidellm/executor/base.py +0 -213
  55. guidellm/executor/profile_generator.py +0 -343
  56. guidellm/main.py +0 -336
  57. guidellm/request/base.py +0 -194
  58. guidellm/request/emulated.py +0 -391
  59. guidellm/request/file.py +0 -76
  60. guidellm/request/transformers.py +0 -100
  61. guidellm/scheduler/base.py +0 -374
  62. guidellm/scheduler/load_generator.py +0 -196
  63. guidellm/utils/injector.py +0 -70
  64. guidellm/utils/progress.py +0 -196
  65. guidellm/utils/transformers.py +0 -151
  66. guidellm-0.1.0.dist-info/RECORD +0 -35
  67. guidellm-0.1.0.dist-info/entry_points.txt +0 -3
  68. {guidellm-0.1.0.dist-info → guidellm-0.2.0rc20250418.dist-info/licenses}/LICENSE +0 -0
  69. {guidellm-0.1.0.dist-info → guidellm-0.2.0rc20250418.dist-info}/top_level.txt +0 -0
guidellm/backend/base.py DELETED
@@ -1,320 +0,0 @@
1
- import asyncio
2
- import functools
3
- from abc import ABC, abstractmethod
4
- from typing import AsyncGenerator, Dict, List, Literal, Optional, Type, Union
5
-
6
- from loguru import logger
7
- from pydantic import BaseModel
8
- from transformers import ( # type: ignore # noqa: PGH003
9
- AutoTokenizer,
10
- PreTrainedTokenizer,
11
- )
12
-
13
- from guidellm.core import TextGenerationRequest, TextGenerationResult
14
-
15
- __all__ = ["Backend", "BackendEngine", "BackendEnginePublic", "GenerativeResponse"]
16
-
17
-
18
- BackendEnginePublic = Literal["openai_server"]
19
- BackendEngine = Union[BackendEnginePublic, Literal["test"]]
20
-
21
-
22
- class GenerativeResponse(BaseModel):
23
- """
24
- A model representing a response from a generative AI backend.
25
-
26
- :param type_: The type of response, either 'token_iter' for intermediate
27
- token output or 'final' for the final result.
28
- :type type_: Literal["token_iter", "final"]
29
- :param add_token: The token to add to the output
30
- (only applicable if type_ is 'token_iter').
31
- :type add_token: Optional[str]
32
- :param prompt: The original prompt sent to the backend.
33
- :type prompt: Optional[str]
34
- :param output: The final generated output (only applicable if type_ is 'final').
35
- :type output: Optional[str]
36
- :param prompt_token_count: The number of tokens in the prompt.
37
- :type prompt_token_count: Optional[int]
38
- :param output_token_count: The number of tokens in the output.
39
- :type output_token_count: Optional[int]
40
- """
41
-
42
- type_: Literal["token_iter", "final"]
43
- add_token: Optional[str] = None
44
- prompt: Optional[str] = None
45
- output: Optional[str] = None
46
- prompt_token_count: Optional[int] = None
47
- output_token_count: Optional[int] = None
48
-
49
-
50
- class Backend(ABC):
51
- """
52
- Abstract base class for generative AI backends.
53
-
54
- This class provides a common interface for creating and interacting with different
55
- generative AI backends. Subclasses should implement the abstract methods to
56
- define specific backend behavior.
57
-
58
- :cvar _registry: A dictionary that maps BackendEngine types to backend classes.
59
- :type _registry: Dict[BackendEngine, Type[Backend]]
60
- :param type_: The type of the backend.
61
- :type type_: BackendEngine
62
- :param target: The target URL for the backend.
63
- :type target: str
64
- :param model: The model used by the backend.
65
- :type model: str
66
- """
67
-
68
- _registry: Dict[BackendEngine, "Type[Backend]"] = {}
69
-
70
- @classmethod
71
- def register(cls, backend_type: BackendEngine):
72
- """
73
- A decorator to register a backend class in the backend registry.
74
-
75
- :param backend_type: The type of backend to register.
76
- :type backend_type: BackendEngine
77
- :return: The decorated backend class.
78
- :rtype: Type[Backend]
79
- """
80
-
81
- def inner_wrapper(wrapped_class: Type["Backend"]):
82
- cls._registry[backend_type] = wrapped_class
83
- logger.info("Registered backend type: {}", backend_type)
84
- return wrapped_class
85
-
86
- return inner_wrapper
87
-
88
- @classmethod
89
- def create(cls, backend_type: BackendEngine, **kwargs) -> "Backend":
90
- """
91
- Factory method to create a backend instance based on the backend type.
92
-
93
- :param backend_type: The type of backend to create.
94
- :type backend_type: BackendEngine
95
- :param kwargs: Additional arguments for backend initialization.
96
- :return: An instance of a subclass of Backend.
97
- :rtype: Backend
98
- :raises ValueError: If the backend type is not registered.
99
- """
100
-
101
- logger.info("Creating backend of type {}", backend_type)
102
-
103
- if backend_type not in cls._registry:
104
- err = ValueError(f"Unsupported backend type: {backend_type}")
105
- logger.error("{}", err)
106
- raise err
107
-
108
- return Backend._registry[backend_type](**kwargs)
109
-
110
- def __init__(self, type_: BackendEngine, target: str, model: str):
111
- """
112
- Base constructor for the Backend class.
113
- Calls into test_connection to ensure the backend is reachable.
114
- Ensure all setup is done in the subclass constructor before calling super.
115
-
116
- :param type_: The type of the backend.
117
- :param target: The target URL for the backend.
118
- :param model: The model used by the backend.
119
- """
120
- self._type = type_
121
- self._target = target
122
- self._model = model
123
-
124
- self.test_connection()
125
-
126
- @property
127
- def default_model(self) -> str:
128
- """
129
- Get the default model for the backend.
130
-
131
- :return: The default model.
132
- :rtype: str
133
- :raises ValueError: If no models are available.
134
- """
135
- return _cachable_default_model(self)
136
-
137
- @property
138
- def type_(self) -> BackendEngine:
139
- """
140
- Get the type of the backend.
141
-
142
- :return: The type of the backend.
143
- :rtype: BackendEngine
144
- """
145
- return self._type
146
-
147
- @property
148
- def target(self) -> str:
149
- """
150
- Get the target URL for the backend.
151
-
152
- :return: The target URL.
153
- :rtype: str
154
- """
155
- return self._target
156
-
157
- @property
158
- def model(self) -> str:
159
- """
160
- Get the model used by the backend.
161
-
162
- :return: The model name.
163
- :rtype: str
164
- """
165
- return self._model
166
-
167
- def model_tokenizer(self) -> PreTrainedTokenizer:
168
- """
169
- Get the tokenizer for the backend model.
170
-
171
- :return: The tokenizer instance.
172
- """
173
- return AutoTokenizer.from_pretrained(self.model)
174
-
175
- def test_connection(self) -> bool:
176
- """
177
- Test the connection to the backend by running a short text generation request.
178
- If successful, returns True, otherwise raises an exception.
179
-
180
- :return: True if the connection is successful.
181
- :rtype: bool
182
- :raises ValueError: If the connection test fails.
183
- """
184
- try:
185
- asyncio.get_running_loop()
186
- is_async = True
187
- except RuntimeError:
188
- is_async = False
189
-
190
- if is_async:
191
- logger.warning("Running in async mode, cannot test connection")
192
- return True
193
-
194
- try:
195
- request = TextGenerationRequest(
196
- prompt="Test connection", output_token_count=5
197
- )
198
-
199
- asyncio.run(self.submit(request))
200
- return True
201
- except Exception as err:
202
- raise_err = RuntimeError(
203
- f"Backend connection test failed for backend type={self.type_} "
204
- f"with target={self.target} and model={self.model} with error: {err}"
205
- )
206
- logger.error(raise_err)
207
- raise raise_err from err
208
-
209
- async def submit(self, request: TextGenerationRequest) -> TextGenerationResult:
210
- """
211
- Submit a text generation request and return the result.
212
-
213
- This method handles the request submission to the backend and processes
214
- the response in a streaming fashion if applicable.
215
-
216
- :param request: The request object containing the prompt
217
- and other configurations.
218
- :type request: TextGenerationRequest
219
- :return: The result of the text generation request.
220
- :rtype: TextGenerationResult
221
- :raises ValueError: If no response is received from the backend.
222
- """
223
-
224
- logger.debug("Submitting request with prompt: {}", request.prompt)
225
-
226
- result = TextGenerationResult(request=request)
227
- result.start(request.prompt)
228
- received_final = False
229
-
230
- async for response in self.make_request(request):
231
- logger.debug("Received response: {}", response)
232
- if response.type_ == "token_iter":
233
- result.output_token(response.add_token if response.add_token else "")
234
- elif response.type_ == "final":
235
- if received_final:
236
- err = ValueError(
237
- "Received multiple final responses from the backend."
238
- )
239
- logger.error(err)
240
- raise err
241
-
242
- result.end(
243
- output=response.output,
244
- prompt_token_count=response.prompt_token_count,
245
- output_token_count=response.output_token_count,
246
- )
247
- received_final = True
248
- else:
249
- err = ValueError(
250
- f"Invalid response received from the backend of type: "
251
- f"{response.type_} for {response}"
252
- )
253
- logger.error(err)
254
- raise err
255
-
256
- if not received_final:
257
- err = ValueError("No final response received from the backend.")
258
- logger.error(err)
259
- raise err
260
-
261
- logger.info("Request completed with output: {}", result.output)
262
-
263
- return result
264
-
265
- @abstractmethod
266
- async def make_request(
267
- self,
268
- request: TextGenerationRequest,
269
- ) -> AsyncGenerator[GenerativeResponse, None]:
270
- """
271
- Abstract method to make a request to the backend.
272
-
273
- Subclasses must implement this method to define how requests are handled
274
- by the backend.
275
-
276
- :param request: The request object containing the prompt and
277
- other configurations.
278
- :type request: TextGenerationRequest
279
- :yield: A generator yielding responses from the backend.
280
- :rtype: AsyncGenerator[GenerativeResponse, None]
281
- """
282
- yield None # type: ignore # noqa: PGH003
283
-
284
- @abstractmethod
285
- def available_models(self) -> List[str]:
286
- """
287
- Abstract method to get the available models for the backend.
288
-
289
- Subclasses must implement this method to provide the list of models
290
- supported by the backend.
291
-
292
- :return: A list of available models.
293
- :rtype: List[str]
294
- :raises NotImplementedError: If the method is not implemented by a subclass.
295
- """
296
- raise NotImplementedError
297
-
298
-
299
- @functools.lru_cache(maxsize=1)
300
- def _cachable_default_model(backend: Backend) -> str:
301
- """
302
- Get the default model for a backend using LRU caching.
303
-
304
- This function caches the default model to optimize repeated lookups.
305
-
306
- :param backend: The backend instance for which to get the default model.
307
- :type backend: Backend
308
- :return: The default model.
309
- :rtype: str
310
- :raises ValueError: If no models are available.
311
- """
312
- logger.debug("Getting default model for backend: {}", backend)
313
- models = backend.available_models()
314
- if models:
315
- logger.debug("Default model: {}", models[0])
316
- return models[0]
317
-
318
- err = ValueError("No models available.")
319
- logger.error(err)
320
- raise err
guidellm/core/__init__.py DELETED
@@ -1,24 +0,0 @@
1
- from .distribution import Distribution
2
- from .report import GuidanceReport
3
- from .request import TextGenerationRequest
4
- from .result import (
5
- RequestConcurrencyMeasurement,
6
- TextGenerationBenchmark,
7
- TextGenerationBenchmarkReport,
8
- TextGenerationError,
9
- TextGenerationResult,
10
- )
11
- from .serializable import Serializable, SerializableFileType
12
-
13
- __all__ = [
14
- "Distribution",
15
- "GuidanceReport",
16
- "RequestConcurrencyMeasurement",
17
- "Serializable",
18
- "SerializableFileType",
19
- "TextGenerationBenchmark",
20
- "TextGenerationBenchmarkReport",
21
- "TextGenerationError",
22
- "TextGenerationRequest",
23
- "TextGenerationResult",
24
- ]
guidellm/core/distribution.py DELETED
@@ -1,190 +0,0 @@
1
- from typing import List, Sequence
2
-
3
- import numpy as np
4
- from loguru import logger
5
- from pydantic import Field
6
-
7
- from guidellm.core.serializable import Serializable
8
-
9
- __all__ = ["Distribution"]
10
-
11
-
12
- class Distribution(Serializable):
13
- """
14
- A class to represent a statistical distribution and perform various
15
- statistical analyses.
16
- """
17
-
18
- data: Sequence[float] = Field(
19
- default_factory=list,
20
- description="The data points of the distribution.",
21
- )
22
-
23
- def __str__(self):
24
- return f"Distribution({self.describe()})"
25
-
26
- def __len__(self):
27
- return len(self.data)
28
-
29
- @property
30
- def mean(self) -> float:
31
- """
32
- Calculate and return the mean of the distribution.
33
- :return: The mean of the distribution.
34
- """
35
- if not self.data:
36
- logger.warning("No data points available to calculate mean.")
37
- return 0.0
38
-
39
- mean_value = np.mean(self.data).item()
40
- logger.debug(f"Calculated mean: {mean_value}")
41
- return mean_value
42
-
43
- @property
44
- def median(self) -> float:
45
- """
46
- Calculate and return the median of the distribution.
47
- :return: The median of the distribution.
48
- """
49
- if not self.data:
50
- logger.warning("No data points available to calculate median.")
51
- return 0.0
52
-
53
- median_value = np.median(self.data).item()
54
- logger.debug(f"Calculated median: {median_value}")
55
- return median_value
56
-
57
- @property
58
- def variance(self) -> float:
59
- """
60
- Calculate and return the variance of the distribution.
61
- :return: The variance of the distribution.
62
- """
63
- if not self.data:
64
- logger.warning("No data points available to calculate variance.")
65
- return 0.0
66
-
67
- variance_value = np.var(self.data).item()
68
- logger.debug(f"Calculated variance: {variance_value}")
69
- return variance_value
70
-
71
- @property
72
- def std_deviation(self) -> float:
73
- """
74
- Calculate and return the standard deviation of the distribution.
75
- :return: The standard deviation of the distribution.
76
- """
77
- if not self.data:
78
- logger.warning("No data points available to calculate standard deviation.")
79
- return 0.0
80
-
81
- std_deviation_value = np.std(self.data).item()
82
- logger.debug(f"Calculated standard deviation: {std_deviation_value}")
83
- return std_deviation_value
84
-
85
- def percentile(self, percentile: float) -> float:
86
- """
87
- Calculate and return the specified percentile of the distribution.
88
- :param percentile: The desired percentile to calculate (0-100).
89
- :return: The specified percentile of the distribution.
90
- """
91
- if not self.data:
92
- logger.warning("No data points available to calculate percentile.")
93
- return 0.0
94
-
95
- percentile_value = np.percentile(self.data, percentile).item()
96
- logger.debug(f"Calculated {percentile}th percentile: {percentile_value}")
97
- return percentile_value
98
-
99
- def percentiles(self, percentiles: List[float]) -> List[float]:
100
- """
101
- Calculate and return the specified percentiles of the distribution.
102
- :param percentiles: A list of desired percentiles to calculate (0-100).
103
- :return: A list of the specified percentiles of the distribution.
104
- """
105
- if not self.data:
106
- logger.warning("No data points available to calculate percentiles.")
107
- return [0.0] * len(percentiles)
108
-
109
- percentiles_values: List[float] = np.percentile(self.data, percentiles).tolist() # type: ignore # noqa: PGH003
110
- logger.debug(f"Calculated percentiles {percentiles}: {percentiles_values}")
111
- return percentiles_values
112
-
113
- @property
114
- def min(self) -> float:
115
- """
116
- Return the minimum value of the distribution.
117
- :return: The minimum value of the distribution.
118
- """
119
- if not self.data:
120
- logger.warning("No data points available to calculate minimum.")
121
- return 0.0
122
-
123
- min_value: float = np.min(self.data).item() # type: ignore # noqa: PGH003
124
- logger.debug(f"Calculated min: {min_value}")
125
- return min_value
126
-
127
- @property
128
- def max(self) -> float:
129
- """
130
- Return the maximum value of the distribution.
131
- :return: The maximum value of the distribution.
132
- """
133
- if not self.data:
134
- logger.warning("No data points available to calculate maximum.")
135
- return 0.0
136
-
137
- max_value: float = np.max(self.data).item() # type: ignore # noqa: PGH003
138
- logger.debug(f"Calculated max: {max_value}")
139
- return max_value
140
-
141
- @property
142
- def range(self) -> float:
143
- """
144
- Calculate and return the range of the distribution (max - min).
145
- :return: The range of the distribution.
146
- """
147
- if not self.data:
148
- logger.warning("No data points available to calculate range.")
149
- return 0.0
150
-
151
- range_value = self.max - self.min
152
- logger.debug(f"Calculated range: {range_value}")
153
- return range_value
154
-
155
- def describe(self) -> dict:
156
- """
157
- Return a dictionary describing various statistics of the distribution.
158
- :return: A dictionary with statistical summaries of the distribution.
159
- """
160
- description = {
161
- "mean": self.mean,
162
- "median": self.median,
163
- "variance": self.variance,
164
- "std_deviation": self.std_deviation,
165
- "percentile_indices": [10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99],
166
- "percentile_values": self.percentiles(
167
- [10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99],
168
- ),
169
- "min": self.min,
170
- "max": self.max,
171
- "range": self.range,
172
- }
173
- logger.debug(f"Generated description: {description}")
174
- return description
175
-
176
- def add_data(self, new_data: Sequence[float]):
177
- """
178
- Add new data points to the distribution.
179
- :param new_data: A list of new numerical data points to add.
180
- """
181
- self.data = list(self.data) + list(new_data)
182
- logger.debug(f"Added new data: {new_data}")
183
-
184
- def remove_data(self, remove_data: Sequence[float]):
185
- """
186
- Remove specified data points from the distribution.
187
- :param remove_data: A list of numerical data points to remove.
188
- """
189
- self.data = [item for item in self.data if item not in remove_data]
190
- logger.debug(f"Removed data: {remove_data}")