guidellm 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of guidellm has been flagged as potentially problematic; see the registry listing for more details.
- guidellm/__init__.py +19 -0
- guidellm/backend/__init__.py +10 -0
- guidellm/backend/base.py +320 -0
- guidellm/backend/openai.py +168 -0
- guidellm/config.py +234 -0
- guidellm/core/__init__.py +24 -0
- guidellm/core/distribution.py +190 -0
- guidellm/core/report.py +321 -0
- guidellm/core/request.py +44 -0
- guidellm/core/result.py +545 -0
- guidellm/core/serializable.py +169 -0
- guidellm/executor/__init__.py +10 -0
- guidellm/executor/base.py +213 -0
- guidellm/executor/profile_generator.py +343 -0
- guidellm/logger.py +83 -0
- guidellm/main.py +336 -0
- guidellm/request/__init__.py +13 -0
- guidellm/request/base.py +194 -0
- guidellm/request/emulated.py +391 -0
- guidellm/request/file.py +76 -0
- guidellm/request/transformers.py +100 -0
- guidellm/scheduler/__init__.py +4 -0
- guidellm/scheduler/base.py +374 -0
- guidellm/scheduler/load_generator.py +196 -0
- guidellm/utils/__init__.py +40 -0
- guidellm/utils/injector.py +70 -0
- guidellm/utils/progress.py +196 -0
- guidellm/utils/text.py +455 -0
- guidellm/utils/transformers.py +151 -0
- guidellm-0.1.0.dist-info/LICENSE +201 -0
- guidellm-0.1.0.dist-info/METADATA +434 -0
- guidellm-0.1.0.dist-info/RECORD +35 -0
- guidellm-0.1.0.dist-info/WHEEL +5 -0
- guidellm-0.1.0.dist-info/entry_points.txt +3 -0
- guidellm-0.1.0.dist-info/top_level.txt +1 -0
guidellm/config.py
ADDED
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from enum import Enum
|
|
3
|
+
from typing import Dict, List, Optional, Sequence
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel, Field, model_validator
|
|
6
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
7
|
+
|
|
8
|
+
# Public API of the configuration module.
__all__ = [
    "DatasetSettings",
    "EmulatedDataSettings",
    "Environment",
    "LoggingSettings",
    "OpenAISettings",
    "print_config",
    "ReportGenerationSettings",
    "Settings",
    "reload_settings",
    "settings",
]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class Environment(str, Enum):
    """
    Enum for the supported environments.

    Inherits from ``str`` so members compare equal to their string values
    and serialize cleanly through pydantic / env files.
    """

    LOCAL = "local"
    DEV = "dev"
    STAGING = "staging"
    PROD = "prod"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# Maps each environment to the HTML report source used when
# ReportGenerationSettings.source is left empty (resolved by
# Settings.set_default_source below).
ENV_REPORT_MAPPING: Dict[Environment, str] = {
    Environment.PROD: "https://guidellm.neuralmagic.com/local-report/index.html",
    Environment.STAGING: "https://staging.guidellm.neuralmagic.com/local-report/index.html",
    Environment.DEV: "https://dev.guidellm.neuralmagic.com/local-report/index.html",
    Environment.LOCAL: "tests/dummy/report.html",
}
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class LoggingSettings(BaseModel):
    """
    Logging settings for the application.
    """

    # NOTE(review): field semantics are inferred from names; the consumer
    # (presumably guidellm/logger.py) is not visible here -- confirm there.
    disabled: bool = False
    clear_loggers: bool = True
    console_log_level: str = "WARNING"
    # Optional log file path; file logging presumably off when None -- confirm.
    log_file: Optional[str] = None
    log_file_level: Optional[str] = None
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class DatasetSettings(BaseModel):
    """
    Dataset settings for the application.
    """

    # Column names to try, in priority order, when picking the text/prompt
    # column of a dataset (presumably used by the request sources -- confirm).
    preferred_data_columns: List[str] = Field(
        default_factory=lambda: [
            "prompt",
            "instruction",
            "input",
            "inputs",
            "question",
            "context",
            "text",
            "content",
            "body",
            "data",
        ]
    )
    # Split names to try, in priority order, when choosing a dataset split.
    preferred_data_splits: List[str] = Field(
        default_factory=lambda: ["test", "tst", "validation", "val", "train"]
    )
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class EmulatedDataSettings(BaseModel):
    """
    Emulated data settings for the application to use.
    """

    # Default corpus for emulated requests: Project Gutenberg e-text #1342
    # (Pride and Prejudice, judging by the filter markers below).
    source: str = "https://www.gutenberg.org/files/1342/1342-0.txt"
    # Start/end markers that bound the usable portion of the source text,
    # trimming the Gutenberg header and footer.
    filter_start: str = "It is a truth universally acknowledged, that a"
    filter_end: str = "CHISWICK PRESS:--CHARLES WHITTINGHAM AND CO."
    # Flags forwarded to the text-cleaning helper (not visible in this
    # module; presumably guidellm/utils/text.py -- confirm).
    clean_text_args: Dict[str, bool] = Field(
        default_factory=lambda: {
            "fix_encoding": True,
            "clean_whitespace": True,
            "remove_empty_lines": True,
            "force_new_line_punctuation": True,
        }
    )
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
class OpenAISettings(BaseModel):
    """
    OpenAI settings for the application to connect to the API
    for OpenAI server based pathways.
    """

    # OpenAI API key. Placeholder default -- a real key must be supplied via
    # GUIDELLM__OPENAI__API_KEY or the .env file.
    api_key: str = "invalid_token"

    # OpenAI-compatible server URL.
    # NOTE(review): the original comment called :8000 the llama.cpp web
    # server default, but llama.cpp serves on :8080 by default; :8000/v1 is
    # the vLLM default -- confirm which backend this is meant to target.
    base_url: str = "http://localhost:8000/v1"

    # Cap on tokens generated per request.
    max_gen_tokens: int = 4096
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class ReportGenerationSettings(BaseModel):
    """
    Report generation settings for the application.
    """

    # HTML report source; when empty it is filled in from ENV_REPORT_MAPPING
    # by Settings.set_default_source.
    source: str = ""
    # Exact line in the report HTML to substitute with real data, and the
    # placeholder within it (presumably consumed by utils/injector.py --
    # confirm).
    report_html_match: str = "window.report_data = {};"
    report_html_placeholder: str = "{}"
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class Settings(BaseSettings):
    """
    All the settings are powered by pydantic_settings and could be
    populated from the .env file.

    The format to populate the settings is next

    ```sh
    export GUIDELLM__LOGGING__DISABLED=true
    export GUIDELLM__OPENAI__API_KEY=******
    ```
    """

    model_config = SettingsConfigDict(
        env_prefix="GUIDELLM__",
        env_nested_delimiter="__",
        extra="ignore",
        validate_default=True,
        env_file=".env",
    )

    # general settings
    env: Environment = Environment.PROD
    request_timeout: int = 30
    max_concurrency: int = 512
    num_sweep_profiles: int = 9
    logging: LoggingSettings = LoggingSettings()

    # Data settings
    dataset: DatasetSettings = DatasetSettings()
    emulated_data: EmulatedDataSettings = EmulatedDataSettings()

    # Request settings
    openai: OpenAISettings = OpenAISettings()

    # Report settings
    report_generation: ReportGenerationSettings = ReportGenerationSettings()

    # NOTE(review): combining @model_validator(mode="after") with
    # @classmethod is unusual -- pydantic v2's "after" validators normally
    # receive the model instance; confirm `values` is the instance here.
    @model_validator(mode="after")
    @classmethod
    def set_default_source(cls, values):
        # Resolve an empty report source from the environment mapping so
        # every environment gets a working report template by default.
        if not values.report_generation.source:
            values.report_generation.source = ENV_REPORT_MAPPING.get(values.env)

        return values

    def generate_env_file(self) -> str:
        """
        Generate the .env file from the current settings.

        :return: env-file-formatted text (one TAG=value line per leaf
            setting, using the configured prefix and nested delimiter).
        """
        return Settings._recursive_generate_env(
            self,
            self.model_config["env_prefix"],  # type: ignore  # noqa: PGH003
            self.model_config["env_nested_delimiter"],  # type: ignore  # noqa: PGH003
        )

    @staticmethod
    def _recursive_generate_env(model: BaseModel, prefix: str, delimiter: str) -> str:
        """
        Render ``model``'s fields as env-file lines prefixed with ``prefix``,
        recursing into nested models with ``delimiter``-joined names.
        """
        env_file = ""
        add_models = []
        # NOTE(review): model_dump() converts nested BaseModels into plain
        # dicts, so the isinstance check below appears never to match and
        # add_models stays empty; nested models are instead flattened one
        # level via the dict branch below -- confirm intent.
        for key, value in model.model_dump().items():
            if isinstance(value, BaseModel):
                # add nested properties to be processed after the current level
                add_models.append((key, value))
                continue

            # Flatten a dict-valued field into PREFIX__KEY__SUBKEY tags;
            # scalar fields map to a single PREFIX__KEY tag.
            dict_values = (
                {
                    f"{prefix}{key.upper()}{delimiter}{sub_key.upper()}": sub_value
                    for sub_key, sub_value in value.items()
                }
                if isinstance(value, dict)
                else {f"{prefix}{key.upper()}": value}
            )

            for tag, sub_value in dict_values.items():
                if isinstance(sub_value, Sequence) and not isinstance(sub_value, str):
                    # Lists render as ["a","b",...] (pydantic-settings JSON style).
                    value_str = ",".join(f'"{item}"' for item in sub_value)
                    env_file += f"{tag}=[{value_str}]\n"
                elif isinstance(sub_value, Dict):
                    # Nested dicts render as JSON.
                    value_str = json.dumps(sub_value)
                    env_file += f"{tag}={value_str}\n"
                elif not sub_value:
                    # NOTE(review): falsy values (False, 0, "") all render as
                    # an empty value -- confirm these round-trip when reloaded.
                    env_file += f"{tag}=\n"
                else:
                    env_file += f'{tag}="{sub_value}"\n'

        # Emit nested models after the current level (dead in practice --
        # see the NOTE above).
        for key, value in add_models:
            env_file += Settings._recursive_generate_env(
                value, f"{prefix}{key.upper()}{delimiter}", delimiter
            )
        return env_file
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
# Module-level singleton, loaded once at import time from the environment
# and .env file; refresh it in place via reload_settings().
settings = Settings()
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def reload_settings():
    """
    Reload the settings from the environment variables.

    Rebuilds a fresh ``Settings`` instance and copies its state into the
    module-level ``settings`` singleton in place, so existing references
    observe the refreshed values.
    """
    settings.__dict__.update(Settings().__dict__)
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def print_config():
    """
    Print the current configuration settings.
    """
    env_dump = settings.generate_env_file()
    print(f"Settings: \n{env_dump}")  # noqa: T201
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
# Allow quick inspection of the resolved configuration from the CLI:
#   python -m guidellm.config
if __name__ == "__main__":
    print_config()
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from .distribution import Distribution
|
|
2
|
+
from .report import GuidanceReport
|
|
3
|
+
from .request import TextGenerationRequest
|
|
4
|
+
from .result import (
|
|
5
|
+
RequestConcurrencyMeasurement,
|
|
6
|
+
TextGenerationBenchmark,
|
|
7
|
+
TextGenerationBenchmarkReport,
|
|
8
|
+
TextGenerationError,
|
|
9
|
+
TextGenerationResult,
|
|
10
|
+
)
|
|
11
|
+
from .serializable import Serializable, SerializableFileType
|
|
12
|
+
|
|
13
|
+
# Public re-exports of the guidellm.core package (kept alphabetical).
__all__ = [
    "Distribution",
    "GuidanceReport",
    "RequestConcurrencyMeasurement",
    "Serializable",
    "SerializableFileType",
    "TextGenerationBenchmark",
    "TextGenerationBenchmarkReport",
    "TextGenerationError",
    "TextGenerationRequest",
    "TextGenerationResult",
]
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
from typing import List, Sequence
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
from loguru import logger
|
|
5
|
+
from pydantic import Field
|
|
6
|
+
|
|
7
|
+
from guidellm.core.serializable import Serializable
|
|
8
|
+
|
|
9
|
+
__all__ = ["Distribution"]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class Distribution(Serializable):
    """
    A class to represent a statistical distribution and perform various
    statistical analyses.

    All statistics return 0.0 (with a warning logged) when the
    distribution holds no data points.
    """

    # The raw data points; stored as a plain sequence of floats.
    data: Sequence[float] = Field(
        default_factory=list,
        description="The data points of the distribution.",
    )

    def __str__(self):
        """Return a human-readable summary of the distribution."""
        return f"Distribution({self.describe()})"

    def __len__(self):
        """Return the number of data points in the distribution."""
        return len(self.data)

    @property
    def mean(self) -> float:
        """
        Calculate and return the mean of the distribution.
        :return: The mean of the distribution, or 0.0 if empty.
        """
        if not self.data:
            logger.warning("No data points available to calculate mean.")
            return 0.0

        mean_value = np.mean(self.data).item()
        logger.debug(f"Calculated mean: {mean_value}")
        return mean_value

    @property
    def median(self) -> float:
        """
        Calculate and return the median of the distribution.
        :return: The median of the distribution, or 0.0 if empty.
        """
        if not self.data:
            logger.warning("No data points available to calculate median.")
            return 0.0

        median_value = np.median(self.data).item()
        logger.debug(f"Calculated median: {median_value}")
        return median_value

    @property
    def variance(self) -> float:
        """
        Calculate and return the variance of the distribution.

        Uses numpy's default ddof=0 (population variance).

        :return: The variance of the distribution, or 0.0 if empty.
        """
        if not self.data:
            logger.warning("No data points available to calculate variance.")
            return 0.0

        variance_value = np.var(self.data).item()
        logger.debug(f"Calculated variance: {variance_value}")
        return variance_value

    @property
    def std_deviation(self) -> float:
        """
        Calculate and return the standard deviation of the distribution.

        Uses numpy's default ddof=0 (population standard deviation).

        :return: The standard deviation of the distribution, or 0.0 if empty.
        """
        if not self.data:
            logger.warning("No data points available to calculate standard deviation.")
            return 0.0

        std_deviation_value = np.std(self.data).item()
        logger.debug(f"Calculated standard deviation: {std_deviation_value}")
        return std_deviation_value

    def percentile(self, percentile: float) -> float:
        """
        Calculate and return the specified percentile of the distribution.
        :param percentile: The desired percentile to calculate (0-100).
        :return: The specified percentile of the distribution, or 0.0 if empty.
        """
        if not self.data:
            logger.warning("No data points available to calculate percentile.")
            return 0.0

        percentile_value = np.percentile(self.data, percentile).item()
        logger.debug(f"Calculated {percentile}th percentile: {percentile_value}")
        return percentile_value

    def percentiles(self, percentiles: List[float]) -> List[float]:
        """
        Calculate and return the specified percentiles of the distribution.
        :param percentiles: A list of desired percentiles to calculate (0-100).
        :return: A list of the specified percentiles of the distribution;
            all zeros if the distribution is empty.
        """
        if not self.data:
            logger.warning("No data points available to calculate percentiles.")
            return [0.0] * len(percentiles)

        percentiles_values: List[float] = np.percentile(self.data, percentiles).tolist()  # type: ignore # noqa: PGH003
        logger.debug(f"Calculated percentiles {percentiles}: {percentiles_values}")
        return percentiles_values

    @property
    def min(self) -> float:
        """
        Return the minimum value of the distribution.
        :return: The minimum value of the distribution, or 0.0 if empty.
        """
        if not self.data:
            logger.warning("No data points available to calculate minimum.")
            return 0.0

        min_value: float = np.min(self.data).item()  # type: ignore # noqa: PGH003
        logger.debug(f"Calculated min: {min_value}")
        return min_value

    @property
    def max(self) -> float:
        """
        Return the maximum value of the distribution.
        :return: The maximum value of the distribution, or 0.0 if empty.
        """
        if not self.data:
            logger.warning("No data points available to calculate maximum.")
            return 0.0

        max_value: float = np.max(self.data).item()  # type: ignore # noqa: PGH003
        logger.debug(f"Calculated max: {max_value}")
        return max_value

    @property
    def range(self) -> float:
        """
        Calculate and return the range of the distribution (max - min).
        :return: The range of the distribution, or 0.0 if empty.
        """
        if not self.data:
            logger.warning("No data points available to calculate range.")
            return 0.0

        range_value = self.max - self.min
        logger.debug(f"Calculated range: {range_value}")
        return range_value

    def describe(self) -> dict:
        """
        Return a dictionary describing various statistics of the distribution.
        :return: A dictionary with statistical summaries of the distribution.
        """
        # Single source of truth for the reported percentiles so the
        # indices and values can never drift apart.
        percentile_indices = [10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99]
        description = {
            "mean": self.mean,
            "median": self.median,
            "variance": self.variance,
            "std_deviation": self.std_deviation,
            "percentile_indices": percentile_indices,
            "percentile_values": self.percentiles(percentile_indices),
            "min": self.min,
            "max": self.max,
            "range": self.range,
        }
        logger.debug(f"Generated description: {description}")
        return description

    def add_data(self, new_data: Sequence[float]):
        """
        Add new data points to the distribution.
        :param new_data: A list of new numerical data points to add.
        """
        self.data = list(self.data) + list(new_data)
        logger.debug(f"Added new data: {new_data}")

    def remove_data(self, remove_data: Sequence[float]):
        """
        Remove specified data points from the distribution.

        Every occurrence of each value in ``remove_data`` is removed.

        :param remove_data: A list of numerical data points to remove.
        """
        # Hoist membership testing into a set: the original scanned the
        # remove sequence for every data point (O(n * m)); this is O(n + m)
        # with identical semantics for float data.
        to_remove = set(remove_data)
        self.data = [item for item in self.data if item not in to_remove]
        logger.debug(f"Removed data: {remove_data}")
|