guidellm 0.3.0rc20250507__py3-none-any.whl → 0.4.0a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- guidellm/__init__.py +8 -13
- guidellm/__main__.py +290 -69
- guidellm/backend/__init__.py +6 -6
- guidellm/backend/backend.py +25 -4
- guidellm/backend/openai.py +147 -27
- guidellm/backend/response.py +6 -2
- guidellm/benchmark/__init__.py +16 -22
- guidellm/benchmark/aggregator.py +3 -3
- guidellm/benchmark/benchmark.py +11 -12
- guidellm/benchmark/benchmarker.py +2 -2
- guidellm/benchmark/entrypoints.py +34 -10
- guidellm/benchmark/output.py +57 -5
- guidellm/benchmark/profile.py +4 -4
- guidellm/benchmark/progress.py +2 -2
- guidellm/benchmark/scenario.py +104 -0
- guidellm/benchmark/scenarios/__init__.py +0 -0
- guidellm/config.py +28 -7
- guidellm/dataset/__init__.py +4 -4
- guidellm/dataset/creator.py +1 -1
- guidellm/dataset/synthetic.py +36 -11
- guidellm/logger.py +8 -4
- guidellm/objects/__init__.py +2 -2
- guidellm/objects/pydantic.py +30 -1
- guidellm/objects/statistics.py +20 -14
- guidellm/preprocess/__init__.py +3 -0
- guidellm/preprocess/dataset.py +374 -0
- guidellm/presentation/__init__.py +28 -0
- guidellm/presentation/builder.py +27 -0
- guidellm/presentation/data_models.py +232 -0
- guidellm/presentation/injector.py +66 -0
- guidellm/request/__init__.py +6 -3
- guidellm/request/loader.py +5 -5
- guidellm/{scheduler → request}/types.py +4 -1
- guidellm/scheduler/__init__.py +10 -15
- guidellm/scheduler/queues.py +25 -0
- guidellm/scheduler/result.py +21 -3
- guidellm/scheduler/scheduler.py +68 -60
- guidellm/scheduler/strategy.py +26 -24
- guidellm/scheduler/worker.py +64 -103
- guidellm/utils/__init__.py +17 -5
- guidellm/utils/cli.py +62 -0
- guidellm/utils/default_group.py +105 -0
- guidellm/utils/dict.py +23 -0
- guidellm/utils/hf_datasets.py +36 -0
- guidellm/utils/random.py +1 -1
- guidellm/utils/text.py +12 -5
- guidellm/version.py +6 -0
- guidellm-0.4.0a2.dist-info/METADATA +317 -0
- guidellm-0.4.0a2.dist-info/RECORD +62 -0
- {guidellm-0.3.0rc20250507.dist-info → guidellm-0.4.0a2.dist-info}/WHEEL +1 -1
- guidellm-0.3.0rc20250507.dist-info/METADATA +0 -451
- guidellm-0.3.0rc20250507.dist-info/RECORD +0 -48
- {guidellm-0.3.0rc20250507.dist-info → guidellm-0.4.0a2.dist-info}/entry_points.txt +0 -0
- {guidellm-0.3.0rc20250507.dist-info → guidellm-0.4.0a2.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.3.0rc20250507.dist-info → guidellm-0.4.0a2.dist-info}/top_level.txt +0 -0
guidellm/benchmark/output.py
CHANGED
@@ -2,6 +2,7 @@ import csv
 import json
 import math
 from collections import OrderedDict
+from copy import deepcopy
 from datetime import datetime
 from pathlib import Path
 from typing import Any, Literal, Optional, Union
@@ -25,12 +26,16 @@ from guidellm.objects import (
     StandardBaseModel,
     StatusDistributionSummary,
 )
+from guidellm.presentation import UIDataBuilder
+from guidellm.presentation.injector import create_report
 from guidellm.scheduler import strategy_display_str
 from guidellm.utils import Colors, split_text_list_by_length
+from guidellm.utils.dict import recursive_key_update
+from guidellm.utils.text import camelize_str

 __all__ = [
-    "GenerativeBenchmarksReport",
     "GenerativeBenchmarksConsole",
+    "GenerativeBenchmarksReport",
 ]


@@ -68,6 +73,9 @@ class GenerativeBenchmarksReport(StandardBaseModel):
         if type_ == "csv":
             raise ValueError(f"CSV file type is not supported for loading: {path}.")

+        if type_ == "html":
+            raise ValueError(f"HTML file type is not supported for loading: {path}.")
+
         raise ValueError(f"Unsupported file type: {type_} for {path}.")

     benchmarks: list[GenerativeBenchmark] = Field(
@@ -114,6 +122,9 @@ class GenerativeBenchmarksReport(StandardBaseModel):
         if type_ == "csv":
             return self.save_csv(path)

+        if type_ == "html":
+            return self.save_html(path)
+
         raise ValueError(f"Unsupported file type: {type_} for {path}.")

     def save_json(self, path: Union[str, Path]) -> Path:
@@ -220,11 +231,29 @@ class GenerativeBenchmarksReport(StandardBaseModel):

         return path

+    def save_html(self, path: Union[str, Path]) -> Path:
+        """
+        Download html, inject report data and save to a file.
+
+        :param path: The path to create the report at.
+        :return: The path to the report.
+        """
+
+        data_builder = UIDataBuilder(self.benchmarks)
+        data = data_builder.to_dict()
+        camel_data = recursive_key_update(deepcopy(data), camelize_str)
+        ui_api_data = {}
+        for k, v in camel_data.items():
+            key = f"window.{k} = {{}};"
+            value = f"window.{k} = {json.dumps(v, indent=2)};\n"
+            ui_api_data[key] = value
+        return create_report(ui_api_data, path)
+
     @staticmethod
     def _file_setup(
         path: Union[str, Path],
-        default_file_type: Literal["json", "yaml", "csv"] = "json",
-    ) -> tuple[Path, Literal["json", "yaml", "csv"]]:
+        default_file_type: Literal["json", "yaml", "csv", "html"] = "json",
+    ) -> tuple[Path, Literal["json", "yaml", "csv", "html"]]:
        path = Path(path) if not isinstance(path, Path) else path

         if path.is_dir():
@@ -242,7 +271,13 @@ class GenerativeBenchmarksReport(StandardBaseModel):
         if path_suffix in [".csv"]:
             return path, "csv"

-
+        if path_suffix in [".html"]:
+            return path, "html"
+
+        raise ValueError(
+            f"Unsupported file extension: {path_suffix} for {path}; "
+            "expected json, yaml, csv, or html."
+        )

     @staticmethod
     def _benchmark_desc_headers_and_values(
@@ -889,7 +924,7 @@ class GenerativeBenchmarksConsole:
             "Request Stats": (1, 2),
             "Out Tok/sec": (3, 3),
             "Tot Tok/sec": (4, 4),
-            "Req Latency (
+            "Req Latency (sec)": (5, 7),
             "TTFT (ms)": (8, 10),
             "ITL (ms)": (11, 13),
             "TPOT (ms)": (14, 16),
@@ -944,3 +979,20 @@ class GenerativeBenchmarksConsole:
             title="Benchmarks Stats",
             sections=sections,
         )
+
+    def print_full_report(self):
+        """
+        Print out the benchmark statistics to the console.
+        Temporarily enables the console if it's disabled.
+
+        Format:
+        - Metadata
+        - Info
+        - Stats
+        """
+        orig_enabled = self.enabled
+        self.enabled = True
+        self.print_benchmarks_metadata()
+        self.print_benchmarks_info()
+        self.print_benchmarks_stats()
+        self.enabled = orig_enabled
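Note on save_html: the method builds ui_api_data as a mapping from placeholder stubs (e.g. `window.benchmarks = {};`) to the same assignment with the real JSON payload inlined, then hands it to create_report from guidellm/presentation/injector.py, whose body is not shown in this diff. A minimal sketch of how that injection could work, assuming create_report performs literal string replacement on the fetched UI HTML; the helper names below are illustrative, not the package's API:

    # Hypothetical sketch of the placeholder substitution save_html relies on.
    from pathlib import Path

    def inject_data(html: str, ui_api_data: dict[str, str]) -> str:
        # Each key is a stub assignment shipped in the UI HTML; each value is
        # the same assignment with the benchmark JSON inlined.
        for placeholder, replacement in ui_api_data.items():
            html = html.replace(placeholder, replacement)
        return html

    def write_report(html: str, ui_api_data: dict[str, str], path: Path) -> Path:
        path.write_text(inject_data(html, ui_api_data))
        return path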
guidellm/benchmark/profile.py
CHANGED
@@ -17,13 +17,13 @@ from guidellm.scheduler import (
 )

 __all__ = [
-    "
+    "AsyncProfile",
+    "ConcurrentProfile",
     "Profile",
+    "ProfileType",
+    "SweepProfile",
     "SynchronousProfile",
-    "ConcurrentProfile",
     "ThroughputProfile",
-    "AsyncProfile",
-    "SweepProfile",
     "create_profile",
 ]

guidellm/benchmark/progress.py
CHANGED
@@ -33,10 +33,10 @@ from guidellm.scheduler import (
 from guidellm.utils import Colors

 __all__ = [
-    "BenchmarkerTaskProgressState",
     "BenchmarkerProgressDisplay",
-    "
+    "BenchmarkerTaskProgressState",
     "GenerativeTextBenchmarkerProgressDisplay",
+    "GenerativeTextBenchmarkerTaskProgressState",
 ]


guidellm/benchmark/scenario.py
ADDED
@@ -0,0 +1,104 @@
+from collections.abc import Iterable
+from functools import cache
+from pathlib import Path
+from typing import Annotated, Any, Literal, Optional, TypeVar, Union
+
+from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
+from pydantic import BeforeValidator, Field, NonNegativeInt, PositiveFloat, PositiveInt
+from transformers.tokenization_utils_base import (  # type: ignore[import]
+    PreTrainedTokenizerBase,
+)
+
+from guidellm.backend.backend import BackendType
+from guidellm.benchmark.profile import ProfileType
+from guidellm.objects.pydantic import StandardBaseModel
+from guidellm.scheduler.strategy import StrategyType
+
+__ALL__ = ["Scenario", "GenerativeTextScenario", "get_builtin_scenarios"]
+
+SCENARIO_DIR = Path(__file__).parent / "scenarios/"
+
+
+@cache
+def get_builtin_scenarios() -> list[str]:
+    """Returns list of builtin scenario names."""
+    return [p.stem for p in SCENARIO_DIR.glob("*.json")]
+
+
+def parse_float_list(value: Union[str, float, list[float]]) -> list[float]:
+    """
+    Parse a comma separated string to a list of float
+    or convert single float list of one or pass float
+    list through.
+    """
+    if isinstance(value, (int, float)):
+        return [value]
+    elif isinstance(value, list):
+        return value
+
+    values = value.split(",") if "," in value else [value]
+
+    try:
+        return [float(val) for val in values]
+    except ValueError as err:
+        raise ValueError(
+            "must be a number or comma-separated list of numbers."
+        ) from err
+
+
+T = TypeVar("T", bound="Scenario")
+
+
+class Scenario(StandardBaseModel):
+    """
+    Parent Scenario class with common options for all benchmarking types.
+    """
+
+    target: str
+
+    @classmethod
+    def from_builtin(cls: type[T], name: str, overrides: Optional[dict] = None) -> T:
+        filename = SCENARIO_DIR / f"{name}.json"
+
+        if not filename.is_file():
+            raise ValueError(f"{name} is not a valid builtin scenario")
+
+        return cls.from_file(filename, overrides)
+
+
+class GenerativeTextScenario(Scenario):
+    """
+    Scenario class for generative text benchmarks.
+    """
+
+    class Config:
+        # NOTE: This prevents errors due to unvalidatable
+        # types like PreTrainedTokenizerBase
+        arbitrary_types_allowed = True
+
+    backend_type: BackendType = "openai_http"
+    backend_args: Optional[dict[str, Any]] = None
+    model: Optional[str] = None
+    processor: Optional[Union[str, Path, PreTrainedTokenizerBase]] = None
+    processor_args: Optional[dict[str, Any]] = None
+    data: Union[
+        str,
+        Path,
+        Iterable[Union[str, dict[str, Any]]],
+        Dataset,
+        DatasetDict,
+        IterableDataset,
+        IterableDatasetDict,
+    ]
+    data_args: Optional[dict[str, Any]] = None
+    data_sampler: Optional[Literal["random"]] = None
+    rate_type: Union[StrategyType, ProfileType]
+    rate: Annotated[
+        Optional[list[PositiveFloat]], BeforeValidator(parse_float_list)
+    ] = None
+    max_seconds: Optional[PositiveFloat] = None
+    max_requests: Optional[PositiveInt] = None
+    warmup_percent: Annotated[Optional[float], Field(gt=0, le=1)] = None
+    cooldown_percent: Annotated[Optional[float], Field(gt=0, le=1)] = None
+    output_sampling: Optional[NonNegativeInt] = None
+    random_seed: int = 42
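Note on the new scenario API: a hypothetical usage sketch; "chat" is an assumed builtin scenario name, since the bundled JSON files under scenarios/ are not part of this diff:

    # Hypothetical usage of the new scenario API.
    from guidellm.benchmark.scenario import (
        GenerativeTextScenario,
        get_builtin_scenarios,
    )

    print(get_builtin_scenarios())  # e.g. ["chat", ...] (names are assumptions)

    scenario = GenerativeTextScenario.from_builtin(
        "chat",
        overrides={"target": "http://localhost:8000", "rate": "1,5,10"},
    )
    # parse_float_list runs as a BeforeValidator, so the comma-separated
    # string becomes [1.0, 5.0, 10.0].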
guidellm/benchmark/scenarios/__init__.py
ADDED
File without changes
guidellm/config.py
CHANGED
@@ -1,4 +1,5 @@
 import json
+import os
 from collections.abc import Sequence
 from enum import Enum
 from typing import Literal, Optional
@@ -11,8 +12,8 @@ __all__ = [
     "Environment",
     "LoggingSettings",
     "OpenAISettings",
-    "print_config",
     "Settings",
+    "print_config",
     "reload_settings",
     "settings",
 ]
@@ -30,10 +31,10 @@ class Environment(str, Enum):


 ENV_REPORT_MAPPING = {
-    Environment.PROD: "https://
-    Environment.STAGING: "https://
-    Environment.DEV: "https://
-    Environment.LOCAL: "
+    Environment.PROD: "https://blog.vllm.ai/guidellm/ui/latest/index.html",
+    Environment.STAGING: "https://blog.vllm.ai/guidellm/ui/release/latest/index.html",
+    Environment.DEV: "https://blog.vllm.ai/guidellm/ui/dev/index.html",
+    Environment.LOCAL: "http://localhost:3000/index.html",
 }


@@ -81,10 +82,20 @@ class OpenAISettings(BaseModel):

     api_key: Optional[str] = None
     bearer_token: Optional[str] = None
+    headers: Optional[dict[str, str]] = None
     organization: Optional[str] = None
     project: Optional[str] = None
     base_url: str = "http://localhost:8000"
     max_output_tokens: int = 16384
+    verify: bool = True
+
+
+class ReportGenerationSettings(BaseModel):
+    """
+    Report generation settings for the application
+    """
+
+    source: str = ""


 class Settings(BaseSettings):
@@ -115,13 +126,18 @@ class Settings(BaseSettings):
     default_sweep_number: int = 10

     # HTTP settings
+    request_follow_redirects: bool = True
     request_timeout: int = 60 * 5  # 5 minutes
     request_http2: bool = True

     # Scheduler settings
     max_concurrency: int = 512
-    max_worker_processes: int =
-
+    max_worker_processes: int = Field(
+        # use number of CPUs - 1, but at least 10
+        default_factory=lambda: max((os.cpu_count() or 1) - 1, 10)
+    )
+    min_queued_requests: int = 20
+    scheduler_start_delay: float = 5

     # Data settings
     dataset: DatasetSettings = DatasetSettings()
@@ -139,6 +155,9 @@ class Settings(BaseSettings):
     )
     openai: OpenAISettings = OpenAISettings()

+    # Report settings
+    report_generation: ReportGenerationSettings = ReportGenerationSettings()
+
     # Output settings
     table_border_char: str = "="
     table_headers_border_char: str = "-"
@@ -147,6 +166,8 @@ class Settings(BaseSettings):
     @model_validator(mode="after")
     @classmethod
     def set_default_source(cls, values):
+        if not values.report_generation.source:
+            values.report_generation.source = ENV_REPORT_MAPPING.get(values.env)
         return values

     def generate_env_file(self) -> str:
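For context, a sketch of how the new settings fields could be overridden via environment variables, assuming the GUIDELLM__ prefix and double-underscore nesting that the GUIDELLM__LOGGING__* variables documented in guidellm/logger.py follow; the exact variable names here are assumptions:

    # Assumed env-var names for the new OpenAISettings.verify and
    # ReportGenerationSettings.source fields.
    import os

    os.environ["GUIDELLM__OPENAI__VERIFY"] = "false"
    os.environ["GUIDELLM__REPORT_GENERATION__SOURCE"] = "http://localhost:3000/index.html"

    from guidellm.config import reload_settings, settings

    reload_settings()  # exported in __all__ above; re-reads the environment
    print(settings.openai.verify, settings.report_generation.source)
    # max_worker_processes now defaults to max(cpu_count - 1, 10) per the
    # default_factory added in this diff.
    print(settings.max_worker_processes)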
guidellm/dataset/__init__.py
CHANGED
@@ -10,13 +10,13 @@ from .synthetic import (
 )

 __all__ = [
-    "DatasetCreator",
     "ColumnInputTypes",
-    "
-    "load_dataset",
+    "DatasetCreator",
     "FileDatasetCreator",
+    "HFDatasetsCreator",
     "InMemoryDatasetCreator",
-    "SyntheticDatasetCreator",
     "SyntheticDatasetConfig",
+    "SyntheticDatasetCreator",
     "SyntheticTextItemsGenerator",
+    "load_dataset",
 ]
guidellm/dataset/creator.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Any, Literal, Optional, Union
 from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
 from transformers import PreTrainedTokenizerBase  # type: ignore[import]

-__all__ = ["
+__all__ = ["ColumnInputTypes", "DatasetCreator"]

 ColumnInputTypes = Literal[
     "prompt_column",
guidellm/dataset/synthetic.py
CHANGED
@@ -1,6 +1,7 @@
 import json
 import random
 from collections.abc import Iterable, Iterator
+from itertools import cycle
 from pathlib import Path
 from typing import Any, Literal, Optional, Union

@@ -18,13 +19,18 @@ from guidellm.dataset.creator import ColumnInputTypes, DatasetCreator
 from guidellm.utils import EndlessTextCreator, IntegerRangeSampler, check_load_processor

 __all__ = [
-    "SyntheticDatasetCreator",
     "SyntheticDatasetConfig",
+    "SyntheticDatasetCreator",
     "SyntheticTextItemsGenerator",
 ]


 class SyntheticDatasetConfig(BaseModel):
+    prefix_tokens: int = Field(
+        description="The number of shared prefix tokens to prepend to each prompt.",
+        ge=0,
+        default=0,
+    )
     prompt_tokens: int = Field(
         description="The average number of text tokens generated for prompts.",
         gt=0,
@@ -163,6 +169,10 @@ class SyntheticTextItemsGenerator(
         )
         # ensure diff distribution from output tokens
         rand = random.Random(self.random_seed + 2)  # noqa: S311
+        unique_prefix_iter = cycle(self.processor.get_vocab().values())
+
+        prefix_index = rand.randint(0, len(self.text_creator.words))
+        prefix_tokens = self._create_prompt(self.config.prefix_tokens, prefix_index)

         for _, prompt_tokens, output_tokens in zip(
             range(self.config.samples),
@@ -170,37 +180,52 @@ class SyntheticTextItemsGenerator(
             output_tokens_sampler,
         ):
             start_index = rand.randint(0, len(self.text_creator.words))
+            prompt_text = self.processor.decode(
+                prefix_tokens
+                + self._create_prompt(
+                    prompt_tokens, start_index, next(unique_prefix_iter)
+                ),
+                skip_special_tokens=True,
+            )
             yield {
-                "prompt":
-                "prompt_tokens_count": prompt_tokens,
+                "prompt": prompt_text,
+                "prompt_tokens_count": self.config.prefix_tokens + prompt_tokens,
                 "output_tokens_count": output_tokens,
             }

-    def _create_prompt(
+    def _create_prompt(
+        self, prompt_tokens: int, start_index: int, unique_prefix: Optional[int] = None
+    ) -> list[int]:
         if prompt_tokens <= 0:
-            return
+            return []

         left = start_index
         right = start_index + 4 * prompt_tokens
+        start_tokens = [unique_prefix] if unique_prefix else []

         while left < right:
             mid = (left + right) // 2
             test_prompt = self.text_creator.create_text(start_index, mid - start_index)
-            test_tokens =
+            test_tokens = start_tokens + self.processor.encode(test_prompt)

-            if test_tokens == prompt_tokens:
-                return
-            elif test_tokens < prompt_tokens:
+            if len(test_tokens) == prompt_tokens:
+                return test_tokens
+            elif len(test_tokens) < prompt_tokens:
                 left = mid + 1
             else:
                 right = mid

-
+        final_text = self.text_creator.create_text(start_index, left - start_index)
+        return start_tokens + self.processor.encode(final_text)


 class SyntheticDatasetCreator(DatasetCreator):
     @classmethod
-    def is_supported(
+    def is_supported(
+        cls,
+        data: Any,
+        data_args: Optional[dict[str, Any]],  # noqa: ARG003
+    ) -> bool:
         if (
             isinstance(data, Path)
             and data.exists()
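For context, the reworked _create_prompt sizes each prompt by binary-searching how much source text to take so the tokenized length hits the target; the unique vocab token cycled into each prompt appears intended to keep prompt prefixes distinct across requests (e.g., so server-side prefix caching does not skew results). A standalone sketch of the search, with a toy whitespace tokenizer standing in for the transformers processor:

    # Standalone sketch of the prompt-sizing binary search; encode() is a toy
    # stand-in for processor.encode, so tokens == words here.
    def encode(text: str) -> list[int]:
        return [hash(w) for w in text.split()]

    def size_prompt(words: list[str], start: int, target_tokens: int) -> list[int]:
        # Search over how many source words to take from `start` so the
        # encoded length lands on target_tokens (same bounds as the diff).
        left, right = start, start + 4 * target_tokens
        while left < right:
            mid = (left + right) // 2
            tokens = encode(" ".join(words[start:mid]))
            if len(tokens) == target_tokens:
                return tokens
            elif len(tokens) < target_tokens:
                left = mid + 1
            else:
                right = mid
        return encode(" ".join(words[start:left]))

    words = ("the quick brown fox jumps over the lazy dog " * 50).split()
    print(len(size_prompt(words, 0, 32)))  # -> 32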
guidellm/logger.py
CHANGED
@@ -9,13 +9,16 @@ Environment Variables:
 - GUIDELLM__LOGGING__DISABLED: Disable logging (default: false).
 - GUIDELLM__LOGGING__CLEAR_LOGGERS: Clear existing loggers
   from loguru (default: true).
--
+- GUIDELLM__LOGGING__CONSOLE_LOG_LEVEL: Log level for console logging
   (default: none, options: DEBUG, INFO, WARNING, ERROR, CRITICAL).
--
+- GUIDELLM__LOGGING__LOG_FILE: Path to the log file for file logging
   (default: guidellm.log if log file level set else none)
--
+- GUIDELLM__LOGGING__LOG_FILE_LEVEL: Log level for file logging
   (default: INFO if log file set else none).

+If logging isn't responding to the environment variables, run the `guidellm config`
+command to validate that the environment variables match and are being set correctly.
+
 Usage:
     from guidellm import logger, configure_logger, LoggerConfig

@@ -68,7 +71,8 @@ def configure_logger(config: LoggingSettings = settings.logging):
     logger.add(
         sys.stdout,
         level=config.console_log_level.upper(),
-        format="{time
+        format="<green>{time:YY-MM-DD HH:mm:ss}</green>|<level>{level: <8}</level> \
+|<cyan>{name}:{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>"
     )

     if config.log_file or config.log_file_level:
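A minimal standalone demo of the new console format, using plain loguru with no guidellm imports, so the output style can be previewed; the log line shown is illustrative:

    import sys
    from loguru import logger

    logger.remove()  # drop loguru's default handler
    logger.add(
        sys.stdout,
        level="INFO",
        format="<green>{time:YY-MM-DD HH:mm:ss}</green>|<level>{level: <8}</level> "
        "|<cyan>{name}:{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>",
    )
    logger.info("benchmark started")
    # -> 25-01-01 12:00:00|INFO     |__main__:<module>:12 - benchmark started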
guidellm/objects/__init__.py
CHANGED
@@ -8,11 +8,11 @@ from .statistics import (
 )

 __all__ = [
-    "StandardBaseModel",
-    "StatusBreakdown",
     "DistributionSummary",
     "Percentiles",
     "RunningStats",
+    "StandardBaseModel",
+    "StatusBreakdown",
     "StatusDistributionSummary",
     "TimeRunningStats",
 ]
guidellm/objects/pydantic.py
CHANGED
@@ -1,10 +1,15 @@
-
+import json
+from pathlib import Path
+from typing import Any, Generic, Optional, TypeVar

+import yaml
 from loguru import logger
 from pydantic import BaseModel, ConfigDict, Field

 __all__ = ["StandardBaseModel", "StatusBreakdown"]

+T = TypeVar("T", bound="StandardBaseModel")
+

 class StandardBaseModel(BaseModel):
     """
@@ -27,6 +32,30 @@ class StandardBaseModel(BaseModel):
             data,
         )

+    @classmethod
+    def get_default(cls: type[T], field: str) -> Any:
+        """Get default values for model fields"""
+        return cls.model_fields[field].default
+
+    @classmethod
+    def from_file(cls: type[T], filename: Path, overrides: Optional[dict] = None) -> T:
+        """
+        Attempt to create a new instance of the model using
+        data loaded from json or yaml file.
+        """
+        try:
+            with filename.open() as f:
+                if str(filename).endswith(".json"):
+                    data = json.load(f)
+                else:  # Assume everything else is yaml
+                    data = yaml.safe_load(f)
+        except (json.JSONDecodeError, yaml.YAMLError) as e:
+            logger.error(f"Failed to parse {filename} as type {cls.__name__}")
+            raise ValueError(f"Error when parsing file: {filename}") from e
+
+        data.update(overrides)
+        return cls.model_validate(data)
+

 SuccessfulT = TypeVar("SuccessfulT")
 ErroredT = TypeVar("ErroredT")
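Hypothetical usage of the new StandardBaseModel.from_file helper; the file name and field values below are illustrative:

    from pathlib import Path
    from guidellm.benchmark.scenario import GenerativeTextScenario

    scenario = GenerativeTextScenario.from_file(
        Path("my_scenario.json"),
        # overrides is applied via dict.update, so pass {} rather than None
        # when there is nothing to override.
        overrides={"max_seconds": 60},  # wins over the value in the file
    )
    print(scenario.get_default("random_seed"))  # -> 42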
guidellm/objects/statistics.py
CHANGED
@@ -9,10 +9,10 @@ from pydantic import Field, computed_field
 from guidellm.objects.pydantic import StandardBaseModel, StatusBreakdown

 __all__ = [
-    "Percentiles",
     "DistributionSummary",
-    "
+    "Percentiles",
     "RunningStats",
+    "StatusDistributionSummary",
     "TimeRunningStats",
 ]

@@ -37,6 +37,9 @@ class Percentiles(StandardBaseModel):
     p25: float = Field(
         description="The 25th percentile of the distribution.",
     )
+    p50: float = Field(
+        description="The 50th percentile of the distribution.",
+    )
     p75: float = Field(
         description="The 75th percentile of the distribution.",
     )
@@ -159,6 +162,7 @@ class DistributionSummary(StandardBaseModel):
             p05=cdf[np.argmax(cdf[:, 1] >= 0.05), 0].item(),  # noqa: PLR2004
             p10=cdf[np.argmax(cdf[:, 1] >= 0.1), 0].item(),  # noqa: PLR2004
             p25=cdf[np.argmax(cdf[:, 1] >= 0.25), 0].item(),  # noqa: PLR2004
+            p50=cdf[np.argmax(cdf[:, 1] >= 0.50), 0].item(),  # noqa: PLR2004
             p75=cdf[np.argmax(cdf[:, 1] >= 0.75), 0].item(),  # noqa: PLR2004
             p90=cdf[np.argmax(cdf[:, 1] >= 0.9), 0].item(),  # noqa: PLR2004
             p95=cdf[np.argmax(cdf[:, 1] >= 0.95), 0].item(),  # noqa: PLR2004
@@ -172,6 +176,7 @@ class DistributionSummary(StandardBaseModel):
             p05=0,
             p10=0,
             p25=0,
+            p50=0,
             p75=0,
             p90=0,
             p95=0,
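For context, the new p50 lines read the median off an empirical CDF stored as an (N, 2) array of (value, cumulative probability) rows sorted by value; np.argmax on the boolean mask returns the first row whose cumulative probability reaches the target. A small self-contained demo of the lookup:

    import numpy as np

    values = np.array([1.0, 2.0, 4.0, 8.0])
    probs = np.cumsum([0.25, 0.25, 0.25, 0.25])  # -> [0.25, 0.5, 0.75, 1.0]
    cdf = np.column_stack([values, probs])

    # First value whose cumulative probability reaches 50%.
    p50 = cdf[np.argmax(cdf[:, 1] >= 0.50), 0].item()
    print(p50)  # -> 2.0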
@@ -238,18 +243,9 @@ class DistributionSummary(StandardBaseModel):
         """
         if distribution_type == "concurrency":
             # convert to delta changes based on when requests were running
-
-
-
-            time_deltas[end] -= 1
-
-            # convert to the events over time measuring concurrency changes
-            events = []
-            active = 0
-
-            for time, delta in sorted(time_deltas.items()):
-                active += delta
-                events.append((time, active))
+            events = [(start, 1) for start, _ in requests] + [
+                (end, -1) for _, end in requests
+            ]
         elif distribution_type == "rate":
             # convert to events for when requests finished
             global_start = min(start for start, _ in requests) if requests else 0
@@ -276,6 +272,16 @@ class DistributionSummary(StandardBaseModel):
             else:
                 flattened_events.append((time, val))

+        if distribution_type == "concurrency":
+            # convert to the events over time measuring concurrency changes
+            events_over_time: list[tuple[float, float]] = []
+            active = 0
+            for time, delta in flattened_events:
+                active += delta  # type: ignore [assignment]
+                events_over_time.append((time, active))
+
+            flattened_events = events_over_time
+
         # convert to value distribution function
         distribution: dict[float, float] = defaultdict(float)
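For context, the reworked concurrency path is a sweep line: emit a +1 event at each request start and a -1 event at each end, sort, then prefix-sum to get the number of in-flight requests over time. A standalone sketch:

    requests = [(0.0, 4.0), (1.0, 3.0), (2.0, 5.0)]  # (start, end) pairs

    events = [(start, 1) for start, _ in requests] + [
        (end, -1) for _, end in requests
    ]

    active, concurrency = 0, []
    for time, delta in sorted(events):
        active += delta
        concurrency.append((time, active))

    print(concurrency)
    # [(0.0, 1), (1.0, 2), (2.0, 3), (3.0, 2), (4.0, 1), (5.0, 0)]

The full method additionally merges events that share a timestamp before the prefix sum, which is what the flattened_events step in the second hunk feeds into.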