guidellm 0.3.0rc20250507__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- guidellm/__init__.py +8 -13
- guidellm/__main__.py +290 -69
- guidellm/backend/__init__.py +6 -6
- guidellm/backend/backend.py +25 -4
- guidellm/backend/openai.py +153 -30
- guidellm/backend/response.py +6 -2
- guidellm/benchmark/__init__.py +16 -22
- guidellm/benchmark/aggregator.py +3 -3
- guidellm/benchmark/benchmark.py +11 -12
- guidellm/benchmark/benchmarker.py +2 -2
- guidellm/benchmark/entrypoints.py +34 -10
- guidellm/benchmark/output.py +59 -8
- guidellm/benchmark/profile.py +4 -4
- guidellm/benchmark/progress.py +2 -2
- guidellm/benchmark/scenario.py +104 -0
- guidellm/benchmark/scenarios/__init__.py +0 -0
- guidellm/config.py +32 -7
- guidellm/dataset/__init__.py +4 -4
- guidellm/dataset/creator.py +1 -1
- guidellm/dataset/synthetic.py +36 -11
- guidellm/logger.py +8 -4
- guidellm/objects/__init__.py +2 -2
- guidellm/objects/pydantic.py +30 -1
- guidellm/objects/statistics.py +20 -14
- guidellm/preprocess/__init__.py +3 -0
- guidellm/preprocess/dataset.py +374 -0
- guidellm/presentation/__init__.py +28 -0
- guidellm/presentation/builder.py +27 -0
- guidellm/presentation/data_models.py +232 -0
- guidellm/presentation/injector.py +66 -0
- guidellm/request/__init__.py +6 -3
- guidellm/request/loader.py +5 -5
- guidellm/{scheduler → request}/types.py +4 -1
- guidellm/scheduler/__init__.py +10 -15
- guidellm/scheduler/queues.py +25 -0
- guidellm/scheduler/result.py +21 -3
- guidellm/scheduler/scheduler.py +68 -60
- guidellm/scheduler/strategy.py +26 -24
- guidellm/scheduler/worker.py +64 -103
- guidellm/utils/__init__.py +17 -5
- guidellm/utils/cli.py +62 -0
- guidellm/utils/default_group.py +105 -0
- guidellm/utils/dict.py +23 -0
- guidellm/utils/hf_datasets.py +36 -0
- guidellm/utils/random.py +1 -1
- guidellm/utils/text.py +12 -5
- guidellm/version.py +6 -0
- guidellm-0.3.1.dist-info/METADATA +329 -0
- guidellm-0.3.1.dist-info/RECORD +62 -0
- {guidellm-0.3.0rc20250507.dist-info → guidellm-0.3.1.dist-info}/WHEEL +1 -1
- guidellm-0.3.0rc20250507.dist-info/METADATA +0 -451
- guidellm-0.3.0rc20250507.dist-info/RECORD +0 -48
- {guidellm-0.3.0rc20250507.dist-info → guidellm-0.3.1.dist-info}/entry_points.txt +0 -0
- {guidellm-0.3.0rc20250507.dist-info → guidellm-0.3.1.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.3.0rc20250507.dist-info → guidellm-0.3.1.dist-info}/top_level.txt +0 -0
guidellm/benchmark/output.py
CHANGED

@@ -2,6 +2,7 @@ import csv
 import json
 import math
 from collections import OrderedDict
+from copy import deepcopy
 from datetime import datetime
 from pathlib import Path
 from typing import Any, Literal, Optional, Union
@@ -25,12 +26,16 @@ from guidellm.objects import (
     StandardBaseModel,
     StatusDistributionSummary,
 )
+from guidellm.presentation import UIDataBuilder
+from guidellm.presentation.injector import create_report
 from guidellm.scheduler import strategy_display_str
 from guidellm.utils import Colors, split_text_list_by_length
+from guidellm.utils.dict import recursive_key_update
+from guidellm.utils.text import camelize_str
 
 __all__ = [
-    "GenerativeBenchmarksReport",
     "GenerativeBenchmarksConsole",
+    "GenerativeBenchmarksReport",
 ]
 
 
@@ -68,6 +73,9 @@ class GenerativeBenchmarksReport(StandardBaseModel):
         if type_ == "csv":
             raise ValueError(f"CSV file type is not supported for loading: {path}.")
 
+        if type_ == "html":
+            raise ValueError(f"HTML file type is not supported for loading: {path}.")
+
         raise ValueError(f"Unsupported file type: {type_} for {path}.")
 
     benchmarks: list[GenerativeBenchmark] = Field(
@@ -114,6 +122,9 @@ class GenerativeBenchmarksReport(StandardBaseModel):
         if type_ == "csv":
             return self.save_csv(path)
 
+        if type_ == "html":
+            return self.save_html(path)
+
         raise ValueError(f"Unsupported file type: {type_} for {path}.")
 
     def save_json(self, path: Union[str, Path]) -> Path:
@@ -133,10 +144,9 @@ class GenerativeBenchmarksReport(StandardBaseModel):
         )
 
         model_dict = self.model_dump()
-        model_json = json.dumps(model_dict)
 
-        with path.open("w") as file:
-            file.write(model_json)
+        with path.open("w", encoding="utf-8") as file:
+            json.dump(model_dict, file, ensure_ascii=False, indent=4)
 
         return path
 
@@ -220,11 +230,29 @@ class GenerativeBenchmarksReport(StandardBaseModel):
 
         return path
 
+    def save_html(self, path: Union[str, Path]) -> Path:
+        """
+        Download html, inject report data and save to a file.
+
+        :param path: The path to create the report at.
+        :return: The path to the report.
+        """
+
+        data_builder = UIDataBuilder(self.benchmarks)
+        data = data_builder.to_dict()
+        camel_data = recursive_key_update(deepcopy(data), camelize_str)
+        ui_api_data = {}
+        for k, v in camel_data.items():
+            key = f"window.{k} = {{}};"
+            value = f"window.{k} = {json.dumps(v, indent=2)};\n"
+            ui_api_data[key] = value
+        return create_report(ui_api_data, path)
+
     @staticmethod
     def _file_setup(
         path: Union[str, Path],
-        default_file_type: Literal["json", "yaml", "csv"] = "json",
-    ) -> tuple[Path, Literal["json", "yaml", "csv"]]:
+        default_file_type: Literal["json", "yaml", "csv", "html"] = "json",
+    ) -> tuple[Path, Literal["json", "yaml", "csv", "html"]]:
        path = Path(path) if not isinstance(path, Path) else path
 
         if path.is_dir():
@@ -242,7 +270,13 @@ class GenerativeBenchmarksReport(StandardBaseModel):
         if path_suffix in [".csv"]:
             return path, "csv"
 
-        raise ValueError(f"Unsupported file extension: {path_suffix} for {path}.")
+        if path_suffix in [".html"]:
+            return path, "html"
+
+        raise ValueError(
+            f"Unsupported file extension: {path_suffix} for {path}; "
+            "expected json, yaml, csv, or html."
+        )
 
     @staticmethod
     def _benchmark_desc_headers_and_values(
@@ -889,7 +923,7 @@ class GenerativeBenchmarksConsole:
             "Request Stats": (1, 2),
             "Out Tok/sec": (3, 3),
             "Tot Tok/sec": (4, 4),
-            "Req Latency (
+            "Req Latency (sec)": (5, 7),
             "TTFT (ms)": (8, 10),
             "ITL (ms)": (11, 13),
             "TPOT (ms)": (14, 16),
@@ -944,3 +978,20 @@ class GenerativeBenchmarksConsole:
             title="Benchmarks Stats",
             sections=sections,
         )
+
+    def print_full_report(self):
+        """
+        Print out the benchmark statistics to the console.
+        Temporarily enables the console if it's disabled.
+
+        Format:
+        - Metadata
+        - Info
+        - Stats
+        """
+        orig_enabled = self.enabled
+        self.enabled = True
+        self.print_benchmarks_metadata()
+        self.print_benchmarks_info()
+        self.print_benchmarks_stats()
+        self.enabled = orig_enabled
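The new save_html path builds a mapping from placeholder statements to populated window assignments; create_report (from guidellm.presentation.injector, added in this release) is then responsible for fetching the UI template and swapping the placeholders in. Below is a minimal, self-contained sketch of that substitution, assuming the template carries stub lines like "window.benchmarks = {};" (a convention inferred from the keys built in save_html, not shown in this diff):

    # Sketch of the placeholder substitution save_html sets up. The HTML
    # template is assumed to contain stub assignments that create_report
    # replaces with real report data; template fetching is omitted.
    import json

    html = "<script>\nwindow.benchmarks = {};\n</script>"
    data = {"benchmarks": [{"requestsPerSecond": 42.0}]}  # illustrative payload

    ui_api_data = {
        f"window.{k} = {{}};": f"window.{k} = {json.dumps(v, indent=2)};\n"
        for k, v in data.items()
    }
    for placeholder, injected in ui_api_data.items():
        html = html.replace(placeholder, injected)

    print(html)  # the stub is now populated with the report payload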
guidellm/benchmark/profile.py
CHANGED

@@ -17,13 +17,13 @@ from guidellm.scheduler import (
 )
 
 __all__ = [
-    "ProfileType",
+    "AsyncProfile",
+    "ConcurrentProfile",
     "Profile",
+    "ProfileType",
+    "SweepProfile",
     "SynchronousProfile",
-    "ConcurrentProfile",
     "ThroughputProfile",
-    "AsyncProfile",
-    "SweepProfile",
     "create_profile",
 ]
 
guidellm/benchmark/progress.py
CHANGED

@@ -33,10 +33,10 @@ from guidellm.scheduler import (
 from guidellm.utils import Colors
 
 __all__ = [
-    "BenchmarkerTaskProgressState",
     "BenchmarkerProgressDisplay",
-    "GenerativeTextBenchmarkerTaskProgressState",
+    "BenchmarkerTaskProgressState",
     "GenerativeTextBenchmarkerProgressDisplay",
+    "GenerativeTextBenchmarkerTaskProgressState",
 ]
 
 
guidellm/benchmark/scenario.py
ADDED

@@ -0,0 +1,104 @@
+from collections.abc import Iterable
+from functools import cache
+from pathlib import Path
+from typing import Annotated, Any, Literal, Optional, TypeVar, Union
+
+from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
+from pydantic import BeforeValidator, Field, NonNegativeInt, PositiveFloat, PositiveInt
+from transformers.tokenization_utils_base import (  # type: ignore[import]
+    PreTrainedTokenizerBase,
+)
+
+from guidellm.backend.backend import BackendType
+from guidellm.benchmark.profile import ProfileType
+from guidellm.objects.pydantic import StandardBaseModel
+from guidellm.scheduler.strategy import StrategyType
+
+__ALL__ = ["Scenario", "GenerativeTextScenario", "get_builtin_scenarios"]
+
+SCENARIO_DIR = Path(__file__).parent / "scenarios/"
+
+
+@cache
+def get_builtin_scenarios() -> list[str]:
+    """Returns list of builtin scenario names."""
+    return [p.stem for p in SCENARIO_DIR.glob("*.json")]
+
+
+def parse_float_list(value: Union[str, float, list[float]]) -> list[float]:
+    """
+    Parse a comma separated string to a list of float
+    or convert single float list of one or pass float
+    list through.
+    """
+    if isinstance(value, (int, float)):
+        return [value]
+    elif isinstance(value, list):
+        return value
+
+    values = value.split(",") if "," in value else [value]
+
+    try:
+        return [float(val) for val in values]
+    except ValueError as err:
+        raise ValueError(
+            "must be a number or comma-separated list of numbers."
+        ) from err
+
+
+T = TypeVar("T", bound="Scenario")
+
+
+class Scenario(StandardBaseModel):
+    """
+    Parent Scenario class with common options for all benchmarking types.
+    """
+
+    target: str
+
+    @classmethod
+    def from_builtin(cls: type[T], name: str, overrides: Optional[dict] = None) -> T:
+        filename = SCENARIO_DIR / f"{name}.json"
+
+        if not filename.is_file():
+            raise ValueError(f"{name} is not a valid builtin scenario")
+
+        return cls.from_file(filename, overrides)
+
+
+class GenerativeTextScenario(Scenario):
+    """
+    Scenario class for generative text benchmarks.
+    """
+
+    class Config:
+        # NOTE: This prevents errors due to unvalidatable
+        # types like PreTrainedTokenizerBase
+        arbitrary_types_allowed = True
+
+    backend_type: BackendType = "openai_http"
+    backend_args: Optional[dict[str, Any]] = None
+    model: Optional[str] = None
+    processor: Optional[Union[str, Path, PreTrainedTokenizerBase]] = None
+    processor_args: Optional[dict[str, Any]] = None
+    data: Union[
+        str,
+        Path,
+        Iterable[Union[str, dict[str, Any]]],
+        Dataset,
+        DatasetDict,
+        IterableDataset,
+        IterableDatasetDict,
+    ]
+    data_args: Optional[dict[str, Any]] = None
+    data_sampler: Optional[Literal["random"]] = None
+    rate_type: Union[StrategyType, ProfileType]
+    rate: Annotated[
+        Optional[list[PositiveFloat]], BeforeValidator(parse_float_list)
+    ] = None
+    max_seconds: Optional[PositiveFloat] = None
+    max_requests: Optional[PositiveInt] = None
+    warmup_percent: Annotated[Optional[float], Field(gt=0, le=1)] = None
+    cooldown_percent: Annotated[Optional[float], Field(gt=0, le=1)] = None
+    output_sampling: Optional[NonNegativeInt] = None
+    random_seed: int = 42
guidellm/benchmark/scenarios/__init__.py
ADDED

File without changes
guidellm/config.py
CHANGED

@@ -1,4 +1,5 @@
 import json
+import os
 from collections.abc import Sequence
 from enum import Enum
 from typing import Literal, Optional
@@ -11,8 +12,8 @@ __all__ = [
     "Environment",
     "LoggingSettings",
     "OpenAISettings",
-    "print_config",
     "Settings",
+    "print_config",
     "reload_settings",
     "settings",
 ]
@@ -30,10 +31,10 @@ class Environment(str, Enum):
 
 
 ENV_REPORT_MAPPING = {
-    Environment.PROD: "https://guidellm.
-    Environment.STAGING: "https://
-    Environment.DEV: "https://
-    Environment.LOCAL: "
+    Environment.PROD: "https://blog.vllm.ai/guidellm/ui/v0.3.0/index.html",
+    Environment.STAGING: "https://blog.vllm.ai/guidellm/ui/release/v0.3.0/index.html",
+    Environment.DEV: "https://blog.vllm.ai/guidellm/ui/dev/index.html",
+    Environment.LOCAL: "http://localhost:3000/index.html",
 }
 
 
@@ -81,10 +82,24 @@ class OpenAISettings(BaseModel):
 
     api_key: Optional[str] = None
     bearer_token: Optional[str] = None
+    headers: Optional[dict[str, str]] = None
     organization: Optional[str] = None
     project: Optional[str] = None
     base_url: str = "http://localhost:8000"
     max_output_tokens: int = 16384
+    verify: bool = True
+    max_output_key: dict[Literal["text_completions", "chat_completions"], str] = {
+        "text_completions": "max_tokens",
+        "chat_completions": "max_completion_tokens",
+    }
+
+
+class ReportGenerationSettings(BaseModel):
+    """
+    Report generation settings for the application
+    """
+
+    source: str = ""
 
 
 class Settings(BaseSettings):
@@ -115,13 +130,18 @@ class Settings(BaseSettings):
     default_sweep_number: int = 10
 
     # HTTP settings
+    request_follow_redirects: bool = True
     request_timeout: int = 60 * 5  # 5 minutes
     request_http2: bool = True
 
     # Scheduler settings
     max_concurrency: int = 512
-    max_worker_processes: int =
-
+    max_worker_processes: int = Field(
+        # use number of CPUs - 1, but at least 10
+        default_factory=lambda: max((os.cpu_count() or 1) - 1, 10)
+    )
+    min_queued_requests: int = 20
+    scheduler_start_delay: float = 5
 
     # Data settings
     dataset: DatasetSettings = DatasetSettings()
@@ -139,6 +159,9 @@ class Settings(BaseSettings):
     )
     openai: OpenAISettings = OpenAISettings()
 
+    # Report settings
+    report_generation: ReportGenerationSettings = ReportGenerationSettings()
+
     # Output settings
     table_border_char: str = "="
     table_headers_border_char: str = "-"
@@ -147,6 +170,8 @@ class Settings(BaseSettings):
     @model_validator(mode="after")
     @classmethod
     def set_default_source(cls, values):
+        if not values.report_generation.source:
+            values.report_generation.source = ENV_REPORT_MAPPING.get(values.env)
         return values
 
     def generate_env_file(self) -> str:
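The new max_worker_processes default is computed rather than fixed: one worker per CPU minus one, floored at 10. A quick worked example of the factory shown above (the values are just arithmetic, not taken from the diff):

    import os

    # Mirrors the new default_factory on Settings.max_worker_processes
    default_workers = max((os.cpu_count() or 1) - 1, 10)
    # 8 CPUs  -> max(7, 10)  = 10
    # 32 CPUs -> max(31, 10) = 31

Like the other Settings fields, this should remain overridable through the GUIDELLM__-prefixed environment variables that pydantic-settings reads (the prefix convention is visible in logger.py's docstring elsewhere in this diff).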
guidellm/dataset/__init__.py
CHANGED

@@ -10,13 +10,13 @@ from .synthetic import (
 )
 
 __all__ = [
-    "DatasetCreator",
     "ColumnInputTypes",
-    "HFDatasetsCreator",
-    "load_dataset",
+    "DatasetCreator",
     "FileDatasetCreator",
+    "HFDatasetsCreator",
     "InMemoryDatasetCreator",
-    "SyntheticDatasetCreator",
     "SyntheticDatasetConfig",
+    "SyntheticDatasetCreator",
     "SyntheticTextItemsGenerator",
+    "load_dataset",
 ]
guidellm/dataset/creator.py
CHANGED

@@ -5,7 +5,7 @@ from typing import Any, Literal, Optional, Union
 from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
 from transformers import PreTrainedTokenizerBase  # type: ignore[import]
 
-__all__ = ["DatasetCreator", "ColumnInputTypes"]
+__all__ = ["ColumnInputTypes", "DatasetCreator"]
 
 
 ColumnInputTypes = Literal[
     "prompt_column",
guidellm/dataset/synthetic.py
CHANGED

@@ -1,6 +1,7 @@
 import json
 import random
 from collections.abc import Iterable, Iterator
+from itertools import cycle
 from pathlib import Path
 from typing import Any, Literal, Optional, Union
 
@@ -18,13 +19,18 @@ from guidellm.dataset.creator import ColumnInputTypes, DatasetCreator
 from guidellm.utils import EndlessTextCreator, IntegerRangeSampler, check_load_processor
 
 __all__ = [
-    "SyntheticDatasetCreator",
     "SyntheticDatasetConfig",
+    "SyntheticDatasetCreator",
     "SyntheticTextItemsGenerator",
 ]
 
 
 class SyntheticDatasetConfig(BaseModel):
+    prefix_tokens: int = Field(
+        description="The number of shared prefix tokens to prepend to each prompt.",
+        ge=0,
+        default=0,
+    )
     prompt_tokens: int = Field(
         description="The average number of text tokens generated for prompts.",
         gt=0,
@@ -163,6 +169,10 @@ class SyntheticTextItemsGenerator(
         )
         # ensure diff distribution from output tokens
         rand = random.Random(self.random_seed + 2)  # noqa: S311
+        unique_prefix_iter = cycle(self.processor.get_vocab().values())
+
+        prefix_index = rand.randint(0, len(self.text_creator.words))
+        prefix_tokens = self._create_prompt(self.config.prefix_tokens, prefix_index)
 
         for _, prompt_tokens, output_tokens in zip(
             range(self.config.samples),
@@ -170,37 +180,52 @@ class SyntheticTextItemsGenerator(
             output_tokens_sampler,
         ):
             start_index = rand.randint(0, len(self.text_creator.words))
+            prompt_text = self.processor.decode(
+                prefix_tokens
+                + self._create_prompt(
+                    prompt_tokens, start_index, next(unique_prefix_iter)
+                ),
+                skip_special_tokens=True,
+            )
             yield {
-                "prompt":
-                "prompt_tokens_count": prompt_tokens,
+                "prompt": prompt_text,
+                "prompt_tokens_count": self.config.prefix_tokens + prompt_tokens,
                 "output_tokens_count": output_tokens,
             }
 
-    def _create_prompt(
+    def _create_prompt(
+        self, prompt_tokens: int, start_index: int, unique_prefix: Optional[int] = None
+    ) -> list[int]:
         if prompt_tokens <= 0:
-            return
+            return []
 
         left = start_index
         right = start_index + 4 * prompt_tokens
+        start_tokens = [unique_prefix] if unique_prefix else []
 
         while left < right:
             mid = (left + right) // 2
             test_prompt = self.text_creator.create_text(start_index, mid - start_index)
-            test_tokens =
+            test_tokens = start_tokens + self.processor.encode(test_prompt)
 
-            if test_tokens == prompt_tokens:
-                return
-            elif test_tokens < prompt_tokens:
+            if len(test_tokens) == prompt_tokens:
+                return test_tokens
+            elif len(test_tokens) < prompt_tokens:
                 left = mid + 1
             else:
                 right = mid
 
-        return self.text_creator.create_text(start_index, left - start_index)
+        final_text = self.text_creator.create_text(start_index, left - start_index)
+        return start_tokens + self.processor.encode(final_text)
 
 
 class SyntheticDatasetCreator(DatasetCreator):
     @classmethod
-    def is_supported(
+    def is_supported(
+        cls,
+        data: Any,
+        data_args: Optional[dict[str, Any]],  # noqa: ARG003
+    ) -> bool:
         if (
             isinstance(data, Path)
            and data.exists()
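_create_prompt bisects on the number of source words until the encoded length hits the requested token count; after this change it returns the token IDs themselves (so a shared prefix and a uniqueness token can be prepended) instead of decoded text. A standalone sketch of the same bisection, using a toy whitespace "tokenizer" in place of the HF processor and text_creator:

    # Toy stand-ins: one token per word, text drawn from a fixed word pool.
    words = ("lorem ipsum dolor sit amet " * 200).split()

    def create_text(start: int, length: int) -> str:
        return " ".join(words[start : start + max(length, 0)])

    def encode(text: str) -> list[str]:
        return text.split()

    def create_prompt(prompt_tokens: int, start_index: int) -> list[str]:
        if prompt_tokens <= 0:
            return []
        left, right = start_index, start_index + 4 * prompt_tokens
        while left < right:
            mid = (left + right) // 2
            test_tokens = encode(create_text(start_index, mid - start_index))
            if len(test_tokens) == prompt_tokens:
                return test_tokens
            elif len(test_tokens) < prompt_tokens:
                left = mid + 1
            else:
                right = mid
        return encode(create_text(start_index, left - start_index))

    assert len(create_prompt(32, 5)) == 32

With a real subword tokenizer the token count is not monotone word by word, which is why the actual method re-encodes the final text after the search converges rather than assuming an exact hit.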
guidellm/logger.py
CHANGED

@@ -9,13 +9,16 @@ Environment Variables:
     - GUIDELLM__LOGGING__DISABLED: Disable logging (default: false).
     - GUIDELLM__LOGGING__CLEAR_LOGGERS: Clear existing loggers
       from loguru (default: true).
-    -
+    - GUIDELLM__LOGGING__CONSOLE_LOG_LEVEL: Log level for console logging
       (default: none, options: DEBUG, INFO, WARNING, ERROR, CRITICAL).
-    -
+    - GUIDELLM__LOGGING__LOG_FILE: Path to the log file for file logging
       (default: guidellm.log if log file level set else none)
-    -
+    - GUIDELLM__LOGGING__LOG_FILE_LEVEL: Log level for file logging
       (default: INFO if log file set else none).
 
+If logging isn't responding to the environment variables, run the `guidellm config`
+command to validate that the environment variables match and are being set correctly.
+
 Usage:
     from guidellm import logger, configure_logger, LoggerConfig
 
@@ -68,7 +71,8 @@ def configure_logger(config: LoggingSettings = settings.logging):
         logger.add(
             sys.stdout,
             level=config.console_log_level.upper(),
-            format="{time
+            format="<green>{time:YY-MM-DD HH:mm:ss}</green>|<level>{level: <8}</level> \
+|<cyan>{name}:{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>"
         )
 
         if config.log_file or config.log_file_level:
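The new console format can be previewed outside guidellm, since loguru's logger.add accepts the same markup-tagged format string; a minimal sketch:

    import sys
    from loguru import logger

    logger.remove()  # drop the default sink so only this format is used
    logger.add(
        sys.stdout,
        level="INFO",
        format="<green>{time:YY-MM-DD HH:mm:ss}</green>|<level>{level: <8}</level> "
        "|<cyan>{name}:{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>",
    )
    logger.info("console sink configured")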
guidellm/objects/__init__.py
CHANGED

@@ -8,11 +8,11 @@ from .statistics import (
 )
 
 __all__ = [
-    "StandardBaseModel",
-    "StatusBreakdown",
     "DistributionSummary",
     "Percentiles",
     "RunningStats",
+    "StandardBaseModel",
+    "StatusBreakdown",
     "StatusDistributionSummary",
     "TimeRunningStats",
 ]
guidellm/objects/pydantic.py
CHANGED

@@ -1,10 +1,15 @@
-from typing import Any, Generic, TypeVar
+import json
+from pathlib import Path
+from typing import Any, Generic, Optional, TypeVar
 
+import yaml
 from loguru import logger
 from pydantic import BaseModel, ConfigDict, Field
 
 __all__ = ["StandardBaseModel", "StatusBreakdown"]
 
+T = TypeVar("T", bound="StandardBaseModel")
+
 
 class StandardBaseModel(BaseModel):
     """
@@ -27,6 +32,30 @@ class StandardBaseModel(BaseModel):
             data,
         )
 
+    @classmethod
+    def get_default(cls: type[T], field: str) -> Any:
+        """Get default values for model fields"""
+        return cls.model_fields[field].default
+
+    @classmethod
+    def from_file(cls: type[T], filename: Path, overrides: Optional[dict] = None) -> T:
+        """
+        Attempt to create a new instance of the model using
+        data loaded from json or yaml file.
+        """
+        try:
+            with filename.open() as f:
+                if str(filename).endswith(".json"):
+                    data = json.load(f)
+                else:  # Assume everything else is yaml
+                    data = yaml.safe_load(f)
+        except (json.JSONDecodeError, yaml.YAMLError) as e:
+            logger.error(f"Failed to parse {filename} as type {cls.__name__}")
+            raise ValueError(f"Error when parsing file: {filename}") from e
+
+        data.update(overrides)
+        return cls.model_validate(data)
+
 
 SuccessfulT = TypeVar("SuccessfulT")
 ErroredT = TypeVar("ErroredT")
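A hypothetical round trip for the new from_file helper (the file name and field values are invented for illustration; GenerativeTextScenario inherits from_file through Scenario and StandardBaseModel):

    from pathlib import Path

    from guidellm.benchmark.scenario import GenerativeTextScenario

    cfg = Path("scenario.yaml")  # hypothetical scenario file
    cfg.write_text(
        "target: http://localhost:8000\n"
        "rate_type: synchronous\n"
        "data: prompt_tokens=256,output_tokens=128\n"
    )

    scenario = GenerativeTextScenario.from_file(cfg, {"max_seconds": 60})

Note that as released, from_file calls data.update(overrides) unconditionally, so passing overrides=None (the declared default) would raise TypeError; callers need to supply at least an empty dict.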
guidellm/objects/statistics.py
CHANGED

@@ -9,10 +9,10 @@ from pydantic import Field, computed_field
 from guidellm.objects.pydantic import StandardBaseModel, StatusBreakdown
 
 __all__ = [
-    "Percentiles",
     "DistributionSummary",
-    "StatusDistributionSummary",
+    "Percentiles",
     "RunningStats",
+    "StatusDistributionSummary",
     "TimeRunningStats",
 ]
 
@@ -37,6 +37,9 @@ class Percentiles(StandardBaseModel):
     p25: float = Field(
         description="The 25th percentile of the distribution.",
     )
+    p50: float = Field(
+        description="The 50th percentile of the distribution.",
+    )
     p75: float = Field(
         description="The 75th percentile of the distribution.",
     )
@@ -159,6 +162,7 @@ class DistributionSummary(StandardBaseModel):
             p05=cdf[np.argmax(cdf[:, 1] >= 0.05), 0].item(),  # noqa: PLR2004
             p10=cdf[np.argmax(cdf[:, 1] >= 0.1), 0].item(),  # noqa: PLR2004
             p25=cdf[np.argmax(cdf[:, 1] >= 0.25), 0].item(),  # noqa: PLR2004
+            p50=cdf[np.argmax(cdf[:, 1] >= 0.50), 0].item(),  # noqa: PLR2004
             p75=cdf[np.argmax(cdf[:, 1] >= 0.75), 0].item(),  # noqa: PLR2004
             p90=cdf[np.argmax(cdf[:, 1] >= 0.9), 0].item(),  # noqa: PLR2004
             p95=cdf[np.argmax(cdf[:, 1] >= 0.95), 0].item(),  # noqa: PLR2004
@@ -172,6 +176,7 @@ class DistributionSummary(StandardBaseModel):
             p05=0,
             p10=0,
             p25=0,
+            p50=0,
             p75=0,
             p90=0,
             p95=0,
@@ -238,18 +243,9 @@ class DistributionSummary(StandardBaseModel):
         """
         if distribution_type == "concurrency":
             # convert to delta changes based on when requests were running
-            time_deltas: dict[float, int] = defaultdict(int)
-            for start, end in requests:
-                time_deltas[start] += 1
-                time_deltas[end] -= 1
-
-            # convert to the events over time measuring concurrency changes
-            events = []
-            active = 0
-
-            for time, delta in sorted(time_deltas.items()):
-                active += delta
-                events.append((time, active))
+            events = [(start, 1) for start, _ in requests] + [
+                (end, -1) for _, end in requests
+            ]
        elif distribution_type == "rate":
             # convert to events for when requests finished
             global_start = min(start for start, _ in requests) if requests else 0
@@ -276,6 +272,16 @@ class DistributionSummary(StandardBaseModel):
         else:
             flattened_events.append((time, val))
 
+        if distribution_type == "concurrency":
+            # convert to the events over time measuring concurrency changes
+            events_over_time: list[tuple[float, float]] = []
+            active = 0
+            for time, delta in flattened_events:
+                active += delta  # type: ignore [assignment]
+                events_over_time.append((time, active))
+
+            flattened_events = events_over_time
+
         # convert to value distribution function
         distribution: dict[float, float] = defaultdict(float)
 
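The reworked concurrency path is a sweep line: each request contributes a +1 delta at its start and a -1 delta at its end, and the cumulative sum after sorting gives the number of in-flight requests over time. A worked example (ignoring the merging of simultaneous timestamps that the surrounding flattened_events code handles):

    # (start, end) pairs for three overlapping requests
    requests = [(0.0, 4.0), (1.0, 3.0), (2.0, 5.0)]

    events = [(start, 1) for start, _ in requests] + [(end, -1) for _, end in requests]
    events.sort()

    active = 0
    concurrency_over_time = []
    for time, delta in events:
        active += delta
        concurrency_over_time.append((time, active))

    print(concurrency_over_time)
    # [(0.0, 1), (1.0, 2), (2.0, 3), (3.0, 2), (4.0, 1), (5.0, 0)]

Deferring the cumulative sum until after the events are flattened lets the deltas be emitted unsorted and per request, which is what the removed time_deltas bookkeeping used to do eagerly.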