guidellm 0.3.0rc20250507__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55)
  1. guidellm/__init__.py +8 -13
  2. guidellm/__main__.py +290 -69
  3. guidellm/backend/__init__.py +6 -6
  4. guidellm/backend/backend.py +25 -4
  5. guidellm/backend/openai.py +153 -30
  6. guidellm/backend/response.py +6 -2
  7. guidellm/benchmark/__init__.py +16 -22
  8. guidellm/benchmark/aggregator.py +3 -3
  9. guidellm/benchmark/benchmark.py +11 -12
  10. guidellm/benchmark/benchmarker.py +2 -2
  11. guidellm/benchmark/entrypoints.py +34 -10
  12. guidellm/benchmark/output.py +59 -8
  13. guidellm/benchmark/profile.py +4 -4
  14. guidellm/benchmark/progress.py +2 -2
  15. guidellm/benchmark/scenario.py +104 -0
  16. guidellm/benchmark/scenarios/__init__.py +0 -0
  17. guidellm/config.py +32 -7
  18. guidellm/dataset/__init__.py +4 -4
  19. guidellm/dataset/creator.py +1 -1
  20. guidellm/dataset/synthetic.py +36 -11
  21. guidellm/logger.py +8 -4
  22. guidellm/objects/__init__.py +2 -2
  23. guidellm/objects/pydantic.py +30 -1
  24. guidellm/objects/statistics.py +20 -14
  25. guidellm/preprocess/__init__.py +3 -0
  26. guidellm/preprocess/dataset.py +374 -0
  27. guidellm/presentation/__init__.py +28 -0
  28. guidellm/presentation/builder.py +27 -0
  29. guidellm/presentation/data_models.py +232 -0
  30. guidellm/presentation/injector.py +66 -0
  31. guidellm/request/__init__.py +6 -3
  32. guidellm/request/loader.py +5 -5
  33. guidellm/{scheduler → request}/types.py +4 -1
  34. guidellm/scheduler/__init__.py +10 -15
  35. guidellm/scheduler/queues.py +25 -0
  36. guidellm/scheduler/result.py +21 -3
  37. guidellm/scheduler/scheduler.py +68 -60
  38. guidellm/scheduler/strategy.py +26 -24
  39. guidellm/scheduler/worker.py +64 -103
  40. guidellm/utils/__init__.py +17 -5
  41. guidellm/utils/cli.py +62 -0
  42. guidellm/utils/default_group.py +105 -0
  43. guidellm/utils/dict.py +23 -0
  44. guidellm/utils/hf_datasets.py +36 -0
  45. guidellm/utils/random.py +1 -1
  46. guidellm/utils/text.py +12 -5
  47. guidellm/version.py +6 -0
  48. guidellm-0.3.1.dist-info/METADATA +329 -0
  49. guidellm-0.3.1.dist-info/RECORD +62 -0
  50. {guidellm-0.3.0rc20250507.dist-info → guidellm-0.3.1.dist-info}/WHEEL +1 -1
  51. guidellm-0.3.0rc20250507.dist-info/METADATA +0 -451
  52. guidellm-0.3.0rc20250507.dist-info/RECORD +0 -48
  53. {guidellm-0.3.0rc20250507.dist-info → guidellm-0.3.1.dist-info}/entry_points.txt +0 -0
  54. {guidellm-0.3.0rc20250507.dist-info → guidellm-0.3.1.dist-info}/licenses/LICENSE +0 -0
  55. {guidellm-0.3.0rc20250507.dist-info → guidellm-0.3.1.dist-info}/top_level.txt +0 -0
guidellm/benchmark/output.py CHANGED

@@ -2,6 +2,7 @@ import csv
 import json
 import math
 from collections import OrderedDict
+from copy import deepcopy
 from datetime import datetime
 from pathlib import Path
 from typing import Any, Literal, Optional, Union
@@ -25,12 +26,16 @@ from guidellm.objects import (
     StandardBaseModel,
     StatusDistributionSummary,
 )
+from guidellm.presentation import UIDataBuilder
+from guidellm.presentation.injector import create_report
 from guidellm.scheduler import strategy_display_str
 from guidellm.utils import Colors, split_text_list_by_length
+from guidellm.utils.dict import recursive_key_update
+from guidellm.utils.text import camelize_str

 __all__ = [
-    "GenerativeBenchmarksReport",
     "GenerativeBenchmarksConsole",
+    "GenerativeBenchmarksReport",
 ]


@@ -68,6 +73,9 @@ class GenerativeBenchmarksReport(StandardBaseModel):
         if type_ == "csv":
             raise ValueError(f"CSV file type is not supported for loading: {path}.")

+        if type_ == "html":
+            raise ValueError(f"HTML file type is not supported for loading: {path}.")
+
         raise ValueError(f"Unsupported file type: {type_} for {path}.")

     benchmarks: list[GenerativeBenchmark] = Field(
@@ -114,6 +122,9 @@ class GenerativeBenchmarksReport(StandardBaseModel):
         if type_ == "csv":
             return self.save_csv(path)

+        if type_ == "html":
+            return self.save_html(path)
+
         raise ValueError(f"Unsupported file type: {type_} for {path}.")

     def save_json(self, path: Union[str, Path]) -> Path:
@@ -133,10 +144,9 @@ class GenerativeBenchmarksReport(StandardBaseModel):
         )

         model_dict = self.model_dump()
-        model_json = json.dumps(model_dict)

-        with path.open("w") as file:
-            file.write(model_json)
+        with path.open("w", encoding="utf-8") as file:
+            json.dump(model_dict, file, ensure_ascii=False, indent=4)

         return path

@@ -220,11 +230,29 @@ class GenerativeBenchmarksReport(StandardBaseModel):

         return path

+    def save_html(self, path: Union[str, Path]) -> Path:
+        """
+        Download html, inject report data and save to a file.
+
+        :param path: The path to create the report at.
+        :return: The path to the report.
+        """
+
+        data_builder = UIDataBuilder(self.benchmarks)
+        data = data_builder.to_dict()
+        camel_data = recursive_key_update(deepcopy(data), camelize_str)
+        ui_api_data = {}
+        for k, v in camel_data.items():
+            key = f"window.{k} = {{}};"
+            value = f"window.{k} = {json.dumps(v, indent=2)};\n"
+            ui_api_data[key] = value
+        return create_report(ui_api_data, path)
+
     @staticmethod
     def _file_setup(
         path: Union[str, Path],
-        default_file_type: Literal["json", "yaml", "csv"] = "json",
-    ) -> tuple[Path, Literal["json", "yaml", "csv"]]:
+        default_file_type: Literal["json", "yaml", "csv", "html"] = "json",
+    ) -> tuple[Path, Literal["json", "yaml", "csv", "html"]]:
         path = Path(path) if not isinstance(path, Path) else path

         if path.is_dir():
@@ -242,7 +270,13 @@ class GenerativeBenchmarksReport(StandardBaseModel):
         if path_suffix in [".csv"]:
             return path, "csv"

-        raise ValueError(f"Unsupported file extension: {path_suffix} for {path}.")
+        if path_suffix in [".html"]:
+            return path, "html"
+
+        raise ValueError(
+            f"Unsupported file extension: {path_suffix} for {path}; "
+            "expected json, yaml, csv, or html."
+        )

     @staticmethod
     def _benchmark_desc_headers_and_values(
@@ -889,7 +923,7 @@ class GenerativeBenchmarksConsole:
             "Request Stats": (1, 2),
             "Out Tok/sec": (3, 3),
             "Tot Tok/sec": (4, 4),
-            "Req Latency (ms)": (5, 7),
+            "Req Latency (sec)": (5, 7),
             "TTFT (ms)": (8, 10),
             "ITL (ms)": (11, 13),
             "TPOT (ms)": (14, 16),
@@ -944,3 +978,20 @@
             title="Benchmarks Stats",
             sections=sections,
         )
+
+    def print_full_report(self):
+        """
+        Print out the benchmark statistics to the console.
+        Temporarily enables the console if it's disabled.
+
+        Format:
+        - Metadata
+        - Info
+        - Stats
+        """
+        orig_enabled = self.enabled
+        self.enabled = True
+        self.print_benchmarks_metadata()
+        self.print_benchmarks_info()
+        self.print_benchmarks_stats()
+        self.enabled = orig_enabled
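For orientation, a minimal usage sketch (not part of the diff) of the new HTML export, assuming `benchmarks` already holds completed GenerativeBenchmark objects; the name of the dispatching save method is not shown above, so the format-specific methods are called directly:

    from guidellm.benchmark.output import GenerativeBenchmarksReport

    report = GenerativeBenchmarksReport(benchmarks=benchmarks)
    report.save_html("benchmarks.html")   # new: injects report data into the downloaded UI page
    report.save_json("benchmarks.json")   # JSON is now written as UTF-8 with indent=4

Note that loading an HTML report back is explicitly rejected, mirroring the existing CSV restriction.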
guidellm/benchmark/profile.py CHANGED

@@ -17,13 +17,13 @@ from guidellm.scheduler import (
 )

 __all__ = [
-    "ProfileType",
+    "AsyncProfile",
+    "ConcurrentProfile",
     "Profile",
+    "ProfileType",
+    "SweepProfile",
     "SynchronousProfile",
-    "ConcurrentProfile",
     "ThroughputProfile",
-    "AsyncProfile",
-    "SweepProfile",
     "create_profile",
 ]

guidellm/benchmark/progress.py CHANGED

@@ -33,10 +33,10 @@ from guidellm.scheduler import (
 from guidellm.utils import Colors

 __all__ = [
-    "BenchmarkerTaskProgressState",
     "BenchmarkerProgressDisplay",
-    "GenerativeTextBenchmarkerTaskProgressState",
+    "BenchmarkerTaskProgressState",
     "GenerativeTextBenchmarkerProgressDisplay",
+    "GenerativeTextBenchmarkerTaskProgressState",
 ]

guidellm/benchmark/scenario.py ADDED

@@ -0,0 +1,104 @@
+from collections.abc import Iterable
+from functools import cache
+from pathlib import Path
+from typing import Annotated, Any, Literal, Optional, TypeVar, Union
+
+from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
+from pydantic import BeforeValidator, Field, NonNegativeInt, PositiveFloat, PositiveInt
+from transformers.tokenization_utils_base import (  # type: ignore[import]
+    PreTrainedTokenizerBase,
+)
+
+from guidellm.backend.backend import BackendType
+from guidellm.benchmark.profile import ProfileType
+from guidellm.objects.pydantic import StandardBaseModel
+from guidellm.scheduler.strategy import StrategyType
+
+__ALL__ = ["Scenario", "GenerativeTextScenario", "get_builtin_scenarios"]
+
+SCENARIO_DIR = Path(__file__).parent / "scenarios/"
+
+
+@cache
+def get_builtin_scenarios() -> list[str]:
+    """Returns list of builtin scenario names."""
+    return [p.stem for p in SCENARIO_DIR.glob("*.json")]
+
+
+def parse_float_list(value: Union[str, float, list[float]]) -> list[float]:
+    """
+    Parse a comma separated string to a list of float
+    or convert single float list of one or pass float
+    list through.
+    """
+    if isinstance(value, (int, float)):
+        return [value]
+    elif isinstance(value, list):
+        return value
+
+    values = value.split(",") if "," in value else [value]
+
+    try:
+        return [float(val) for val in values]
+    except ValueError as err:
+        raise ValueError(
+            "must be a number or comma-separated list of numbers."
+        ) from err
+
+
+T = TypeVar("T", bound="Scenario")
+
+
+class Scenario(StandardBaseModel):
+    """
+    Parent Scenario class with common options for all benchmarking types.
+    """
+
+    target: str
+
+    @classmethod
+    def from_builtin(cls: type[T], name: str, overrides: Optional[dict] = None) -> T:
+        filename = SCENARIO_DIR / f"{name}.json"
+
+        if not filename.is_file():
+            raise ValueError(f"{name} is not a valid builtin scenario")
+
+        return cls.from_file(filename, overrides)
+
+
+class GenerativeTextScenario(Scenario):
+    """
+    Scenario class for generative text benchmarks.
+    """
+
+    class Config:
+        # NOTE: This prevents errors due to unvalidatable
+        # types like PreTrainedTokenizerBase
+        arbitrary_types_allowed = True
+
+    backend_type: BackendType = "openai_http"
+    backend_args: Optional[dict[str, Any]] = None
+    model: Optional[str] = None
+    processor: Optional[Union[str, Path, PreTrainedTokenizerBase]] = None
+    processor_args: Optional[dict[str, Any]] = None
+    data: Union[
+        str,
+        Path,
+        Iterable[Union[str, dict[str, Any]]],
+        Dataset,
+        DatasetDict,
+        IterableDataset,
+        IterableDatasetDict,
+    ]
+    data_args: Optional[dict[str, Any]] = None
+    data_sampler: Optional[Literal["random"]] = None
+    rate_type: Union[StrategyType, ProfileType]
+    rate: Annotated[
+        Optional[list[PositiveFloat]], BeforeValidator(parse_float_list)
+    ] = None
+    max_seconds: Optional[PositiveFloat] = None
+    max_requests: Optional[PositiveInt] = None
+    warmup_percent: Annotated[Optional[float], Field(gt=0, le=1)] = None
+    cooldown_percent: Annotated[Optional[float], Field(gt=0, le=1)] = None
+    output_sampling: Optional[NonNegativeInt] = None
+    random_seed: int = 42
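For context, a hedged usage sketch (not part of the diff): loading a builtin scenario and overriding one field. The scenario name "chat" is hypothetical; the real names are the stems of whatever JSON files ship under guidellm/benchmark/scenarios/ and are returned by get_builtin_scenarios().

    from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios

    print(get_builtin_scenarios())           # names of the bundled *.json scenario files
    scenario = GenerativeTextScenario.from_builtin(
        "chat",                               # hypothetical builtin name
        overrides={"max_seconds": 60},        # merged over the file contents before validation
    )

Because the underlying from_file helper calls data.update(overrides), passing a dict (even an empty one) is the safe way to supply overrides.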
guidellm/benchmark/scenarios/__init__.py (file without changes)
guidellm/config.py CHANGED
@@ -1,4 +1,5 @@
 import json
+import os
 from collections.abc import Sequence
 from enum import Enum
 from typing import Literal, Optional
@@ -11,8 +12,8 @@ __all__ = [
     "Environment",
     "LoggingSettings",
     "OpenAISettings",
-    "print_config",
     "Settings",
+    "print_config",
     "reload_settings",
     "settings",
 ]
@@ -30,10 +31,10 @@ class Environment(str, Enum):


 ENV_REPORT_MAPPING = {
-    Environment.PROD: "https://guidellm.neuralmagic.com/local-report/index.html",
-    Environment.STAGING: "https://staging.guidellm.neuralmagic.com/local-report/index.html",
-    Environment.DEV: "https://dev.guidellm.neuralmagic.com/local-report/index.html",
-    Environment.LOCAL: "tests/dummy/report.html",
+    Environment.PROD: "https://blog.vllm.ai/guidellm/ui/v0.3.0/index.html",
+    Environment.STAGING: "https://blog.vllm.ai/guidellm/ui/release/v0.3.0/index.html",
+    Environment.DEV: "https://blog.vllm.ai/guidellm/ui/dev/index.html",
+    Environment.LOCAL: "http://localhost:3000/index.html",
 }


@@ -81,10 +82,24 @@ class OpenAISettings(BaseModel):

     api_key: Optional[str] = None
     bearer_token: Optional[str] = None
+    headers: Optional[dict[str, str]] = None
     organization: Optional[str] = None
     project: Optional[str] = None
     base_url: str = "http://localhost:8000"
     max_output_tokens: int = 16384
+    verify: bool = True
+    max_output_key: dict[Literal["text_completions", "chat_completions"], str] = {
+        "text_completions": "max_tokens",
+        "chat_completions": "max_completion_tokens",
+    }
+
+
+class ReportGenerationSettings(BaseModel):
+    """
+    Report generation settings for the application
+    """
+
+    source: str = ""


 class Settings(BaseSettings):
@@ -115,13 +130,18 @@
     default_sweep_number: int = 10

     # HTTP settings
+    request_follow_redirects: bool = True
     request_timeout: int = 60 * 5  # 5 minutes
     request_http2: bool = True

     # Scheduler settings
     max_concurrency: int = 512
-    max_worker_processes: int = 10
-    max_add_requests_per_loop: int = 20
+    max_worker_processes: int = Field(
+        # use number of CPUs - 1, but at least 10
+        default_factory=lambda: max((os.cpu_count() or 1) - 1, 10)
+    )
+    min_queued_requests: int = 20
+    scheduler_start_delay: float = 5

     # Data settings
     dataset: DatasetSettings = DatasetSettings()
@@ -139,6 +159,9 @@
     )
     openai: OpenAISettings = OpenAISettings()

+    # Report settings
+    report_generation: ReportGenerationSettings = ReportGenerationSettings()
+
     # Output settings
     table_border_char: str = "="
     table_headers_border_char: str = "-"
@@ -147,6 +170,8 @@
     @model_validator(mode="after")
     @classmethod
     def set_default_source(cls, values):
+        if not values.report_generation.source:
+            values.report_generation.source = ENV_REPORT_MAPPING.get(values.env)
         return values

     def generate_env_file(self) -> str:
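To make the new scheduler default concrete, a small worked sketch (mirrors the default_factory in the diff; the CPU counts are examples):

    import os

    # new default: one worker per CPU minus one, but never fewer than 10
    max_worker_processes = max((os.cpu_count() or 1) - 1, 10)
    # a 4-core machine still gets 10 worker processes; a 32-core machine gets 31

As with the existing settings, these values can presumably still be overridden through GUIDELLM__-prefixed environment variables (the GUIDELLM__LOGGING__* variables documented in logger.py follow that pattern), and report_generation.source now falls back to the ENV_REPORT_MAPPING URL for the active environment when left empty.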
guidellm/dataset/__init__.py CHANGED

@@ -10,13 +10,13 @@ from .synthetic import (
 )

 __all__ = [
-    "DatasetCreator",
     "ColumnInputTypes",
-    "HFDatasetsCreator",
-    "load_dataset",
+    "DatasetCreator",
     "FileDatasetCreator",
+    "HFDatasetsCreator",
     "InMemoryDatasetCreator",
-    "SyntheticDatasetCreator",
     "SyntheticDatasetConfig",
+    "SyntheticDatasetCreator",
     "SyntheticTextItemsGenerator",
+    "load_dataset",
 ]
guidellm/dataset/creator.py CHANGED

@@ -5,7 +5,7 @@ from typing import Any, Literal, Optional, Union
 from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
 from transformers import PreTrainedTokenizerBase  # type: ignore[import]

-__all__ = ["DatasetCreator", "ColumnInputTypes"]
+__all__ = ["ColumnInputTypes", "DatasetCreator"]

 ColumnInputTypes = Literal[
     "prompt_column",
guidellm/dataset/synthetic.py CHANGED

@@ -1,6 +1,7 @@
 import json
 import random
 from collections.abc import Iterable, Iterator
+from itertools import cycle
 from pathlib import Path
 from typing import Any, Literal, Optional, Union

@@ -18,13 +19,18 @@ from guidellm.dataset.creator import ColumnInputTypes, DatasetCreator
 from guidellm.utils import EndlessTextCreator, IntegerRangeSampler, check_load_processor

 __all__ = [
-    "SyntheticDatasetCreator",
     "SyntheticDatasetConfig",
+    "SyntheticDatasetCreator",
     "SyntheticTextItemsGenerator",
 ]


 class SyntheticDatasetConfig(BaseModel):
+    prefix_tokens: int = Field(
+        description="The number of shared prefix tokens to prepend to each prompt.",
+        ge=0,
+        default=0,
+    )
     prompt_tokens: int = Field(
         description="The average number of text tokens generated for prompts.",
         gt=0,
@@ -163,6 +169,10 @@ class SyntheticTextItemsGenerator(
         )
         # ensure diff distribution from output tokens
         rand = random.Random(self.random_seed + 2)  # noqa: S311
+        unique_prefix_iter = cycle(self.processor.get_vocab().values())
+
+        prefix_index = rand.randint(0, len(self.text_creator.words))
+        prefix_tokens = self._create_prompt(self.config.prefix_tokens, prefix_index)

         for _, prompt_tokens, output_tokens in zip(
             range(self.config.samples),
@@ -170,37 +180,52 @@
             output_tokens_sampler,
         ):
             start_index = rand.randint(0, len(self.text_creator.words))
+            prompt_text = self.processor.decode(
+                prefix_tokens
+                + self._create_prompt(
+                    prompt_tokens, start_index, next(unique_prefix_iter)
+                ),
+                skip_special_tokens=True,
+            )
             yield {
-                "prompt": self._create_prompt(prompt_tokens, start_index),
-                "prompt_tokens_count": prompt_tokens,
+                "prompt": prompt_text,
+                "prompt_tokens_count": self.config.prefix_tokens + prompt_tokens,
                 "output_tokens_count": output_tokens,
             }

-    def _create_prompt(self, prompt_tokens: int, start_index: int) -> str:
+    def _create_prompt(
+        self, prompt_tokens: int, start_index: int, unique_prefix: Optional[int] = None
+    ) -> list[int]:
         if prompt_tokens <= 0:
-            return ""
+            return []

         left = start_index
         right = start_index + 4 * prompt_tokens
+        start_tokens = [unique_prefix] if unique_prefix else []

         while left < right:
             mid = (left + right) // 2
             test_prompt = self.text_creator.create_text(start_index, mid - start_index)
-            test_tokens = len(self.processor.tokenize(test_prompt))
+            test_tokens = start_tokens + self.processor.encode(test_prompt)

-            if test_tokens == prompt_tokens:
-                return test_prompt
-            elif test_tokens < prompt_tokens:
+            if len(test_tokens) == prompt_tokens:
+                return test_tokens
+            elif len(test_tokens) < prompt_tokens:
                 left = mid + 1
             else:
                 right = mid

-        return self.text_creator.create_text(start_index, left - start_index)
+        final_text = self.text_creator.create_text(start_index, left - start_index)
+        return start_tokens + self.processor.encode(final_text)


 class SyntheticDatasetCreator(DatasetCreator):
     @classmethod
-    def is_supported(cls, data: Any, data_args: Optional[dict[str, Any]]) -> bool:  # noqa: ARG003
+    def is_supported(
+        cls,
+        data: Any,
+        data_args: Optional[dict[str, Any]],  # noqa: ARG003
+    ) -> bool:
         if (
             isinstance(data, Path)
             and data.exists()
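A small worked example of the new prompt-token accounting (values illustrative; the prefix_tokens field name is from the diff):

    prefix_tokens = 64      # shared prefix prepended to every generated prompt
    prompt_tokens = 256     # per-item prompt length drawn from the sampler
    prompt_tokens_count = prefix_tokens + prompt_tokens   # 320 reported for the item

The extra token drawn from cycle(processor.get_vocab().values()) appears intended to keep otherwise-identical prompts distinct from one another, so the shared prefix stays reusable while full prompts are not exact duplicates.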
guidellm/logger.py CHANGED
@@ -9,13 +9,16 @@ Environment Variables:
 - GUIDELLM__LOGGING__DISABLED: Disable logging (default: false).
 - GUIDELLM__LOGGING__CLEAR_LOGGERS: Clear existing loggers
   from loguru (default: true).
-- GUIDELLM__LOGGING__LOG_LEVEL: Log level for console logging
+- GUIDELLM__LOGGING__CONSOLE_LOG_LEVEL: Log level for console logging
   (default: none, options: DEBUG, INFO, WARNING, ERROR, CRITICAL).
-- GUIDELLM__LOGGING__FILE: Path to the log file for file logging
+- GUIDELLM__LOGGING__LOG_FILE: Path to the log file for file logging
   (default: guidellm.log if log file level set else none)
-- GUIDELLM__LOGGING__FILE_LEVEL: Log level for file logging
+- GUIDELLM__LOGGING__LOG_FILE_LEVEL: Log level for file logging
   (default: INFO if log file set else none).

+If logging isn't responding to the environment variables, run the `guidellm config`
+command to validate that the environment variables match and are being set correctly.
+
 Usage:
     from guidellm import logger, configure_logger, LoggerConfig

@@ -68,7 +71,8 @@ def configure_logger(config: LoggingSettings = settings.logging):
         logger.add(
             sys.stdout,
             level=config.console_log_level.upper(),
-            format="{time} | {function} | {level} - {message}",
+            format="<green>{time:YY-MM-DD HH:mm:ss}</green>|<level>{level: <8}</level> \
+|<cyan>{name}:{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>"
         )

     if config.log_file or config.log_file_level:
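For reference, a standalone sketch of the new console log format (uses only loguru; the level and message are examples):

    import sys
    from loguru import logger

    logger.remove()  # drop loguru's default handler
    logger.add(
        sys.stdout,
        level="INFO",
        format="<green>{time:YY-MM-DD HH:mm:ss}</green>|<level>{level: <8}</level> "
        "|<cyan>{name}:{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>",
    )
    logger.info("benchmark started")
    # e.g. 25-01-01 12:00:00|INFO    |__main__:<module>:11 - benchmark started

The renamed GUIDELLM__LOGGING__CONSOLE_LOG_LEVEL, GUIDELLM__LOGGING__LOG_FILE, and GUIDELLM__LOGGING__LOG_FILE_LEVEL variables select the levels and file destination as documented above.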
guidellm/objects/__init__.py CHANGED

@@ -8,11 +8,11 @@ from .statistics import (
 )

 __all__ = [
-    "StandardBaseModel",
-    "StatusBreakdown",
     "DistributionSummary",
     "Percentiles",
     "RunningStats",
+    "StandardBaseModel",
+    "StatusBreakdown",
     "StatusDistributionSummary",
     "TimeRunningStats",
 ]
guidellm/objects/pydantic.py CHANGED

@@ -1,10 +1,15 @@
-from typing import Any, Generic, TypeVar
+import json
+from pathlib import Path
+from typing import Any, Generic, Optional, TypeVar

+import yaml
 from loguru import logger
 from pydantic import BaseModel, ConfigDict, Field

 __all__ = ["StandardBaseModel", "StatusBreakdown"]

+T = TypeVar("T", bound="StandardBaseModel")
+

 class StandardBaseModel(BaseModel):
     """
@@ -27,6 +32,30 @@ class StandardBaseModel(BaseModel):
             data,
         )

+    @classmethod
+    def get_default(cls: type[T], field: str) -> Any:
+        """Get default values for model fields"""
+        return cls.model_fields[field].default
+
+    @classmethod
+    def from_file(cls: type[T], filename: Path, overrides: Optional[dict] = None) -> T:
+        """
+        Attempt to create a new instance of the model using
+        data loaded from json or yaml file.
+        """
+        try:
+            with filename.open() as f:
+                if str(filename).endswith(".json"):
+                    data = json.load(f)
+                else:  # Assume everything else is yaml
+                    data = yaml.safe_load(f)
+        except (json.JSONDecodeError, yaml.YAMLError) as e:
+            logger.error(f"Failed to parse {filename} as type {cls.__name__}")
+            raise ValueError(f"Error when parsing file: {filename}") from e
+
+        data.update(overrides)
+        return cls.model_validate(data)
+

 SuccessfulT = TypeVar("SuccessfulT")
 ErroredT = TypeVar("ErroredT")
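A hypothetical usage sketch of the new helpers (the model, file name, and field are illustrative, not from the package):

    from pathlib import Path
    from guidellm.objects.pydantic import StandardBaseModel

    class MyScenario(StandardBaseModel):
        target: str
        max_seconds: int = 30

    Path("scenario.yaml").write_text("target: http://localhost:8000\n")
    cfg = MyScenario.from_file(Path("scenario.yaml"), overrides={"max_seconds": 60})
    print(cfg.target, cfg.max_seconds)            # http://localhost:8000 60
    print(MyScenario.get_default("max_seconds"))  # 30

Note that from_file as written calls data.update(overrides) unconditionally, so callers should pass a dict (possibly empty) rather than rely on the None default.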
guidellm/objects/statistics.py CHANGED

@@ -9,10 +9,10 @@ from pydantic import Field, computed_field
 from guidellm.objects.pydantic import StandardBaseModel, StatusBreakdown

 __all__ = [
-    "Percentiles",
     "DistributionSummary",
-    "StatusDistributionSummary",
+    "Percentiles",
     "RunningStats",
+    "StatusDistributionSummary",
     "TimeRunningStats",
 ]

@@ -37,6 +37,9 @@ class Percentiles(StandardBaseModel):
     p25: float = Field(
         description="The 25th percentile of the distribution.",
     )
+    p50: float = Field(
+        description="The 50th percentile of the distribution.",
+    )
     p75: float = Field(
         description="The 75th percentile of the distribution.",
     )
@@ -159,6 +162,7 @@ class DistributionSummary(StandardBaseModel):
             p05=cdf[np.argmax(cdf[:, 1] >= 0.05), 0].item(),  # noqa: PLR2004
             p10=cdf[np.argmax(cdf[:, 1] >= 0.1), 0].item(),  # noqa: PLR2004
             p25=cdf[np.argmax(cdf[:, 1] >= 0.25), 0].item(),  # noqa: PLR2004
+            p50=cdf[np.argmax(cdf[:, 1] >= 0.50), 0].item(),  # noqa: PLR2004
             p75=cdf[np.argmax(cdf[:, 1] >= 0.75), 0].item(),  # noqa: PLR2004
             p90=cdf[np.argmax(cdf[:, 1] >= 0.9), 0].item(),  # noqa: PLR2004
             p95=cdf[np.argmax(cdf[:, 1] >= 0.95), 0].item(),  # noqa: PLR2004
@@ -172,6 +176,7 @@
             p05=0,
             p10=0,
             p25=0,
+            p50=0,
             p75=0,
             p90=0,
             p95=0,
@@ -238,18 +243,9 @@
         """
         if distribution_type == "concurrency":
             # convert to delta changes based on when requests were running
-            time_deltas: dict[float, int] = defaultdict(int)
-            for start, end in requests:
-                time_deltas[start] += 1
-                time_deltas[end] -= 1
-
-            # convert to the events over time measuring concurrency changes
-            events = []
-            active = 0
-
-            for time, delta in sorted(time_deltas.items()):
-                active += delta
-                events.append((time, active))
+            events = [(start, 1) for start, _ in requests] + [
+                (end, -1) for _, end in requests
+            ]
         elif distribution_type == "rate":
             # convert to events for when requests finished
             global_start = min(start for start, _ in requests) if requests else 0
@@ -276,6 +272,16 @@
             else:
                 flattened_events.append((time, val))

+        if distribution_type == "concurrency":
+            # convert to the events over time measuring concurrency changes
+            events_over_time: list[tuple[float, float]] = []
+            active = 0
+            for time, delta in flattened_events:
+                active += delta  # type: ignore [assignment]
+                events_over_time.append((time, active))
+
+            flattened_events = events_over_time
+

         # convert to value distribution function
         distribution: dict[float, float] = defaultdict(float)
guidellm/preprocess/__init__.py ADDED

@@ -0,0 +1,3 @@
+from .dataset import ShortPromptStrategy, process_dataset
+
+__all__ = ["ShortPromptStrategy", "process_dataset"]