guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- guidellm/__init__.py +5 -2
- guidellm/__main__.py +524 -255
- guidellm/backends/__init__.py +33 -0
- guidellm/backends/backend.py +109 -0
- guidellm/backends/openai.py +340 -0
- guidellm/backends/response_handlers.py +428 -0
- guidellm/benchmark/__init__.py +69 -39
- guidellm/benchmark/benchmarker.py +160 -316
- guidellm/benchmark/entrypoints.py +560 -127
- guidellm/benchmark/outputs/__init__.py +24 -0
- guidellm/benchmark/outputs/console.py +633 -0
- guidellm/benchmark/outputs/csv.py +721 -0
- guidellm/benchmark/outputs/html.py +473 -0
- guidellm/benchmark/outputs/output.py +169 -0
- guidellm/benchmark/outputs/serialized.py +69 -0
- guidellm/benchmark/profiles.py +718 -0
- guidellm/benchmark/progress.py +553 -556
- guidellm/benchmark/scenarios/__init__.py +40 -0
- guidellm/benchmark/scenarios/chat.json +6 -0
- guidellm/benchmark/scenarios/rag.json +6 -0
- guidellm/benchmark/schemas/__init__.py +66 -0
- guidellm/benchmark/schemas/base.py +402 -0
- guidellm/benchmark/schemas/generative/__init__.py +55 -0
- guidellm/benchmark/schemas/generative/accumulator.py +841 -0
- guidellm/benchmark/schemas/generative/benchmark.py +163 -0
- guidellm/benchmark/schemas/generative/entrypoints.py +381 -0
- guidellm/benchmark/schemas/generative/metrics.py +927 -0
- guidellm/benchmark/schemas/generative/report.py +158 -0
- guidellm/data/__init__.py +34 -4
- guidellm/data/builders.py +541 -0
- guidellm/data/collators.py +16 -0
- guidellm/data/config.py +120 -0
- guidellm/data/deserializers/__init__.py +49 -0
- guidellm/data/deserializers/deserializer.py +141 -0
- guidellm/data/deserializers/file.py +223 -0
- guidellm/data/deserializers/huggingface.py +94 -0
- guidellm/data/deserializers/memory.py +194 -0
- guidellm/data/deserializers/synthetic.py +246 -0
- guidellm/data/entrypoints.py +52 -0
- guidellm/data/loaders.py +190 -0
- guidellm/data/preprocessors/__init__.py +27 -0
- guidellm/data/preprocessors/formatters.py +410 -0
- guidellm/data/preprocessors/mappers.py +196 -0
- guidellm/data/preprocessors/preprocessor.py +30 -0
- guidellm/data/processor.py +29 -0
- guidellm/data/schemas.py +175 -0
- guidellm/data/utils/__init__.py +6 -0
- guidellm/data/utils/dataset.py +94 -0
- guidellm/extras/__init__.py +4 -0
- guidellm/extras/audio.py +220 -0
- guidellm/extras/vision.py +242 -0
- guidellm/logger.py +2 -2
- guidellm/mock_server/__init__.py +8 -0
- guidellm/mock_server/config.py +84 -0
- guidellm/mock_server/handlers/__init__.py +17 -0
- guidellm/mock_server/handlers/chat_completions.py +280 -0
- guidellm/mock_server/handlers/completions.py +280 -0
- guidellm/mock_server/handlers/tokenizer.py +142 -0
- guidellm/mock_server/models.py +510 -0
- guidellm/mock_server/server.py +238 -0
- guidellm/mock_server/utils.py +302 -0
- guidellm/scheduler/__init__.py +69 -26
- guidellm/scheduler/constraints/__init__.py +49 -0
- guidellm/scheduler/constraints/constraint.py +325 -0
- guidellm/scheduler/constraints/error.py +411 -0
- guidellm/scheduler/constraints/factory.py +182 -0
- guidellm/scheduler/constraints/request.py +312 -0
- guidellm/scheduler/constraints/saturation.py +722 -0
- guidellm/scheduler/environments.py +252 -0
- guidellm/scheduler/scheduler.py +137 -368
- guidellm/scheduler/schemas.py +358 -0
- guidellm/scheduler/strategies.py +617 -0
- guidellm/scheduler/worker.py +413 -419
- guidellm/scheduler/worker_group.py +712 -0
- guidellm/schemas/__init__.py +65 -0
- guidellm/schemas/base.py +417 -0
- guidellm/schemas/info.py +188 -0
- guidellm/schemas/request.py +235 -0
- guidellm/schemas/request_stats.py +349 -0
- guidellm/schemas/response.py +124 -0
- guidellm/schemas/statistics.py +1018 -0
- guidellm/{config.py → settings.py} +31 -24
- guidellm/utils/__init__.py +71 -8
- guidellm/utils/auto_importer.py +98 -0
- guidellm/utils/cli.py +132 -5
- guidellm/utils/console.py +566 -0
- guidellm/utils/encoding.py +778 -0
- guidellm/utils/functions.py +159 -0
- guidellm/utils/hf_datasets.py +1 -2
- guidellm/utils/hf_transformers.py +4 -4
- guidellm/utils/imports.py +9 -0
- guidellm/utils/messaging.py +1118 -0
- guidellm/utils/mixins.py +115 -0
- guidellm/utils/random.py +3 -4
- guidellm/utils/registry.py +220 -0
- guidellm/utils/singleton.py +133 -0
- guidellm/utils/synchronous.py +159 -0
- guidellm/utils/text.py +163 -50
- guidellm/utils/typing.py +41 -0
- guidellm/version.py +2 -2
- guidellm-0.6.0a5.dist-info/METADATA +364 -0
- guidellm-0.6.0a5.dist-info/RECORD +109 -0
- guidellm/backend/__init__.py +0 -23
- guidellm/backend/backend.py +0 -259
- guidellm/backend/openai.py +0 -708
- guidellm/backend/response.py +0 -136
- guidellm/benchmark/aggregator.py +0 -760
- guidellm/benchmark/benchmark.py +0 -837
- guidellm/benchmark/output.py +0 -997
- guidellm/benchmark/profile.py +0 -409
- guidellm/benchmark/scenario.py +0 -104
- guidellm/data/prideandprejudice.txt.gz +0 -0
- guidellm/dataset/__init__.py +0 -22
- guidellm/dataset/creator.py +0 -213
- guidellm/dataset/entrypoints.py +0 -42
- guidellm/dataset/file.py +0 -92
- guidellm/dataset/hf_datasets.py +0 -62
- guidellm/dataset/in_memory.py +0 -132
- guidellm/dataset/synthetic.py +0 -287
- guidellm/objects/__init__.py +0 -18
- guidellm/objects/pydantic.py +0 -89
- guidellm/objects/statistics.py +0 -953
- guidellm/preprocess/__init__.py +0 -3
- guidellm/preprocess/dataset.py +0 -374
- guidellm/presentation/__init__.py +0 -28
- guidellm/presentation/builder.py +0 -27
- guidellm/presentation/data_models.py +0 -232
- guidellm/presentation/injector.py +0 -66
- guidellm/request/__init__.py +0 -18
- guidellm/request/loader.py +0 -284
- guidellm/request/request.py +0 -79
- guidellm/request/types.py +0 -10
- guidellm/scheduler/queues.py +0 -25
- guidellm/scheduler/result.py +0 -155
- guidellm/scheduler/strategy.py +0 -495
- guidellm-0.3.1.dist-info/METADATA +0 -329
- guidellm-0.3.1.dist-info/RECORD +0 -62
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/WHEEL +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/entry_points.txt +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Benchmark data models and metrics for generative AI performance measurement.
|
|
3
|
+
|
|
4
|
+
Provides comprehensive data structures for capturing, storing, and analyzing
|
|
5
|
+
benchmark results from scheduler-driven generative AI workload executions.
|
|
6
|
+
Core abstractions include base benchmark interfaces, generative-specific
|
|
7
|
+
metrics with token/latency distributions, request-level statistics tracking,
|
|
8
|
+
and multi-benchmark reporting capabilities. These models enable detailed
|
|
9
|
+
performance analysis including throughput, latency, concurrency patterns, and
|
|
10
|
+
domain-specific metrics for text, image, video, and audio generation tasks.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from typing import Literal
|
|
16
|
+
|
|
17
|
+
from pydantic import Field, computed_field
|
|
18
|
+
|
|
19
|
+
from guidellm.benchmark.schemas.base import Benchmark, BenchmarkConfig
|
|
20
|
+
from guidellm.benchmark.schemas.generative.accumulator import (
|
|
21
|
+
GenerativeBenchmarkAccumulator,
|
|
22
|
+
)
|
|
23
|
+
from guidellm.benchmark.schemas.generative.metrics import (
|
|
24
|
+
GenerativeMetrics,
|
|
25
|
+
SchedulerMetrics,
|
|
26
|
+
)
|
|
27
|
+
from guidellm.scheduler import SchedulerState
|
|
28
|
+
from guidellm.schemas import (
|
|
29
|
+
GenerativeRequestStats,
|
|
30
|
+
StatusBreakdown,
|
|
31
|
+
StatusDistributionSummary,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
__all__ = ["GenerativeBenchmark"]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class GenerativeBenchmark(Benchmark[GenerativeBenchmarkAccumulator]):
    """
    Complete generative AI benchmark results with specialized metrics.

    Encapsulates comprehensive performance data from scheduler-driven generative
    workload executions including request-level statistics, token/latency
    distributions, throughput analysis, and concurrency patterns. Provides computed
    fields for temporal analysis and status-grouped request details for detailed
    post-execution reporting.

    The timing values (``start_time``, ``end_time``, ``duration``,
    ``warmup_duration``, ``cooldown_duration``) are pydantic computed fields derived
    from ``scheduler_metrics``. The ``request_*`` accessors are plain properties
    that forward to the matching attribute on ``metrics``.
    """

    # Discriminator literal identifying this benchmark type.
    type_: Literal["generative_benchmark"] = "generative_benchmark"  # type: ignore[assignment]

    config: BenchmarkConfig = Field(
        description="Configuration parameters for this benchmark execution",
    )
    scheduler_state: SchedulerState = Field(
        description="Final state of the scheduler after benchmark completion",
    )
    scheduler_metrics: SchedulerMetrics = Field(
        description="Scheduler timing and performance statistics",
    )
    metrics: GenerativeMetrics = Field(
        description="Performance metrics and statistical distributions",
    )
    # The fourth type parameter is None: `compile` sets total=None for this
    # breakdown.
    requests: StatusBreakdown[
        list[GenerativeRequestStats],
        list[GenerativeRequestStats],
        list[GenerativeRequestStats],
        None,
    ] = Field(
        description=(
            "Request details grouped by status: successful, incomplete, errored"
        ),
    )

    @computed_field  # type: ignore[prop-decorator]
    @property
    def start_time(self) -> float:
        """
        :return: Benchmark start time in seconds since epoch
        """
        return self.scheduler_metrics.measure_start_time

    @computed_field  # type: ignore[prop-decorator]
    @property
    def end_time(self) -> float:
        """
        :return: Benchmark end time in seconds since epoch
        """
        return self.scheduler_metrics.measure_end_time

    @computed_field  # type: ignore[prop-decorator]
    @property
    def duration(self) -> float:
        """
        :return: Total benchmark execution duration in seconds, computed as
            ``end_time - start_time``
        """
        return self.end_time - self.start_time

    @computed_field  # type: ignore[prop-decorator]
    @property
    def warmup_duration(self) -> float:
        """
        :return: Warmup phase duration in seconds, computed as
            ``measure_start_time - request_start_time`` of the scheduler metrics
        """
        return (
            self.scheduler_metrics.measure_start_time
            - self.scheduler_metrics.request_start_time
        )

    @computed_field  # type: ignore[prop-decorator]
    @property
    def cooldown_duration(self) -> float:
        """
        :return: Cooldown phase duration in seconds, computed as
            ``request_end_time - measure_end_time`` of the scheduler metrics
        """
        return (
            self.scheduler_metrics.request_end_time
            - self.scheduler_metrics.measure_end_time
        )

    @property
    def request_latency(self) -> StatusDistributionSummary:
        """
        :return: Statistical distribution of request latencies across all requests
        """
        return self.metrics.request_latency

    @property
    def request_throughput(self) -> StatusDistributionSummary:
        """
        :return: Statistical distribution of throughput measured in requests per second
        """
        return self.metrics.requests_per_second

    @property
    def request_concurrency(self) -> StatusDistributionSummary:
        """
        :return: Statistical distribution of concurrent requests throughout execution
        """
        return self.metrics.request_concurrency

    @classmethod
    def compile(
        cls,
        accumulator: GenerativeBenchmarkAccumulator,
        scheduler_state: SchedulerState,
    ) -> GenerativeBenchmark:
        """
        Compile final benchmark results from accumulated execution state.

        :param accumulator: Accumulated benchmark state with request statistics
        :param scheduler_state: Final scheduler state after execution completion
        :return: Compiled generative benchmark instance with complete metrics
        """
        # Build through `cls` rather than the hard-coded class name so that a
        # subclass invoking the inherited constructor gets an instance of itself.
        return cls(
            config=accumulator.config,
            scheduler_state=scheduler_state,
            scheduler_metrics=SchedulerMetrics.compile(accumulator, scheduler_state),
            metrics=GenerativeMetrics.compile(accumulator),
            requests=StatusBreakdown(
                successful=accumulator.completed.get_sampled(),
                incomplete=accumulator.incomplete.get_sampled(),
                errored=accumulator.errored.get_sampled(),
                total=None,
            ),
        )
|
|
@@ -0,0 +1,381 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Configuration entrypoints for generative text benchmark execution.
|
|
3
|
+
|
|
4
|
+
Defines parameter schemas and construction logic for creating benchmark runs from
|
|
5
|
+
scenario files or runtime arguments. Provides flexible configuration loading with
|
|
6
|
+
support for built-in scenarios, custom YAML/JSON files, and programmatic overrides.
|
|
7
|
+
Handles serialization of complex types including backends, processors, and profiles
|
|
8
|
+
for persistent storage and reproduction of benchmark configurations.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import inspect
|
|
14
|
+
import json
|
|
15
|
+
from collections.abc import Callable
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Any, Literal
|
|
18
|
+
|
|
19
|
+
import yaml
|
|
20
|
+
from pydantic import (
|
|
21
|
+
AliasChoices,
|
|
22
|
+
AliasGenerator,
|
|
23
|
+
ConfigDict,
|
|
24
|
+
Field,
|
|
25
|
+
NonNegativeFloat,
|
|
26
|
+
ValidationError,
|
|
27
|
+
ValidatorFunctionWrapHandler,
|
|
28
|
+
field_serializer,
|
|
29
|
+
field_validator,
|
|
30
|
+
)
|
|
31
|
+
from torch.utils.data import Sampler
|
|
32
|
+
from transformers import PreTrainedTokenizerBase
|
|
33
|
+
|
|
34
|
+
from guidellm.backends import Backend, BackendType
|
|
35
|
+
from guidellm.benchmark.profiles import Profile, ProfileType
|
|
36
|
+
from guidellm.benchmark.scenarios import get_builtin_scenarios
|
|
37
|
+
from guidellm.benchmark.schemas.base import TransientPhaseConfig
|
|
38
|
+
from guidellm.data import DatasetPreprocessor, RequestFormatter
|
|
39
|
+
from guidellm.scheduler import StrategyType
|
|
40
|
+
from guidellm.schemas import StandardBaseModel
|
|
41
|
+
|
|
42
|
+
__all__ = ["BenchmarkGenerativeTextArgs"]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class BenchmarkGenerativeTextArgs(StandardBaseModel):
    """
    Configuration arguments for generative text benchmark execution.

    Defines all parameters for benchmark setup including target endpoint, data
    sources, backend configuration, processing pipeline, output formatting, and
    execution constraints. Supports loading from scenario files and merging with
    runtime overrides for flexible benchmark construction from multiple sources.

    Example::

        # Load from built-in scenario with overrides
        args = BenchmarkGenerativeTextArgs.create(
            scenario="chat",
            target="http://localhost:8000/v1",
            max_requests=1000
        )

        # Create from keyword arguments only
        args = BenchmarkGenerativeTextArgs(
            target="http://localhost:8000/v1",
            data=["path/to/dataset.json"],
            profile="fixed",
            rate=10.0
        )
    """

    @classmethod
    def create(
        cls, scenario: Path | str | None, **kwargs: Any
    ) -> BenchmarkGenerativeTextArgs:
        """
        Create benchmark args from scenario file and keyword arguments.

        Loads base configuration from a scenario file (built-in or custom) and
        merges it with the provided keyword arguments. Every keyword argument that
        is passed overrides the scenario value of the same name, so callers that
        want to keep a scenario setting must not pass that key at all.

        If the loaded file has a top-level ``args`` key (a saved report rather than
        a bare scenario), the value under that key is used as the scenario.

        :param scenario: Path to scenario file, built-in scenario name, or None
        :param kwargs: Keyword arguments to override scenario values
        :return: Configured benchmark args instance
        :raises ValueError: If scenario is not found, the file format is
            unsupported, or the file does not contain a mapping of arguments
        """
        constructor_kwargs: dict[str, Any] = {}

        if scenario is not None:
            if isinstance(scenario, str) and scenario in (
                builtin_scenarios := get_builtin_scenarios()
            ):
                scenario_path = builtin_scenarios[scenario]
            elif Path(scenario).is_file():
                # is_file() is already False for paths that do not exist
                scenario_path = Path(scenario)
            else:
                raise ValueError(f"Scenario '{scenario}' not found.")

            with scenario_path.open() as file:
                if scenario_path.suffix == ".json":
                    scenario_data = json.load(file)
                elif scenario_path.suffix in {".yaml", ".yml"}:
                    scenario_data = yaml.safe_load(file)
                else:
                    raise ValueError(
                        f"Unsupported scenario file format: {scenario_path.suffix}"
                    )

            if isinstance(scenario_data, dict) and "args" in scenario_data:
                # loading from a report file
                scenario_data = scenario_data["args"]

            # An empty YAML document loads as None and a JSON/YAML file may hold a
            # list or scalar; reject those with the documented error type instead
            # of failing later with an unrelated TypeError.
            if not isinstance(scenario_data, dict):
                raise ValueError(
                    f"Scenario file '{scenario_path}' must contain a mapping of "
                    f"arguments, got {type(scenario_data).__name__}"
                )

            constructor_kwargs.update(scenario_data)

        # Apply overrides from kwargs
        constructor_kwargs.update(kwargs)

        return cls.model_validate(constructor_kwargs)

    @classmethod
    def get_default(cls: type[BenchmarkGenerativeTextArgs], field: str) -> Any:
        """
        Retrieve default value for a model field.

        Extracts the default value from field metadata, handling both static
        defaults and factory functions. A factory is called with an empty dict
        only when it declares exactly one required positional parameter (the form
        pydantic documents for factories that receive already-validated data);
        every other factory is called without arguments.

        :param field: Field name to retrieve default value for
        :return: Default value for the field
        :raises ValueError: If field does not exist
        """
        if field not in cls.model_fields:
            raise ValueError(f"Field '{field}' not found in {cls.__name__}")

        field_info = cls.model_fields[field]
        factory = field_info.default_factory

        if factory is None:
            return field_info.default

        try:
            parameters = list(inspect.signature(factory).parameters.values())
        except (ValueError, TypeError):
            # Some C-implemented callables expose no signature; treat them as
            # zero-argument factories rather than propagating the error.
            return factory()  # type: ignore[call-arg]

        takes_validated_data = (
            len(parameters) == 1
            and parameters[0].kind
            in (
                inspect.Parameter.POSITIONAL_ONLY,
                inspect.Parameter.POSITIONAL_OR_KEYWORD,
            )
            and parameters[0].default is inspect.Parameter.empty
        )
        if takes_validated_data:
            return factory({})  # type: ignore[call-arg]
        return factory()  # type: ignore[call-arg]

    model_config = ConfigDict(
        extra="ignore",
        use_enum_values=True,
        from_attributes=True,
        arbitrary_types_allowed=True,
        validate_by_alias=True,
        validate_by_name=True,
        alias_generator=AliasGenerator(
            # Support field names with hyphens
            validation_alias=lambda field_name: AliasChoices(
                field_name, field_name.replace("_", "-")
            ),
        ),
    )

    # Required
    target: str = Field(description="Target endpoint URL for benchmark execution")
    # NOTE(review): listed under "Required" yet declared with a default factory;
    # pydantic does not validate defaults unless validate_default is enabled, so
    # omitting `data` presumably bypasses min_length=1 -- confirm this is intended.
    data: list[Any] = Field(
        description="List of dataset sources or data files",
        default_factory=list,
        min_length=1,
    )
    # Benchmark configuration
    profile: StrategyType | ProfileType | Profile = Field(
        default="sweep", description="Benchmark profile or scheduling strategy type"
    )
    rate: list[float] | None = Field(
        default=None, description="Request rate(s) for rate-based scheduling"
    )
    # Backend configuration
    backend: BackendType | Backend = Field(
        default="openai_http", description="Backend type or instance for execution"
    )
    backend_kwargs: dict[str, Any] | None = Field(
        default=None, description="Additional backend configuration arguments"
    )
    model: str | None = Field(default=None, description="Model identifier for backend")
    # Data configuration
    processor: str | Path | PreTrainedTokenizerBase | None = Field(
        default=None, description="Tokenizer path, name, or instance for processing"
    )
    processor_args: dict[str, Any] | None = Field(
        default=None, description="Additional tokenizer configuration arguments"
    )
    data_args: list[dict[str, Any]] | None = Field(
        default_factory=list,  # type: ignore[arg-type]
        description="Per-dataset configuration arguments",
    )
    data_samples: int = Field(
        default=-1, description="Number of samples to use from datasets (-1 for all)"
    )
    data_column_mapper: (
        DatasetPreprocessor
        | dict[str, str | list[str]]
        | Literal["generative_column_mapper"]
    ) = Field(
        default="generative_column_mapper",
        description="Column mapping preprocessor for dataset fields",
    )
    # Explicit aliases replace the generated ones so the field is also accepted
    # under the `request_type` / `request-type` names.
    data_request_formatter: RequestFormatter | dict[str, Any] | str = Field(
        default="chat_completions",
        description="Request formatting preprocessor or template name",
        validation_alias=AliasChoices(
            "data_request_formatter",
            "data-request-formatter",
            "request_type",
            "request-type",
        ),
    )
    data_collator: Callable | Literal["generative"] | None = Field(
        default="generative", description="Data collator for batch processing"
    )
    data_sampler: Sampler[int] | Literal["shuffle"] | None = Field(
        default=None, description="Data sampler for request ordering"
    )
    data_num_workers: int | None = Field(
        default=1, description="Number of workers for data loading"
    )
    dataloader_kwargs: dict[str, Any] | None = Field(
        default=None, description="Additional dataloader configuration arguments"
    )
    random_seed: int = Field(default=42, description="Random seed for reproducibility")
    # Output configuration
    # `tuple[str, ...]` (any length) rather than `tuple[str]` (exactly one item).
    outputs: list[str] | tuple[str, ...] = Field(
        default_factory=lambda: ["json", "csv", "html"],
        description=(
            "The aliases of the output types to create with their default filenames "
            "or the file names and extensions of the output types to create"
        ),
    )
    output_dir: str | Path = Field(
        default_factory=Path.cwd,
        description="The directory path to save file output types in",
    )
    # Benchmarker configuration
    sample_requests: int | None = Field(
        default=10,
        description="Number of requests to sample for detailed metrics (None for all)",
    )
    warmup: int | float | dict | TransientPhaseConfig | None = Field(
        default=None,
        description=(
            "Warmup phase config: time or requests before measurement starts "
            "(overlapping requests count toward measurement)"
        ),
    )
    cooldown: int | float | dict | TransientPhaseConfig | None = Field(
        default=None,
        description=(
            "Cooldown phase config: time or requests after measurement ends "
            "(overlapping requests count toward measurement)"
        ),
    )
    rampup: NonNegativeFloat = Field(
        default=0.0,
        description=(
            "The time, in seconds, to ramp up the request rate over. "
            "Only applicable for Throughput/Concurrent strategies"
        ),
    )
    prefer_response_metrics: bool = Field(
        default=True,
        description="Whether to prefer backend response metrics over request metrics",
    )
    # Constraints configuration
    max_seconds: int | float | None = Field(
        default=None, description="Maximum benchmark execution time in seconds"
    )
    max_requests: int | None = Field(
        default=None, description="Maximum number of requests to execute"
    )
    max_errors: int | None = Field(
        default=None, description="Maximum number of errors before stopping"
    )
    max_error_rate: float | None = Field(
        default=None, description="Maximum error rate (0-1) before stopping"
    )
    max_global_error_rate: float | None = Field(
        default=None, description="Maximum global error rate (0-1) before stopping"
    )
    over_saturation: dict[str, Any] | None = Field(
        default=None,
        description=(
            "Over-saturation detection configuration. A dict with configuration "
            "parameters (enabled, min_seconds, max_window_seconds, "
            "moe_threshold, etc.)."
        ),
    )

    @field_validator("data", "data_args", "rate", mode="wrap")
    @classmethod
    def single_to_list(
        cls, value: Any, handler: ValidatorFunctionWrapHandler
    ) -> list[Any] | None:
        """
        Accept a single item wherever a list is expected.

        Runs the standard validation first; if that fails because the input is not
        a list, the value is wrapped in a one-element list and validated again.
        Applies to the ``data``, ``data_args``, and ``rate`` fields.

        :param value: Raw input value for the field being validated
        :param handler: Pydantic handler that runs the field's standard validation
        :return: The validated field value
        :raises ValidationError: If validation fails for any reason other than the
            input not being a list, or if the wrapped value is itself invalid
        """
        try:
            return handler(value)
        except ValidationError as err:
            # If validation fails, try wrapping the value in a list
            if err.errors()[0]["type"] == "list_type":
                return handler([value])
            raise

    @field_serializer("backend")
    def serialize_backend(self, backend: BackendType | Backend) -> str:
        """Serialize backend to type string."""
        return backend.type_ if isinstance(backend, Backend) else backend

    @field_serializer("data")
    def serialize_data(self, data: list[Any]) -> list[str | None]:
        """Serialize data items to strings, keeping str and None items unchanged."""
        return [
            item if isinstance(item, str | type(None)) else str(item) for item in data
        ]

    @field_serializer("data_collator")
    def serialize_data_collator(
        self, data_collator: Callable | Literal["generative"] | None
    ) -> str | None:
        """Serialize data_collator to string; non-string values become None."""
        return data_collator if isinstance(data_collator, str) else None

    @field_serializer("data_column_mapper")
    def serialize_data_column_mapper(
        self,
        data_column_mapper: (
            DatasetPreprocessor
            | dict[str, str | list[str]]
            | Literal["generative_column_mapper"]
        ),
    ) -> dict | str:
        """Serialize data_column_mapper to dict or string; other values become {}."""
        return data_column_mapper if isinstance(data_column_mapper, dict | str) else {}

    @field_serializer("data_request_formatter")
    def serialize_data_request_formatter(
        self, data_request_formatter: RequestFormatter | dict[str, Any] | str
    ) -> dict | str:
        """Serialize data_request_formatter to dict or string; others become {}."""
        return (
            data_request_formatter
            if isinstance(data_request_formatter, dict | str)
            else {}
        )

    @field_serializer("data_sampler")
    def serialize_data_sampler(
        self, data_sampler: Sampler[int] | Literal["shuffle"] | None
    ) -> str | None:
        """Serialize data_sampler to string; non-string values become None."""
        return data_sampler if isinstance(data_sampler, str) else None

    @field_serializer("output_dir")
    def serialize_output_dir(self, output_dir: str | Path) -> str | None:
        """Serialize output_dir to string."""
        return str(output_dir) if output_dir is not None else None

    @field_serializer("processor")
    def serialize_processor(
        self, processor: str | Path | PreTrainedTokenizerBase | None
    ) -> str | None:
        """Serialize processor to string, or None when no processor is set."""
        if processor is None:
            return None
        return processor if isinstance(processor, str) else str(processor)

    @field_serializer("profile")
    def serialize_profile(self, profile: StrategyType | ProfileType | Profile) -> str:
        """Serialize profile to type string."""
        return profile.type_ if isinstance(profile, Profile) else profile
|