guidellm 0.4.0a18__py3-none-any.whl → 0.4.0a155__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of guidellm might be problematic. Click here for more details.
- guidellm/__init__.py +5 -2
- guidellm/__main__.py +451 -252
- guidellm/backends/__init__.py +33 -0
- guidellm/backends/backend.py +110 -0
- guidellm/backends/openai.py +355 -0
- guidellm/backends/response_handlers.py +455 -0
- guidellm/benchmark/__init__.py +53 -39
- guidellm/benchmark/benchmarker.py +148 -317
- guidellm/benchmark/entrypoints.py +466 -128
- guidellm/benchmark/output.py +517 -771
- guidellm/benchmark/profile.py +580 -280
- guidellm/benchmark/progress.py +568 -549
- guidellm/benchmark/scenarios/__init__.py +40 -0
- guidellm/benchmark/scenarios/chat.json +6 -0
- guidellm/benchmark/scenarios/rag.json +6 -0
- guidellm/benchmark/schemas.py +2085 -0
- guidellm/data/__init__.py +28 -4
- guidellm/data/collators.py +16 -0
- guidellm/data/deserializers/__init__.py +53 -0
- guidellm/data/deserializers/deserializer.py +109 -0
- guidellm/data/deserializers/file.py +222 -0
- guidellm/data/deserializers/huggingface.py +94 -0
- guidellm/data/deserializers/memory.py +192 -0
- guidellm/data/deserializers/synthetic.py +346 -0
- guidellm/data/loaders.py +145 -0
- guidellm/data/preprocessors/__init__.py +25 -0
- guidellm/data/preprocessors/formatters.py +412 -0
- guidellm/data/preprocessors/mappers.py +198 -0
- guidellm/data/preprocessors/preprocessor.py +29 -0
- guidellm/data/processor.py +30 -0
- guidellm/data/schemas.py +13 -0
- guidellm/data/utils/__init__.py +10 -0
- guidellm/data/utils/dataset.py +94 -0
- guidellm/data/utils/functions.py +18 -0
- guidellm/extras/__init__.py +4 -0
- guidellm/extras/audio.py +215 -0
- guidellm/extras/vision.py +242 -0
- guidellm/logger.py +2 -2
- guidellm/mock_server/__init__.py +8 -0
- guidellm/mock_server/config.py +84 -0
- guidellm/mock_server/handlers/__init__.py +17 -0
- guidellm/mock_server/handlers/chat_completions.py +280 -0
- guidellm/mock_server/handlers/completions.py +280 -0
- guidellm/mock_server/handlers/tokenizer.py +142 -0
- guidellm/mock_server/models.py +510 -0
- guidellm/mock_server/server.py +168 -0
- guidellm/mock_server/utils.py +302 -0
- guidellm/preprocess/dataset.py +23 -26
- guidellm/presentation/builder.py +2 -2
- guidellm/presentation/data_models.py +25 -21
- guidellm/presentation/injector.py +2 -3
- guidellm/scheduler/__init__.py +65 -26
- guidellm/scheduler/constraints.py +1035 -0
- guidellm/scheduler/environments.py +252 -0
- guidellm/scheduler/scheduler.py +140 -368
- guidellm/scheduler/schemas.py +272 -0
- guidellm/scheduler/strategies.py +519 -0
- guidellm/scheduler/worker.py +391 -420
- guidellm/scheduler/worker_group.py +707 -0
- guidellm/schemas/__init__.py +31 -0
- guidellm/schemas/info.py +159 -0
- guidellm/schemas/request.py +216 -0
- guidellm/schemas/response.py +119 -0
- guidellm/schemas/stats.py +228 -0
- guidellm/{config.py → settings.py} +32 -21
- guidellm/utils/__init__.py +95 -8
- guidellm/utils/auto_importer.py +98 -0
- guidellm/utils/cli.py +46 -2
- guidellm/utils/console.py +183 -0
- guidellm/utils/encoding.py +778 -0
- guidellm/utils/functions.py +134 -0
- guidellm/utils/hf_datasets.py +1 -2
- guidellm/utils/hf_transformers.py +4 -4
- guidellm/utils/imports.py +9 -0
- guidellm/utils/messaging.py +1118 -0
- guidellm/utils/mixins.py +115 -0
- guidellm/utils/pydantic_utils.py +411 -0
- guidellm/utils/random.py +3 -4
- guidellm/utils/registry.py +220 -0
- guidellm/utils/singleton.py +133 -0
- guidellm/{objects → utils}/statistics.py +341 -247
- guidellm/utils/synchronous.py +159 -0
- guidellm/utils/text.py +163 -50
- guidellm/utils/typing.py +41 -0
- guidellm/version.py +1 -1
- {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/METADATA +33 -10
- guidellm-0.4.0a155.dist-info/RECORD +96 -0
- guidellm/backend/__init__.py +0 -23
- guidellm/backend/backend.py +0 -259
- guidellm/backend/openai.py +0 -705
- guidellm/backend/response.py +0 -136
- guidellm/benchmark/aggregator.py +0 -760
- guidellm/benchmark/benchmark.py +0 -837
- guidellm/benchmark/scenario.py +0 -104
- guidellm/data/prideandprejudice.txt.gz +0 -0
- guidellm/dataset/__init__.py +0 -22
- guidellm/dataset/creator.py +0 -213
- guidellm/dataset/entrypoints.py +0 -42
- guidellm/dataset/file.py +0 -92
- guidellm/dataset/hf_datasets.py +0 -62
- guidellm/dataset/in_memory.py +0 -132
- guidellm/dataset/synthetic.py +0 -287
- guidellm/objects/__init__.py +0 -18
- guidellm/objects/pydantic.py +0 -89
- guidellm/request/__init__.py +0 -18
- guidellm/request/loader.py +0 -284
- guidellm/request/request.py +0 -79
- guidellm/request/types.py +0 -10
- guidellm/scheduler/queues.py +0 -25
- guidellm/scheduler/result.py +0 -155
- guidellm/scheduler/strategy.py +0 -495
- guidellm-0.4.0a18.dist-info/RECORD +0 -62
- {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/WHEEL +0 -0
- {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/entry_points.txt +0 -0
- {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/top_level.txt +0 -0
|
@@ -1,25 +1,25 @@
|
|
|
1
1
|
import random
|
|
2
2
|
from collections import defaultdict
|
|
3
3
|
from math import ceil
|
|
4
|
-
from typing import TYPE_CHECKING
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
5
5
|
|
|
6
6
|
from pydantic import BaseModel, computed_field
|
|
7
7
|
|
|
8
8
|
if TYPE_CHECKING:
|
|
9
|
-
from guidellm.benchmark
|
|
9
|
+
from guidellm.benchmark import GenerativeBenchmark
|
|
10
10
|
|
|
11
|
-
from guidellm.
|
|
11
|
+
from guidellm.utils import DistributionSummary
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class Bucket(BaseModel):
|
|
15
|
-
value:
|
|
15
|
+
value: float | int
|
|
16
16
|
count: int
|
|
17
17
|
|
|
18
18
|
@staticmethod
|
|
19
19
|
def from_data(
|
|
20
|
-
data:
|
|
21
|
-
bucket_width:
|
|
22
|
-
n_buckets:
|
|
20
|
+
data: list[float] | list[int],
|
|
21
|
+
bucket_width: float | None = None,
|
|
22
|
+
n_buckets: int | None = None,
|
|
23
23
|
) -> tuple[list["Bucket"], float]:
|
|
24
24
|
if not data:
|
|
25
25
|
return [], 1.0
|
|
@@ -35,7 +35,7 @@ class Bucket(BaseModel):
|
|
|
35
35
|
else:
|
|
36
36
|
n_buckets = ceil(range_v / bucket_width)
|
|
37
37
|
|
|
38
|
-
bucket_counts: defaultdict[
|
|
38
|
+
bucket_counts: defaultdict[float | int, int] = defaultdict(int)
|
|
39
39
|
for val in data:
|
|
40
40
|
idx = int((val - min_v) // bucket_width)
|
|
41
41
|
if idx >= n_buckets:
|
|
@@ -67,12 +67,12 @@ class RunInfo(BaseModel):
|
|
|
67
67
|
|
|
68
68
|
@classmethod
|
|
69
69
|
def from_benchmarks(cls, benchmarks: list["GenerativeBenchmark"]):
|
|
70
|
-
model = benchmarks[0].
|
|
70
|
+
model = benchmarks[0].benchmarker.backend.get("model", "N/A")
|
|
71
71
|
timestamp = max(
|
|
72
72
|
bm.run_stats.start_time for bm in benchmarks if bm.start_time is not None
|
|
73
73
|
)
|
|
74
74
|
return cls(
|
|
75
|
-
model=Model(name=model, size=0),
|
|
75
|
+
model=Model(name=model or "", size=0),
|
|
76
76
|
task="N/A",
|
|
77
77
|
timestamp=timestamp,
|
|
78
78
|
dataset=Dataset(name="N/A"),
|
|
@@ -80,7 +80,7 @@ class RunInfo(BaseModel):
|
|
|
80
80
|
|
|
81
81
|
|
|
82
82
|
class Distribution(BaseModel):
|
|
83
|
-
statistics:
|
|
83
|
+
statistics: DistributionSummary | None = None
|
|
84
84
|
buckets: list[Bucket]
|
|
85
85
|
bucket_width: float
|
|
86
86
|
|
|
@@ -108,8 +108,8 @@ class WorkloadDetails(BaseModel):
|
|
|
108
108
|
|
|
109
109
|
@classmethod
|
|
110
110
|
def from_benchmarks(cls, benchmarks: list["GenerativeBenchmark"]):
|
|
111
|
-
target = benchmarks[0].
|
|
112
|
-
rate_type = benchmarks[0].
|
|
111
|
+
target = benchmarks[0].benchmarker.backend.get("target", "N/A")
|
|
112
|
+
rate_type = benchmarks[0].scheduler.strategy.type_
|
|
113
113
|
successful_requests = [
|
|
114
114
|
req for bm in benchmarks for req in bm.requests.successful
|
|
115
115
|
]
|
|
@@ -117,21 +117,25 @@ class WorkloadDetails(BaseModel):
|
|
|
117
117
|
range(len(successful_requests)), min(5, len(successful_requests))
|
|
118
118
|
)
|
|
119
119
|
sample_prompts = [
|
|
120
|
-
|
|
120
|
+
req.request_args.replace("\n", " ").replace('"', "'")
|
|
121
|
+
if (req := successful_requests[i]).request_args
|
|
122
|
+
else ""
|
|
121
123
|
for i in sample_indices
|
|
122
124
|
]
|
|
123
125
|
sample_outputs = [
|
|
124
|
-
|
|
126
|
+
req.output.replace("\n", " ").replace('"', "'")
|
|
127
|
+
if (req := successful_requests[i]).output
|
|
128
|
+
else ""
|
|
125
129
|
for i in sample_indices
|
|
126
130
|
]
|
|
127
131
|
|
|
128
132
|
prompt_tokens = [
|
|
129
|
-
float(req.prompt_tokens)
|
|
133
|
+
float(req.prompt_tokens) if req.prompt_tokens is not None else -1
|
|
130
134
|
for bm in benchmarks
|
|
131
135
|
for req in bm.requests.successful
|
|
132
136
|
]
|
|
133
137
|
output_tokens = [
|
|
134
|
-
float(req.output_tokens)
|
|
138
|
+
float(req.output_tokens) if req.output_tokens is not None else -1
|
|
135
139
|
for bm in benchmarks
|
|
136
140
|
for req in bm.requests.successful
|
|
137
141
|
]
|
|
@@ -152,13 +156,13 @@ class WorkloadDetails(BaseModel):
|
|
|
152
156
|
statistics=output_token_stats, buckets=output_token_buckets, bucket_width=1
|
|
153
157
|
)
|
|
154
158
|
|
|
155
|
-
min_start_time = benchmarks[0].
|
|
159
|
+
min_start_time = benchmarks[0].start_time
|
|
156
160
|
|
|
157
161
|
all_req_times = [
|
|
158
|
-
req.
|
|
162
|
+
req.info.timings.request_start - min_start_time
|
|
159
163
|
for bm in benchmarks
|
|
160
164
|
for req in bm.requests.successful
|
|
161
|
-
if req.
|
|
165
|
+
if req.info.timings.request_start is not None
|
|
162
166
|
]
|
|
163
167
|
number_of_buckets = len(benchmarks)
|
|
164
168
|
request_over_time_buckets, bucket_width = Bucket.from_data(
|
|
@@ -190,7 +194,7 @@ class TabularDistributionSummary(DistributionSummary):
|
|
|
190
194
|
"""
|
|
191
195
|
|
|
192
196
|
@computed_field
|
|
193
|
-
def percentile_rows(self) -> list[dict[str,
|
|
197
|
+
def percentile_rows(self) -> list[dict[str, str | float]]:
|
|
194
198
|
rows = [
|
|
195
199
|
{"percentile": name, "value": value}
|
|
196
200
|
for name, value in self.percentiles.model_dump().items()
|
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
import re
|
|
2
2
|
from pathlib import Path
|
|
3
|
-
from typing import Union
|
|
4
3
|
|
|
5
4
|
from loguru import logger
|
|
6
5
|
|
|
7
|
-
from guidellm.
|
|
6
|
+
from guidellm.settings import settings
|
|
8
7
|
from guidellm.utils.text import load_text
|
|
9
8
|
|
|
10
9
|
|
|
11
|
-
def create_report(js_data: dict, output_path:
|
|
10
|
+
def create_report(js_data: dict, output_path: str | Path) -> Path:
|
|
12
11
|
"""
|
|
13
12
|
Creates a report from the dictionary and saves it to the output path.
|
|
14
13
|
|
guidellm/scheduler/__init__.py
CHANGED
|
@@ -1,47 +1,86 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
1
|
+
"""
|
|
2
|
+
Scheduler subsystem for orchestrating benchmark workloads and managing worker processes.
|
|
3
|
+
|
|
4
|
+
This module provides the core scheduling infrastructure for guidellm, including
|
|
5
|
+
strategies for controlling request timing patterns (synchronous, asynchronous,
|
|
6
|
+
constant rate, Poisson), constraints for limiting benchmark execution (duration,
|
|
7
|
+
error rates, request counts), and distributed execution through worker processes.
|
|
8
|
+
The scheduler coordinates between backend interfaces, manages benchmark state
|
|
9
|
+
transitions, and handles multi-turn request sequences with customizable timing
|
|
10
|
+
strategies and resource constraints.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from .constraints import (
|
|
14
|
+
Constraint,
|
|
15
|
+
ConstraintInitializer,
|
|
16
|
+
ConstraintsInitializerFactory,
|
|
17
|
+
MaxDurationConstraint,
|
|
18
|
+
MaxErrorRateConstraint,
|
|
19
|
+
MaxErrorsConstraint,
|
|
20
|
+
MaxGlobalErrorRateConstraint,
|
|
21
|
+
MaxNumberConstraint,
|
|
22
|
+
PydanticConstraintInitializer,
|
|
23
|
+
SerializableConstraintInitializer,
|
|
24
|
+
UnserializableConstraintInitializer,
|
|
6
25
|
)
|
|
26
|
+
from .environments import Environment, NonDistributedEnvironment
|
|
7
27
|
from .scheduler import Scheduler
|
|
8
|
-
from .
|
|
28
|
+
from .schemas import (
|
|
29
|
+
BackendInterface,
|
|
30
|
+
BackendT,
|
|
31
|
+
MultiTurnRequestT,
|
|
32
|
+
RequestT,
|
|
33
|
+
ResponseT,
|
|
34
|
+
SchedulerMessagingPydanticRegistry,
|
|
35
|
+
SchedulerState,
|
|
36
|
+
SchedulerUpdateAction,
|
|
37
|
+
SchedulerUpdateActionProgress,
|
|
38
|
+
)
|
|
39
|
+
from .strategies import (
|
|
9
40
|
AsyncConstantStrategy,
|
|
10
41
|
AsyncPoissonStrategy,
|
|
11
42
|
ConcurrentStrategy,
|
|
12
43
|
SchedulingStrategy,
|
|
44
|
+
StrategyT,
|
|
13
45
|
StrategyType,
|
|
14
46
|
SynchronousStrategy,
|
|
15
47
|
ThroughputStrategy,
|
|
16
|
-
strategy_display_str,
|
|
17
|
-
)
|
|
18
|
-
from .worker import (
|
|
19
|
-
GenerativeRequestsWorker,
|
|
20
|
-
GenerativeRequestsWorkerDescription,
|
|
21
|
-
RequestsWorker,
|
|
22
|
-
ResolveStatus,
|
|
23
|
-
WorkerDescription,
|
|
24
|
-
WorkerProcessResult,
|
|
25
48
|
)
|
|
49
|
+
from .worker import WorkerProcess
|
|
50
|
+
from .worker_group import WorkerProcessGroup
|
|
26
51
|
|
|
27
52
|
__all__ = [
|
|
28
53
|
"AsyncConstantStrategy",
|
|
29
54
|
"AsyncPoissonStrategy",
|
|
55
|
+
"BackendInterface",
|
|
56
|
+
"BackendT",
|
|
30
57
|
"ConcurrentStrategy",
|
|
31
|
-
"
|
|
32
|
-
"
|
|
33
|
-
"
|
|
34
|
-
"
|
|
58
|
+
"Constraint",
|
|
59
|
+
"ConstraintInitializer",
|
|
60
|
+
"ConstraintsInitializerFactory",
|
|
61
|
+
"Environment",
|
|
62
|
+
"MaxDurationConstraint",
|
|
63
|
+
"MaxErrorRateConstraint",
|
|
64
|
+
"MaxErrorsConstraint",
|
|
65
|
+
"MaxGlobalErrorRateConstraint",
|
|
66
|
+
"MaxNumberConstraint",
|
|
67
|
+
"MultiTurnRequestT",
|
|
68
|
+
"NonDistributedEnvironment",
|
|
69
|
+
"PydanticConstraintInitializer",
|
|
70
|
+
"RequestT",
|
|
71
|
+
"ResponseT",
|
|
35
72
|
"Scheduler",
|
|
36
|
-
"
|
|
37
|
-
"
|
|
38
|
-
"
|
|
39
|
-
"
|
|
73
|
+
"SchedulerMessagingPydanticRegistry",
|
|
74
|
+
"SchedulerState",
|
|
75
|
+
"SchedulerUpdateAction",
|
|
76
|
+
"SchedulerUpdateActionProgress",
|
|
40
77
|
"SchedulingStrategy",
|
|
78
|
+
"SerializableConstraintInitializer",
|
|
79
|
+
"StrategyT",
|
|
41
80
|
"StrategyType",
|
|
42
81
|
"SynchronousStrategy",
|
|
43
82
|
"ThroughputStrategy",
|
|
44
|
-
"
|
|
45
|
-
"
|
|
46
|
-
"
|
|
83
|
+
"UnserializableConstraintInitializer",
|
|
84
|
+
"WorkerProcess",
|
|
85
|
+
"WorkerProcessGroup",
|
|
47
86
|
]
|