guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- guidellm/__init__.py +5 -2
- guidellm/__main__.py +524 -255
- guidellm/backends/__init__.py +33 -0
- guidellm/backends/backend.py +109 -0
- guidellm/backends/openai.py +340 -0
- guidellm/backends/response_handlers.py +428 -0
- guidellm/benchmark/__init__.py +69 -39
- guidellm/benchmark/benchmarker.py +160 -316
- guidellm/benchmark/entrypoints.py +560 -127
- guidellm/benchmark/outputs/__init__.py +24 -0
- guidellm/benchmark/outputs/console.py +633 -0
- guidellm/benchmark/outputs/csv.py +721 -0
- guidellm/benchmark/outputs/html.py +473 -0
- guidellm/benchmark/outputs/output.py +169 -0
- guidellm/benchmark/outputs/serialized.py +69 -0
- guidellm/benchmark/profiles.py +718 -0
- guidellm/benchmark/progress.py +553 -556
- guidellm/benchmark/scenarios/__init__.py +40 -0
- guidellm/benchmark/scenarios/chat.json +6 -0
- guidellm/benchmark/scenarios/rag.json +6 -0
- guidellm/benchmark/schemas/__init__.py +66 -0
- guidellm/benchmark/schemas/base.py +402 -0
- guidellm/benchmark/schemas/generative/__init__.py +55 -0
- guidellm/benchmark/schemas/generative/accumulator.py +841 -0
- guidellm/benchmark/schemas/generative/benchmark.py +163 -0
- guidellm/benchmark/schemas/generative/entrypoints.py +381 -0
- guidellm/benchmark/schemas/generative/metrics.py +927 -0
- guidellm/benchmark/schemas/generative/report.py +158 -0
- guidellm/data/__init__.py +34 -4
- guidellm/data/builders.py +541 -0
- guidellm/data/collators.py +16 -0
- guidellm/data/config.py +120 -0
- guidellm/data/deserializers/__init__.py +49 -0
- guidellm/data/deserializers/deserializer.py +141 -0
- guidellm/data/deserializers/file.py +223 -0
- guidellm/data/deserializers/huggingface.py +94 -0
- guidellm/data/deserializers/memory.py +194 -0
- guidellm/data/deserializers/synthetic.py +246 -0
- guidellm/data/entrypoints.py +52 -0
- guidellm/data/loaders.py +190 -0
- guidellm/data/preprocessors/__init__.py +27 -0
- guidellm/data/preprocessors/formatters.py +410 -0
- guidellm/data/preprocessors/mappers.py +196 -0
- guidellm/data/preprocessors/preprocessor.py +30 -0
- guidellm/data/processor.py +29 -0
- guidellm/data/schemas.py +175 -0
- guidellm/data/utils/__init__.py +6 -0
- guidellm/data/utils/dataset.py +94 -0
- guidellm/extras/__init__.py +4 -0
- guidellm/extras/audio.py +220 -0
- guidellm/extras/vision.py +242 -0
- guidellm/logger.py +2 -2
- guidellm/mock_server/__init__.py +8 -0
- guidellm/mock_server/config.py +84 -0
- guidellm/mock_server/handlers/__init__.py +17 -0
- guidellm/mock_server/handlers/chat_completions.py +280 -0
- guidellm/mock_server/handlers/completions.py +280 -0
- guidellm/mock_server/handlers/tokenizer.py +142 -0
- guidellm/mock_server/models.py +510 -0
- guidellm/mock_server/server.py +238 -0
- guidellm/mock_server/utils.py +302 -0
- guidellm/scheduler/__init__.py +69 -26
- guidellm/scheduler/constraints/__init__.py +49 -0
- guidellm/scheduler/constraints/constraint.py +325 -0
- guidellm/scheduler/constraints/error.py +411 -0
- guidellm/scheduler/constraints/factory.py +182 -0
- guidellm/scheduler/constraints/request.py +312 -0
- guidellm/scheduler/constraints/saturation.py +722 -0
- guidellm/scheduler/environments.py +252 -0
- guidellm/scheduler/scheduler.py +137 -368
- guidellm/scheduler/schemas.py +358 -0
- guidellm/scheduler/strategies.py +617 -0
- guidellm/scheduler/worker.py +413 -419
- guidellm/scheduler/worker_group.py +712 -0
- guidellm/schemas/__init__.py +65 -0
- guidellm/schemas/base.py +417 -0
- guidellm/schemas/info.py +188 -0
- guidellm/schemas/request.py +235 -0
- guidellm/schemas/request_stats.py +349 -0
- guidellm/schemas/response.py +124 -0
- guidellm/schemas/statistics.py +1018 -0
- guidellm/{config.py → settings.py} +31 -24
- guidellm/utils/__init__.py +71 -8
- guidellm/utils/auto_importer.py +98 -0
- guidellm/utils/cli.py +132 -5
- guidellm/utils/console.py +566 -0
- guidellm/utils/encoding.py +778 -0
- guidellm/utils/functions.py +159 -0
- guidellm/utils/hf_datasets.py +1 -2
- guidellm/utils/hf_transformers.py +4 -4
- guidellm/utils/imports.py +9 -0
- guidellm/utils/messaging.py +1118 -0
- guidellm/utils/mixins.py +115 -0
- guidellm/utils/random.py +3 -4
- guidellm/utils/registry.py +220 -0
- guidellm/utils/singleton.py +133 -0
- guidellm/utils/synchronous.py +159 -0
- guidellm/utils/text.py +163 -50
- guidellm/utils/typing.py +41 -0
- guidellm/version.py +2 -2
- guidellm-0.6.0a5.dist-info/METADATA +364 -0
- guidellm-0.6.0a5.dist-info/RECORD +109 -0
- guidellm/backend/__init__.py +0 -23
- guidellm/backend/backend.py +0 -259
- guidellm/backend/openai.py +0 -708
- guidellm/backend/response.py +0 -136
- guidellm/benchmark/aggregator.py +0 -760
- guidellm/benchmark/benchmark.py +0 -837
- guidellm/benchmark/output.py +0 -997
- guidellm/benchmark/profile.py +0 -409
- guidellm/benchmark/scenario.py +0 -104
- guidellm/data/prideandprejudice.txt.gz +0 -0
- guidellm/dataset/__init__.py +0 -22
- guidellm/dataset/creator.py +0 -213
- guidellm/dataset/entrypoints.py +0 -42
- guidellm/dataset/file.py +0 -92
- guidellm/dataset/hf_datasets.py +0 -62
- guidellm/dataset/in_memory.py +0 -132
- guidellm/dataset/synthetic.py +0 -287
- guidellm/objects/__init__.py +0 -18
- guidellm/objects/pydantic.py +0 -89
- guidellm/objects/statistics.py +0 -953
- guidellm/preprocess/__init__.py +0 -3
- guidellm/preprocess/dataset.py +0 -374
- guidellm/presentation/__init__.py +0 -28
- guidellm/presentation/builder.py +0 -27
- guidellm/presentation/data_models.py +0 -232
- guidellm/presentation/injector.py +0 -66
- guidellm/request/__init__.py +0 -18
- guidellm/request/loader.py +0 -284
- guidellm/request/request.py +0 -79
- guidellm/request/types.py +0 -10
- guidellm/scheduler/queues.py +0 -25
- guidellm/scheduler/result.py +0 -155
- guidellm/scheduler/strategy.py +0 -495
- guidellm-0.3.1.dist-info/METADATA +0 -329
- guidellm-0.3.1.dist-info/RECORD +0 -62
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/WHEEL +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/entry_points.txt +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/top_level.txt +0 -0
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
import re
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
from typing import Union
|
|
4
|
-
|
|
5
|
-
from loguru import logger
|
|
6
|
-
|
|
7
|
-
from guidellm.config import settings
|
|
8
|
-
from guidellm.utils.text import load_text
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
def create_report(js_data: dict, output_path: Union[str, Path]) -> Path:
|
|
12
|
-
"""
|
|
13
|
-
Creates a report from the dictionary and saves it to the output path.
|
|
14
|
-
|
|
15
|
-
:param js_data: dict with match str and json data to inject
|
|
16
|
-
:type js_data: dict
|
|
17
|
-
:param output_path: the file to save the report to.
|
|
18
|
-
:type output_path: str
|
|
19
|
-
:return: the path to the saved report
|
|
20
|
-
:rtype: str
|
|
21
|
-
"""
|
|
22
|
-
|
|
23
|
-
if not isinstance(output_path, Path):
|
|
24
|
-
output_path = Path(output_path)
|
|
25
|
-
|
|
26
|
-
html_content = load_text(settings.report_generation.source)
|
|
27
|
-
report_content = inject_data(
|
|
28
|
-
js_data,
|
|
29
|
-
html_content,
|
|
30
|
-
)
|
|
31
|
-
|
|
32
|
-
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
33
|
-
output_path.write_text(report_content)
|
|
34
|
-
return output_path
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
def inject_data(
|
|
38
|
-
js_data: dict,
|
|
39
|
-
html: str,
|
|
40
|
-
) -> str:
|
|
41
|
-
"""
|
|
42
|
-
Injects the json data into the HTML,
|
|
43
|
-
replacing placeholders only within the <head> section.
|
|
44
|
-
|
|
45
|
-
:param js_data: the json data to inject
|
|
46
|
-
:type js_data: dict
|
|
47
|
-
:param html: the html to inject the data into
|
|
48
|
-
:type html: str
|
|
49
|
-
:return: the html with the json data injected
|
|
50
|
-
:rtype: str
|
|
51
|
-
"""
|
|
52
|
-
head_match = re.search(r"<head[^>]*>(.*?)</head>", html, re.DOTALL | re.IGNORECASE)
|
|
53
|
-
if not head_match:
|
|
54
|
-
logger.warning("<head> section missing, returning original HTML.")
|
|
55
|
-
|
|
56
|
-
return html
|
|
57
|
-
|
|
58
|
-
head_content = head_match.group(1)
|
|
59
|
-
|
|
60
|
-
# Replace placeholders only inside the <head> content
|
|
61
|
-
for placeholder, script in js_data.items():
|
|
62
|
-
head_content = head_content.replace(placeholder, script)
|
|
63
|
-
|
|
64
|
-
# Rebuild the HTML
|
|
65
|
-
new_head = f"<head>{head_content}</head>"
|
|
66
|
-
return html[: head_match.start()] + new_head + html[head_match.end() :]
|
guidellm/request/__init__.py
DELETED
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
from .loader import (
|
|
2
|
-
GenerativeRequestLoader,
|
|
3
|
-
GenerativeRequestLoaderDescription,
|
|
4
|
-
RequestLoader,
|
|
5
|
-
RequestLoaderDescription,
|
|
6
|
-
)
|
|
7
|
-
from .request import GenerationRequest
|
|
8
|
-
from .types import RequestT, ResponseT
|
|
9
|
-
|
|
10
|
-
__all__ = [
|
|
11
|
-
"GenerationRequest",
|
|
12
|
-
"GenerativeRequestLoader",
|
|
13
|
-
"GenerativeRequestLoaderDescription",
|
|
14
|
-
"RequestLoader",
|
|
15
|
-
"RequestLoaderDescription",
|
|
16
|
-
"RequestT",
|
|
17
|
-
"ResponseT",
|
|
18
|
-
]
|
guidellm/request/loader.py
DELETED
|
@@ -1,284 +0,0 @@
|
|
|
1
|
-
from abc import abstractmethod
|
|
2
|
-
from collections.abc import Iterable, Iterator
|
|
3
|
-
from pathlib import Path
|
|
4
|
-
from typing import (
|
|
5
|
-
Any,
|
|
6
|
-
Literal,
|
|
7
|
-
Optional,
|
|
8
|
-
Union,
|
|
9
|
-
)
|
|
10
|
-
|
|
11
|
-
from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
|
|
12
|
-
from transformers import PreTrainedTokenizerBase # type: ignore[import]
|
|
13
|
-
|
|
14
|
-
from guidellm.config import settings
|
|
15
|
-
from guidellm.dataset import ColumnInputTypes, load_dataset
|
|
16
|
-
from guidellm.objects import StandardBaseModel
|
|
17
|
-
from guidellm.request.request import GenerationRequest
|
|
18
|
-
|
|
19
|
-
__all__ = [
|
|
20
|
-
"GenerativeRequestLoader",
|
|
21
|
-
"GenerativeRequestLoaderDescription",
|
|
22
|
-
"RequestLoader",
|
|
23
|
-
"RequestLoaderDescription",
|
|
24
|
-
]
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
class RequestLoaderDescription(StandardBaseModel):
|
|
28
|
-
type_: Literal["request_loader"] = "request_loader"
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
class RequestLoader(Iterable):
|
|
32
|
-
@abstractmethod
|
|
33
|
-
def __iter__(self) -> Iterator: ...
|
|
34
|
-
|
|
35
|
-
@abstractmethod
|
|
36
|
-
def __len__(self) -> int: ...
|
|
37
|
-
|
|
38
|
-
@property
|
|
39
|
-
@abstractmethod
|
|
40
|
-
def description(self) -> RequestLoaderDescription: ...
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
class GenerativeRequestLoaderDescription(RequestLoaderDescription):
|
|
44
|
-
type_: Literal["generative_request_loader"] = "generative_request_loader" # type: ignore[assignment]
|
|
45
|
-
data: str
|
|
46
|
-
data_args: Optional[dict[str, Any]]
|
|
47
|
-
processor: str
|
|
48
|
-
processor_args: Optional[dict[str, Any]]
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
class GenerativeRequestLoader(RequestLoader):
|
|
52
|
-
DEFAULT_PROMPT_COLUMNS = [
|
|
53
|
-
"prompt",
|
|
54
|
-
"prompts",
|
|
55
|
-
"instruction",
|
|
56
|
-
"instructions",
|
|
57
|
-
"question",
|
|
58
|
-
"questions",
|
|
59
|
-
"input",
|
|
60
|
-
"inputs",
|
|
61
|
-
"context",
|
|
62
|
-
"content",
|
|
63
|
-
"conversation",
|
|
64
|
-
"conversations",
|
|
65
|
-
"turn",
|
|
66
|
-
"turns",
|
|
67
|
-
"text",
|
|
68
|
-
]
|
|
69
|
-
|
|
70
|
-
def __init__(
|
|
71
|
-
self,
|
|
72
|
-
data: Union[
|
|
73
|
-
str,
|
|
74
|
-
Path,
|
|
75
|
-
Iterable[Union[str, dict[str, Any]]],
|
|
76
|
-
Dataset,
|
|
77
|
-
DatasetDict,
|
|
78
|
-
IterableDataset,
|
|
79
|
-
IterableDatasetDict,
|
|
80
|
-
],
|
|
81
|
-
data_args: Optional[dict[str, Any]],
|
|
82
|
-
processor: Optional[Union[str, Path, PreTrainedTokenizerBase]],
|
|
83
|
-
processor_args: Optional[dict[str, Any]],
|
|
84
|
-
shuffle: bool = True,
|
|
85
|
-
iter_type: Literal["finite", "infinite"] = "finite",
|
|
86
|
-
random_seed: int = 42,
|
|
87
|
-
):
|
|
88
|
-
self.data = data
|
|
89
|
-
self.data_args = data_args
|
|
90
|
-
dataset, args_column_mappings = load_dataset(
|
|
91
|
-
data,
|
|
92
|
-
data_args,
|
|
93
|
-
processor,
|
|
94
|
-
processor_args,
|
|
95
|
-
random_seed,
|
|
96
|
-
)
|
|
97
|
-
self.dataset = dataset
|
|
98
|
-
self.processor = processor
|
|
99
|
-
self.processor_args = processor_args
|
|
100
|
-
self.shuffle = shuffle
|
|
101
|
-
self.iter_type = iter_type
|
|
102
|
-
self.random_seed = random_seed
|
|
103
|
-
|
|
104
|
-
self.column_mappings = self._create_column_mappings(args_column_mappings)
|
|
105
|
-
self.preserve_iter_state = iter_type == "infinite" # ensure no caching requests
|
|
106
|
-
self._preserved_iter = None
|
|
107
|
-
|
|
108
|
-
def __iter__(self) -> Iterator[GenerationRequest]:
|
|
109
|
-
scope_create_count = 0
|
|
110
|
-
|
|
111
|
-
while (dataset_iter := self._get_dataset_iter(scope_create_count)) is not None:
|
|
112
|
-
scope_create_count += 1
|
|
113
|
-
|
|
114
|
-
for item in dataset_iter:
|
|
115
|
-
yield self._create_request(item)
|
|
116
|
-
|
|
117
|
-
self._preserved_iter = None
|
|
118
|
-
|
|
119
|
-
def __len__(self) -> int:
|
|
120
|
-
if self.iter_type == "finite":
|
|
121
|
-
return self.num_unique_items()
|
|
122
|
-
|
|
123
|
-
raise ValueError(f"Unable to determine length of dataset: {self.data}")
|
|
124
|
-
|
|
125
|
-
@property
|
|
126
|
-
def description(self) -> GenerativeRequestLoaderDescription:
|
|
127
|
-
return GenerativeRequestLoaderDescription(
|
|
128
|
-
data=str(self.data),
|
|
129
|
-
data_args=self.data_args,
|
|
130
|
-
processor=str(self.processor),
|
|
131
|
-
processor_args=self.processor_args,
|
|
132
|
-
)
|
|
133
|
-
|
|
134
|
-
def num_unique_items(self, raise_err: bool = True) -> int:
|
|
135
|
-
try:
|
|
136
|
-
return len(self.dataset)
|
|
137
|
-
except Exception: # noqa: BLE001, S110
|
|
138
|
-
pass
|
|
139
|
-
|
|
140
|
-
dataset_size = self.dataset.info.dataset_size
|
|
141
|
-
if dataset_size is not None:
|
|
142
|
-
return dataset_size
|
|
143
|
-
|
|
144
|
-
if raise_err:
|
|
145
|
-
raise ValueError("Unable to determine number of items in the dataset")
|
|
146
|
-
|
|
147
|
-
return -1
|
|
148
|
-
|
|
149
|
-
def _create_column_mappings(
|
|
150
|
-
self,
|
|
151
|
-
args_column_mappings: dict[ColumnInputTypes, str],
|
|
152
|
-
) -> dict[ColumnInputTypes, str]:
|
|
153
|
-
column_mappings: dict[ColumnInputTypes, str] = {}
|
|
154
|
-
|
|
155
|
-
if "text_column" in args_column_mappings:
|
|
156
|
-
column_mappings["prompt_column"] = args_column_mappings["text_column"]
|
|
157
|
-
else:
|
|
158
|
-
column_mappings["prompt_column"] = self._extract_text_column()
|
|
159
|
-
|
|
160
|
-
if "prompt_tokens_count_column" in args_column_mappings:
|
|
161
|
-
column_mappings["prompt_tokens_count_column"] = args_column_mappings[
|
|
162
|
-
"prompt_tokens_count_column"
|
|
163
|
-
]
|
|
164
|
-
elif prompt_tokens_count_column := self._extract_prompt_tokens_count_column():
|
|
165
|
-
column_mappings["prompt_tokens_count_column"] = prompt_tokens_count_column
|
|
166
|
-
|
|
167
|
-
if "output_tokens_count_column" in args_column_mappings:
|
|
168
|
-
column_mappings["output_tokens_count_column"] = args_column_mappings[
|
|
169
|
-
"output_tokens_count_column"
|
|
170
|
-
]
|
|
171
|
-
elif output_tokens_count_column := self._extract_output_tokens_count_column():
|
|
172
|
-
column_mappings["output_tokens_count_column"] = output_tokens_count_column
|
|
173
|
-
|
|
174
|
-
return column_mappings
|
|
175
|
-
|
|
176
|
-
def _extract_text_column(self) -> str:
|
|
177
|
-
column_names = self._dataset_columns(
|
|
178
|
-
err_msg=(
|
|
179
|
-
"Unable to determine text column from dataset and it is required. "
|
|
180
|
-
"To specify the text column, set the 'text_column' key in the "
|
|
181
|
-
"'data_args' dictionary."
|
|
182
|
-
)
|
|
183
|
-
)
|
|
184
|
-
|
|
185
|
-
if not column_names:
|
|
186
|
-
raise ValueError(
|
|
187
|
-
"Unable to determine text column from dataset and it is required. "
|
|
188
|
-
"To specify the text column, set the 'text_column' key in the "
|
|
189
|
-
"'data_args' dictionary."
|
|
190
|
-
)
|
|
191
|
-
|
|
192
|
-
if len(column_names) == 1:
|
|
193
|
-
return column_names[0]
|
|
194
|
-
|
|
195
|
-
for def_column in self.DEFAULT_PROMPT_COLUMNS:
|
|
196
|
-
if def_column in column_names:
|
|
197
|
-
return def_column
|
|
198
|
-
|
|
199
|
-
raise ValueError(
|
|
200
|
-
f"Unable to determine text column from dataset columns: {column_names}. "
|
|
201
|
-
"To specify the text column, set the 'text_column' key in the "
|
|
202
|
-
"'data_args' dictionary."
|
|
203
|
-
)
|
|
204
|
-
|
|
205
|
-
def _extract_prompt_tokens_count_column(self) -> Optional[str]:
|
|
206
|
-
column_names = self._dataset_columns()
|
|
207
|
-
|
|
208
|
-
if column_names and "prompt_tokens_count" in column_names:
|
|
209
|
-
return "prompt_tokens_count"
|
|
210
|
-
|
|
211
|
-
if column_names and "prompt_tokens" in column_names:
|
|
212
|
-
return "prompt_tokens"
|
|
213
|
-
|
|
214
|
-
return None
|
|
215
|
-
|
|
216
|
-
def _extract_output_tokens_count_column(self) -> Optional[str]:
|
|
217
|
-
column_names = self._dataset_columns()
|
|
218
|
-
|
|
219
|
-
if column_names and "output_tokens_count" in column_names:
|
|
220
|
-
return "output_tokens_count"
|
|
221
|
-
|
|
222
|
-
if column_names and "output_tokens" in column_names:
|
|
223
|
-
return "output_tokens"
|
|
224
|
-
|
|
225
|
-
return None
|
|
226
|
-
|
|
227
|
-
def _dataset_columns(self, err_msg: Optional[str] = None) -> Optional[list[str]]:
|
|
228
|
-
try:
|
|
229
|
-
column_names = self.dataset.column_names
|
|
230
|
-
|
|
231
|
-
if not column_names and err_msg:
|
|
232
|
-
raise ValueError(f"No column names found in dataset: {self.data}")
|
|
233
|
-
except Exception as err:
|
|
234
|
-
if err_msg:
|
|
235
|
-
raise ValueError(err_msg) from err
|
|
236
|
-
|
|
237
|
-
column_names = None
|
|
238
|
-
|
|
239
|
-
return column_names
|
|
240
|
-
|
|
241
|
-
def _get_dataset_iter(
|
|
242
|
-
self, scope_create_count: int
|
|
243
|
-
) -> Optional[Iterator[dict[str, Any]]]:
|
|
244
|
-
if scope_create_count > 0 and self.iter_type != "infinite":
|
|
245
|
-
return None
|
|
246
|
-
|
|
247
|
-
if self.preserve_iter_state and self._preserved_iter is not None:
|
|
248
|
-
return self._preserved_iter
|
|
249
|
-
|
|
250
|
-
dataset = (
|
|
251
|
-
self.dataset
|
|
252
|
-
if not self.shuffle
|
|
253
|
-
else self.dataset.shuffle(seed=self.random_seed)
|
|
254
|
-
)
|
|
255
|
-
|
|
256
|
-
dataset_iter = iter(dataset)
|
|
257
|
-
|
|
258
|
-
if self.preserve_iter_state:
|
|
259
|
-
self._preserved_iter = dataset_iter
|
|
260
|
-
|
|
261
|
-
return dataset_iter
|
|
262
|
-
|
|
263
|
-
def _create_request(self, item: dict[str, Any]) -> GenerationRequest:
|
|
264
|
-
prompt_tokens = (
|
|
265
|
-
item[self.column_mappings["prompt_tokens_count_column"]]
|
|
266
|
-
if "prompt_tokens_count_column" in self.column_mappings
|
|
267
|
-
else None
|
|
268
|
-
)
|
|
269
|
-
output_tokens = (
|
|
270
|
-
item[self.column_mappings["output_tokens_count_column"]]
|
|
271
|
-
if "output_tokens_count_column" in self.column_mappings
|
|
272
|
-
else None
|
|
273
|
-
)
|
|
274
|
-
|
|
275
|
-
return GenerationRequest(
|
|
276
|
-
request_type=settings.preferred_route,
|
|
277
|
-
content=item[self.column_mappings["prompt_column"]],
|
|
278
|
-
stats=(
|
|
279
|
-
{"prompt_tokens": prompt_tokens} if prompt_tokens is not None else {}
|
|
280
|
-
),
|
|
281
|
-
constraints=(
|
|
282
|
-
{"output_tokens": output_tokens} if output_tokens is not None else {}
|
|
283
|
-
),
|
|
284
|
-
)
|
guidellm/request/request.py
DELETED
|
@@ -1,79 +0,0 @@
|
|
|
1
|
-
import uuid
|
|
2
|
-
from typing import Any, Literal, Optional
|
|
3
|
-
|
|
4
|
-
from pydantic import Field
|
|
5
|
-
|
|
6
|
-
from guidellm.objects.pydantic import StandardBaseModel
|
|
7
|
-
|
|
8
|
-
__all__ = ["GenerationRequest"]
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class GenerationRequest(StandardBaseModel):
|
|
12
|
-
"""
|
|
13
|
-
A class representing a request for generation.
|
|
14
|
-
This class is used to encapsulate the details of a generation request,
|
|
15
|
-
including the request ID, type, content, parameters, statistics, and constraints.
|
|
16
|
-
It is designed to be used with the BackendRequestsWorker class to handle
|
|
17
|
-
the generation process.
|
|
18
|
-
|
|
19
|
-
:param request_id: The unique identifier for the request.
|
|
20
|
-
:param request_type: The type of request (e.g., text, chat).
|
|
21
|
-
:param content: The content for the request to send to the backend.
|
|
22
|
-
If request_type is 'text', this should be a string or list of strings
|
|
23
|
-
which will be resolved by backend.text_completions.
|
|
24
|
-
If request_type is 'chat', this should be a string,
|
|
25
|
-
a list of (str, Dict[str, Union[str, Dict[str, str]], Path, Image]),
|
|
26
|
-
or Any raw content which will be resolved by backend.chat_completions.
|
|
27
|
-
If raw content, raw_content=True must be passed in the params.
|
|
28
|
-
:param params: Additional parameters for the request passed in as kwargs.
|
|
29
|
-
For an http backend, these are passed into the body of the request.
|
|
30
|
-
:param stats: Statistics for the request, such as the number of prompt tokens.
|
|
31
|
-
Used for tracking and reporting purposes.
|
|
32
|
-
:param constraints: Constraints for the request, such as the maximum number
|
|
33
|
-
of output tokens. Used for controlling the behavior of the backend.
|
|
34
|
-
"""
|
|
35
|
-
|
|
36
|
-
request_id: Optional[str] = Field(
|
|
37
|
-
default_factory=lambda: str(uuid.uuid4()),
|
|
38
|
-
description="The unique identifier for the request.",
|
|
39
|
-
)
|
|
40
|
-
request_type: Literal["text_completions", "chat_completions"] = Field(
|
|
41
|
-
default="text_completions",
|
|
42
|
-
description=(
|
|
43
|
-
"The type of request (e.g., text, chat). "
|
|
44
|
-
"If request_type='text_completions', resolved by backend.text_completions. "
|
|
45
|
-
"If request_typ='chat_completions', resolved by backend.chat_completions."
|
|
46
|
-
),
|
|
47
|
-
)
|
|
48
|
-
content: Any = Field(
|
|
49
|
-
description=(
|
|
50
|
-
"The content for the request to send to the backend. "
|
|
51
|
-
"If request_type is 'text', this should be a string or list of strings "
|
|
52
|
-
"which will be resolved by backend.text_completions. "
|
|
53
|
-
"If request_type is 'chat', this should be a string, "
|
|
54
|
-
"a list of (str, Dict[str, Union[str, Dict[str, str]], Path, Image]), "
|
|
55
|
-
"or Any raw content which will be resolved by backend.chat_completions. "
|
|
56
|
-
"If raw content, raw_content=True must be passed in the params."
|
|
57
|
-
)
|
|
58
|
-
)
|
|
59
|
-
params: dict[str, Any] = Field(
|
|
60
|
-
default_factory=dict,
|
|
61
|
-
description=(
|
|
62
|
-
"Additional parameters for the request that will be passed in as kwargs. "
|
|
63
|
-
"For an http backend, these are passed into the body of the request. "
|
|
64
|
-
),
|
|
65
|
-
)
|
|
66
|
-
stats: dict[Literal["prompt_tokens"], int] = Field(
|
|
67
|
-
default_factory=dict,
|
|
68
|
-
description=(
|
|
69
|
-
"Statistics for the request, such as the number of prompt tokens. "
|
|
70
|
-
"Used for tracking and reporting purposes."
|
|
71
|
-
),
|
|
72
|
-
)
|
|
73
|
-
constraints: dict[Literal["output_tokens"], int] = Field(
|
|
74
|
-
default_factory=dict,
|
|
75
|
-
description=(
|
|
76
|
-
"Constraints for the request, such as the maximum number of output tokens. "
|
|
77
|
-
"Used for controlling the behavior of the backend."
|
|
78
|
-
),
|
|
79
|
-
)
|
guidellm/request/types.py
DELETED
guidellm/scheduler/queues.py
DELETED
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Helper module for importing the correct queue types.
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
from dataclasses import dataclass
|
|
6
|
-
from queue import Empty as QueueEmpty
|
|
7
|
-
from queue import Full as QueueFull
|
|
8
|
-
from queue import Queue
|
|
9
|
-
from typing import Generic
|
|
10
|
-
|
|
11
|
-
from guidellm.request.types import RequestT, ResponseT
|
|
12
|
-
from guidellm.scheduler.result import WorkerProcessRequest, WorkerProcessResult
|
|
13
|
-
|
|
14
|
-
__all__ = [
|
|
15
|
-
"MPQueues",
|
|
16
|
-
"Queue",
|
|
17
|
-
"QueueEmpty",
|
|
18
|
-
"QueueFull",
|
|
19
|
-
]
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
@dataclass
|
|
23
|
-
class MPQueues(Generic[RequestT, ResponseT]):
|
|
24
|
-
requests: Queue[WorkerProcessRequest[RequestT, ResponseT]]
|
|
25
|
-
responses: Queue[WorkerProcessResult[RequestT, ResponseT]]
|
guidellm/scheduler/result.py
DELETED
|
@@ -1,155 +0,0 @@
|
|
|
1
|
-
from dataclasses import dataclass
|
|
2
|
-
from typing import (
|
|
3
|
-
Generic,
|
|
4
|
-
Literal,
|
|
5
|
-
Optional,
|
|
6
|
-
)
|
|
7
|
-
|
|
8
|
-
from guidellm.objects import StandardBaseModel
|
|
9
|
-
from guidellm.request.types import RequestT, ResponseT
|
|
10
|
-
from guidellm.scheduler.strategy import SchedulingStrategy
|
|
11
|
-
|
|
12
|
-
__all__ = [
|
|
13
|
-
"SchedulerRequestInfo",
|
|
14
|
-
"SchedulerRequestResult",
|
|
15
|
-
"SchedulerResult",
|
|
16
|
-
"SchedulerRunInfo",
|
|
17
|
-
"WorkerProcessRequest",
|
|
18
|
-
"WorkerProcessResult",
|
|
19
|
-
]
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
class SchedulerRunInfo(StandardBaseModel):
|
|
23
|
-
"""
|
|
24
|
-
Information about the current run of the scheduler.
|
|
25
|
-
This class holds metadata about the scheduling run,
|
|
26
|
-
including the start and end times, the number of processes,
|
|
27
|
-
and the scheduling strategy used.
|
|
28
|
-
It also tracks the number of requests created, queued, pending,
|
|
29
|
-
and completed during the run.
|
|
30
|
-
|
|
31
|
-
:param start_time: The start time of the scheduling run.
|
|
32
|
-
:param end_time: The end time of the scheduling run;
|
|
33
|
-
if None, then this will be math.inf.
|
|
34
|
-
:param end_number: The maximum number of requests to be processed;
|
|
35
|
-
if None, then this will be math.inf.
|
|
36
|
-
:param processes: The number of processes used in the scheduling run.
|
|
37
|
-
:param strategy: The scheduling strategy used in the run.
|
|
38
|
-
This should be an instance of SchedulingStrategy.
|
|
39
|
-
:param created_requests: The number of requests created during the run.
|
|
40
|
-
:param queued_requests: The number of requests queued during the run.
|
|
41
|
-
:param scheduled_requests: The number of requests scheduled during the run.
|
|
42
|
-
(requests pending being sent to the worker but recieved by a process)
|
|
43
|
-
:param processing_requests: The number of requests actively being run.
|
|
44
|
-
:param completed_requests: The number of requests completed during the run.
|
|
45
|
-
"""
|
|
46
|
-
|
|
47
|
-
start_time: float
|
|
48
|
-
end_time: float
|
|
49
|
-
end_number: float
|
|
50
|
-
processes: int
|
|
51
|
-
strategy: SchedulingStrategy
|
|
52
|
-
|
|
53
|
-
created_requests: int = 0
|
|
54
|
-
queued_requests: int = 0
|
|
55
|
-
scheduled_requests: int = 0
|
|
56
|
-
processing_requests: int = 0
|
|
57
|
-
completed_requests: int = 0
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
class SchedulerRequestInfo(StandardBaseModel):
|
|
61
|
-
"""
|
|
62
|
-
Information about a specific request run through the scheduler.
|
|
63
|
-
This class holds metadata about the request, including
|
|
64
|
-
the targeted start time, queued time, start time, end time,
|
|
65
|
-
and the process ID that handled the request.
|
|
66
|
-
|
|
67
|
-
:param targeted_start_time: The targeted start time for the request (time.time()).
|
|
68
|
-
:param queued_time: The time the request was queued (time.time()).
|
|
69
|
-
:param scheduled_time: The time the request was scheduled (time.time())
|
|
70
|
-
(any sleep time before the request was sent to the worker).
|
|
71
|
-
:param worker_start: The time the worker started processing request (time.time()).
|
|
72
|
-
:param worker_end: The time the worker finished processing request. (time.time()).
|
|
73
|
-
:param process_id: The ID of the underlying process that handled the request.
|
|
74
|
-
"""
|
|
75
|
-
|
|
76
|
-
requested: bool = False
|
|
77
|
-
completed: bool = False
|
|
78
|
-
errored: bool = False
|
|
79
|
-
canceled: bool = False
|
|
80
|
-
|
|
81
|
-
targeted_start_time: float = -1
|
|
82
|
-
queued_time: float = -1
|
|
83
|
-
dequeued_time: float = -1
|
|
84
|
-
scheduled_time: float = -1
|
|
85
|
-
worker_start: float = -1
|
|
86
|
-
request_start: float = -1
|
|
87
|
-
request_end: float = -1
|
|
88
|
-
worker_end: float = -1
|
|
89
|
-
process_id: int = -1
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
class SchedulerResult(StandardBaseModel):
|
|
93
|
-
"""
|
|
94
|
-
The yielded, iterative result for a scheduler run.
|
|
95
|
-
These are triggered on the start and end of the run,
|
|
96
|
-
as well as on the start and end of each request.
|
|
97
|
-
Depending on the type, it will hold the request and response
|
|
98
|
-
along with information and statistics about the request and general run.
|
|
99
|
-
|
|
100
|
-
:param type_: The type of the result, which can be one of:
|
|
101
|
-
- "run_start": Indicates the start of the run.
|
|
102
|
-
- "run_complete": Indicates the completion of the run (teardown happens after).
|
|
103
|
-
- "request_start": Indicates the start of a request.
|
|
104
|
-
- "request_complete": Indicates the completion of a request.
|
|
105
|
-
:param request: The request that was processed.
|
|
106
|
-
:param response: The response from the worker for the request.
|
|
107
|
-
:param request_info: Information about the request, including
|
|
108
|
-
the targeted start time, queued time, start time, end time,
|
|
109
|
-
and the process ID that handled the request.
|
|
110
|
-
:param run_info: Information about the current run of the scheduler,
|
|
111
|
-
including the start and end times, the number of processes,
|
|
112
|
-
and the scheduling strategy used.
|
|
113
|
-
It also tracks the number of requests created, queued, pending,
|
|
114
|
-
and completed during the run.
|
|
115
|
-
"""
|
|
116
|
-
|
|
117
|
-
pydantic_type: Literal["scheduler_result"] = "scheduler_result"
|
|
118
|
-
type_: Literal[
|
|
119
|
-
"run_start",
|
|
120
|
-
"run_complete",
|
|
121
|
-
"request_scheduled",
|
|
122
|
-
"request_start",
|
|
123
|
-
"request_complete",
|
|
124
|
-
]
|
|
125
|
-
run_info: SchedulerRunInfo
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
class SchedulerRequestResult(
|
|
129
|
-
SchedulerResult,
|
|
130
|
-
Generic[RequestT, ResponseT],
|
|
131
|
-
):
|
|
132
|
-
pydantic_type: Literal["scheduler_request_result"] = "scheduler_request_result" # type: ignore[assignment]
|
|
133
|
-
type_: Literal[
|
|
134
|
-
"request_scheduled",
|
|
135
|
-
"request_start",
|
|
136
|
-
"request_complete",
|
|
137
|
-
]
|
|
138
|
-
request: RequestT
|
|
139
|
-
request_info: SchedulerRequestInfo
|
|
140
|
-
response: Optional[ResponseT] = None
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
@dataclass
|
|
144
|
-
class WorkerProcessRequest(Generic[RequestT, ResponseT]):
|
|
145
|
-
request: RequestT
|
|
146
|
-
timeout_time: float
|
|
147
|
-
queued_time: float
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
@dataclass
|
|
151
|
-
class WorkerProcessResult(Generic[RequestT, ResponseT]):
|
|
152
|
-
type_: Literal["request_scheduled", "request_start", "request_complete"]
|
|
153
|
-
request: RequestT
|
|
154
|
-
response: Optional[ResponseT]
|
|
155
|
-
info: SchedulerRequestInfo
|