guidellm 0.4.0a21__py3-none-any.whl → 0.4.0a155__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of guidellm might be problematic. Click here for more details.
- guidellm/__init__.py +5 -2
- guidellm/__main__.py +451 -252
- guidellm/backends/__init__.py +33 -0
- guidellm/backends/backend.py +110 -0
- guidellm/backends/openai.py +355 -0
- guidellm/backends/response_handlers.py +455 -0
- guidellm/benchmark/__init__.py +53 -39
- guidellm/benchmark/benchmarker.py +148 -317
- guidellm/benchmark/entrypoints.py +466 -128
- guidellm/benchmark/output.py +517 -771
- guidellm/benchmark/profile.py +580 -280
- guidellm/benchmark/progress.py +568 -549
- guidellm/benchmark/scenarios/__init__.py +40 -0
- guidellm/benchmark/scenarios/chat.json +6 -0
- guidellm/benchmark/scenarios/rag.json +6 -0
- guidellm/benchmark/schemas.py +2085 -0
- guidellm/data/__init__.py +28 -4
- guidellm/data/collators.py +16 -0
- guidellm/data/deserializers/__init__.py +53 -0
- guidellm/data/deserializers/deserializer.py +109 -0
- guidellm/data/deserializers/file.py +222 -0
- guidellm/data/deserializers/huggingface.py +94 -0
- guidellm/data/deserializers/memory.py +192 -0
- guidellm/data/deserializers/synthetic.py +346 -0
- guidellm/data/loaders.py +145 -0
- guidellm/data/preprocessors/__init__.py +25 -0
- guidellm/data/preprocessors/formatters.py +412 -0
- guidellm/data/preprocessors/mappers.py +198 -0
- guidellm/data/preprocessors/preprocessor.py +29 -0
- guidellm/data/processor.py +30 -0
- guidellm/data/schemas.py +13 -0
- guidellm/data/utils/__init__.py +10 -0
- guidellm/data/utils/dataset.py +94 -0
- guidellm/data/utils/functions.py +18 -0
- guidellm/extras/__init__.py +4 -0
- guidellm/extras/audio.py +215 -0
- guidellm/extras/vision.py +242 -0
- guidellm/logger.py +2 -2
- guidellm/mock_server/__init__.py +8 -0
- guidellm/mock_server/config.py +84 -0
- guidellm/mock_server/handlers/__init__.py +17 -0
- guidellm/mock_server/handlers/chat_completions.py +280 -0
- guidellm/mock_server/handlers/completions.py +280 -0
- guidellm/mock_server/handlers/tokenizer.py +142 -0
- guidellm/mock_server/models.py +510 -0
- guidellm/mock_server/server.py +168 -0
- guidellm/mock_server/utils.py +302 -0
- guidellm/preprocess/dataset.py +23 -26
- guidellm/presentation/builder.py +2 -2
- guidellm/presentation/data_models.py +25 -21
- guidellm/presentation/injector.py +2 -3
- guidellm/scheduler/__init__.py +65 -26
- guidellm/scheduler/constraints.py +1035 -0
- guidellm/scheduler/environments.py +252 -0
- guidellm/scheduler/scheduler.py +140 -368
- guidellm/scheduler/schemas.py +272 -0
- guidellm/scheduler/strategies.py +519 -0
- guidellm/scheduler/worker.py +391 -420
- guidellm/scheduler/worker_group.py +707 -0
- guidellm/schemas/__init__.py +31 -0
- guidellm/schemas/info.py +159 -0
- guidellm/schemas/request.py +216 -0
- guidellm/schemas/response.py +119 -0
- guidellm/schemas/stats.py +228 -0
- guidellm/{config.py → settings.py} +32 -21
- guidellm/utils/__init__.py +95 -8
- guidellm/utils/auto_importer.py +98 -0
- guidellm/utils/cli.py +46 -2
- guidellm/utils/console.py +183 -0
- guidellm/utils/encoding.py +778 -0
- guidellm/utils/functions.py +134 -0
- guidellm/utils/hf_datasets.py +1 -2
- guidellm/utils/hf_transformers.py +4 -4
- guidellm/utils/imports.py +9 -0
- guidellm/utils/messaging.py +1118 -0
- guidellm/utils/mixins.py +115 -0
- guidellm/utils/pydantic_utils.py +411 -0
- guidellm/utils/random.py +3 -4
- guidellm/utils/registry.py +220 -0
- guidellm/utils/singleton.py +133 -0
- guidellm/{objects → utils}/statistics.py +341 -247
- guidellm/utils/synchronous.py +159 -0
- guidellm/utils/text.py +163 -50
- guidellm/utils/typing.py +41 -0
- guidellm/version.py +1 -1
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/METADATA +33 -10
- guidellm-0.4.0a155.dist-info/RECORD +96 -0
- guidellm/backend/__init__.py +0 -23
- guidellm/backend/backend.py +0 -259
- guidellm/backend/openai.py +0 -705
- guidellm/backend/response.py +0 -136
- guidellm/benchmark/aggregator.py +0 -760
- guidellm/benchmark/benchmark.py +0 -837
- guidellm/benchmark/scenario.py +0 -104
- guidellm/data/prideandprejudice.txt.gz +0 -0
- guidellm/dataset/__init__.py +0 -22
- guidellm/dataset/creator.py +0 -213
- guidellm/dataset/entrypoints.py +0 -42
- guidellm/dataset/file.py +0 -92
- guidellm/dataset/hf_datasets.py +0 -62
- guidellm/dataset/in_memory.py +0 -132
- guidellm/dataset/synthetic.py +0 -287
- guidellm/objects/__init__.py +0 -18
- guidellm/objects/pydantic.py +0 -89
- guidellm/request/__init__.py +0 -18
- guidellm/request/loader.py +0 -284
- guidellm/request/request.py +0 -79
- guidellm/request/types.py +0 -10
- guidellm/scheduler/queues.py +0 -25
- guidellm/scheduler/result.py +0 -155
- guidellm/scheduler/strategy.py +0 -495
- guidellm-0.4.0a21.dist-info/RECORD +0 -62
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/WHEEL +0 -0
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/entry_points.txt +0 -0
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/top_level.txt +0 -0
guidellm/backend/response.py
DELETED
|
@@ -1,136 +0,0 @@
|
|
|
1
|
-
from typing import Any, Literal, Optional
|
|
2
|
-
|
|
3
|
-
from pydantic import computed_field
|
|
4
|
-
|
|
5
|
-
from guidellm.config import settings
|
|
6
|
-
from guidellm.objects.pydantic import StandardBaseModel
|
|
7
|
-
|
|
8
|
-
__all__ = [
|
|
9
|
-
"RequestArgs",
|
|
10
|
-
"ResponseSummary",
|
|
11
|
-
"StreamingResponseType",
|
|
12
|
-
"StreamingTextResponse",
|
|
13
|
-
]
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
StreamingResponseType = Literal["start", "iter"]
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
class StreamingTextResponse(StandardBaseModel):
|
|
20
|
-
"""
|
|
21
|
-
A model representing the response content for a streaming text request.
|
|
22
|
-
|
|
23
|
-
:param type_: The type of the response; either 'start' or 'iter'.
|
|
24
|
-
:param value: The value of the response up to this iteration.
|
|
25
|
-
:param start_time: The time.time() the request started.
|
|
26
|
-
:param iter_count: The iteration count for the response. For 'start' this is 0
|
|
27
|
-
and for the first 'iter' it is 1.
|
|
28
|
-
:param delta: The text delta added to the response for this stream iteration.
|
|
29
|
-
:param time: If 'start', the time.time() the request started.
|
|
30
|
-
If 'iter', the time.time() the iteration was received.
|
|
31
|
-
:param request_id: The unique identifier for the request, if any.
|
|
32
|
-
"""
|
|
33
|
-
|
|
34
|
-
type_: StreamingResponseType
|
|
35
|
-
value: str
|
|
36
|
-
start_time: float
|
|
37
|
-
first_iter_time: Optional[float]
|
|
38
|
-
iter_count: int
|
|
39
|
-
delta: str
|
|
40
|
-
time: float
|
|
41
|
-
request_id: Optional[str] = None
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
class RequestArgs(StandardBaseModel):
|
|
45
|
-
"""
|
|
46
|
-
A model representing the arguments for a request to a backend.
|
|
47
|
-
Biases towards an HTTP request, but can be used for other types of backends.
|
|
48
|
-
|
|
49
|
-
:param target: The target URL or function for the request.
|
|
50
|
-
:param headers: The headers, if any, included in the request such as authorization.
|
|
51
|
-
:param params: The query parameters, if any, included in the request.
|
|
52
|
-
:param payload: The payload / arguments for the request including the prompt /
|
|
53
|
-
content and other configurations.
|
|
54
|
-
:param timeout: The timeout for the request in seconds, if any.
|
|
55
|
-
:param http2: Whether HTTP/2 was used for the request, if applicable.
|
|
56
|
-
:param follow_redirects: Whether the request should follow redirect responses.
|
|
57
|
-
"""
|
|
58
|
-
|
|
59
|
-
target: str
|
|
60
|
-
headers: dict[str, str]
|
|
61
|
-
params: dict[str, str]
|
|
62
|
-
payload: dict[str, Any]
|
|
63
|
-
timeout: Optional[float] = None
|
|
64
|
-
http2: Optional[bool] = None
|
|
65
|
-
follow_redirects: Optional[bool] = None
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
class ResponseSummary(StandardBaseModel):
|
|
69
|
-
"""
|
|
70
|
-
A model representing a summary of a backend request.
|
|
71
|
-
Always returned as the final iteration of a streaming request.
|
|
72
|
-
|
|
73
|
-
:param value: The final value returned from the request.
|
|
74
|
-
:param request_args: The arguments used to make the request.
|
|
75
|
-
:param iterations: The number of iterations in the request.
|
|
76
|
-
:param start_time: The time the request started.
|
|
77
|
-
:param end_time: The time the request ended.
|
|
78
|
-
:param first_iter_time: The time the first iteration was received.
|
|
79
|
-
:param last_iter_time: The time the last iteration was received.
|
|
80
|
-
:param request_prompt_tokens: The number of tokens measured in the prompt
|
|
81
|
-
for the request, if any.
|
|
82
|
-
:param request_output_tokens: The number of tokens enforced for the output
|
|
83
|
-
for the request, if any.
|
|
84
|
-
:param response_prompt_tokens: The number of tokens measured in the prompt
|
|
85
|
-
for the response, if any.
|
|
86
|
-
:param response_output_tokens: The number of tokens measured in the output
|
|
87
|
-
for the response, if any.
|
|
88
|
-
:param request_id: The unique identifier for the request, if any.
|
|
89
|
-
:param error: The error message, if any, returned from making the request.
|
|
90
|
-
"""
|
|
91
|
-
|
|
92
|
-
value: str
|
|
93
|
-
request_args: RequestArgs
|
|
94
|
-
iterations: int = 0
|
|
95
|
-
start_time: float
|
|
96
|
-
end_time: float
|
|
97
|
-
first_iter_time: Optional[float]
|
|
98
|
-
last_iter_time: Optional[float]
|
|
99
|
-
request_prompt_tokens: Optional[int] = None
|
|
100
|
-
request_output_tokens: Optional[int] = None
|
|
101
|
-
response_prompt_tokens: Optional[int] = None
|
|
102
|
-
response_output_tokens: Optional[int] = None
|
|
103
|
-
request_id: Optional[str] = None
|
|
104
|
-
error: Optional[str] = None
|
|
105
|
-
|
|
106
|
-
@computed_field # type: ignore[misc]
|
|
107
|
-
@property
|
|
108
|
-
def prompt_tokens(self) -> Optional[int]:
|
|
109
|
-
"""
|
|
110
|
-
The number of tokens measured in the prompt based on preferences
|
|
111
|
-
for trusting the input or response.
|
|
112
|
-
|
|
113
|
-
:return: The number of tokens in the prompt, if any.
|
|
114
|
-
"""
|
|
115
|
-
if settings.preferred_prompt_tokens_source == "request":
|
|
116
|
-
return self.request_prompt_tokens or self.response_prompt_tokens
|
|
117
|
-
|
|
118
|
-
return self.response_prompt_tokens or self.request_prompt_tokens
|
|
119
|
-
|
|
120
|
-
@computed_field # type: ignore[misc]
|
|
121
|
-
@property
|
|
122
|
-
def output_tokens(self) -> Optional[int]:
|
|
123
|
-
"""
|
|
124
|
-
The number of tokens measured in the output based on preferences
|
|
125
|
-
for trusting the input or response.
|
|
126
|
-
|
|
127
|
-
:return: The number of tokens in the output, if any.
|
|
128
|
-
"""
|
|
129
|
-
if self.error is not None:
|
|
130
|
-
# error occurred, can't trust request tokens were all generated
|
|
131
|
-
return self.response_prompt_tokens
|
|
132
|
-
|
|
133
|
-
if settings.preferred_output_tokens_source == "request":
|
|
134
|
-
return self.request_output_tokens or self.response_output_tokens
|
|
135
|
-
|
|
136
|
-
return self.response_output_tokens or self.request_output_tokens
|