guidellm 0.4.0a21__py3-none-any.whl → 0.4.0a155__py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of guidellm might be problematic. Click here for more details.
- guidellm/__init__.py +5 -2
- guidellm/__main__.py +451 -252
- guidellm/backends/__init__.py +33 -0
- guidellm/backends/backend.py +110 -0
- guidellm/backends/openai.py +355 -0
- guidellm/backends/response_handlers.py +455 -0
- guidellm/benchmark/__init__.py +53 -39
- guidellm/benchmark/benchmarker.py +148 -317
- guidellm/benchmark/entrypoints.py +466 -128
- guidellm/benchmark/output.py +517 -771
- guidellm/benchmark/profile.py +580 -280
- guidellm/benchmark/progress.py +568 -549
- guidellm/benchmark/scenarios/__init__.py +40 -0
- guidellm/benchmark/scenarios/chat.json +6 -0
- guidellm/benchmark/scenarios/rag.json +6 -0
- guidellm/benchmark/schemas.py +2085 -0
- guidellm/data/__init__.py +28 -4
- guidellm/data/collators.py +16 -0
- guidellm/data/deserializers/__init__.py +53 -0
- guidellm/data/deserializers/deserializer.py +109 -0
- guidellm/data/deserializers/file.py +222 -0
- guidellm/data/deserializers/huggingface.py +94 -0
- guidellm/data/deserializers/memory.py +192 -0
- guidellm/data/deserializers/synthetic.py +346 -0
- guidellm/data/loaders.py +145 -0
- guidellm/data/preprocessors/__init__.py +25 -0
- guidellm/data/preprocessors/formatters.py +412 -0
- guidellm/data/preprocessors/mappers.py +198 -0
- guidellm/data/preprocessors/preprocessor.py +29 -0
- guidellm/data/processor.py +30 -0
- guidellm/data/schemas.py +13 -0
- guidellm/data/utils/__init__.py +10 -0
- guidellm/data/utils/dataset.py +94 -0
- guidellm/data/utils/functions.py +18 -0
- guidellm/extras/__init__.py +4 -0
- guidellm/extras/audio.py +215 -0
- guidellm/extras/vision.py +242 -0
- guidellm/logger.py +2 -2
- guidellm/mock_server/__init__.py +8 -0
- guidellm/mock_server/config.py +84 -0
- guidellm/mock_server/handlers/__init__.py +17 -0
- guidellm/mock_server/handlers/chat_completions.py +280 -0
- guidellm/mock_server/handlers/completions.py +280 -0
- guidellm/mock_server/handlers/tokenizer.py +142 -0
- guidellm/mock_server/models.py +510 -0
- guidellm/mock_server/server.py +168 -0
- guidellm/mock_server/utils.py +302 -0
- guidellm/preprocess/dataset.py +23 -26
- guidellm/presentation/builder.py +2 -2
- guidellm/presentation/data_models.py +25 -21
- guidellm/presentation/injector.py +2 -3
- guidellm/scheduler/__init__.py +65 -26
- guidellm/scheduler/constraints.py +1035 -0
- guidellm/scheduler/environments.py +252 -0
- guidellm/scheduler/scheduler.py +140 -368
- guidellm/scheduler/schemas.py +272 -0
- guidellm/scheduler/strategies.py +519 -0
- guidellm/scheduler/worker.py +391 -420
- guidellm/scheduler/worker_group.py +707 -0
- guidellm/schemas/__init__.py +31 -0
- guidellm/schemas/info.py +159 -0
- guidellm/schemas/request.py +216 -0
- guidellm/schemas/response.py +119 -0
- guidellm/schemas/stats.py +228 -0
- guidellm/{config.py → settings.py} +32 -21
- guidellm/utils/__init__.py +95 -8
- guidellm/utils/auto_importer.py +98 -0
- guidellm/utils/cli.py +46 -2
- guidellm/utils/console.py +183 -0
- guidellm/utils/encoding.py +778 -0
- guidellm/utils/functions.py +134 -0
- guidellm/utils/hf_datasets.py +1 -2
- guidellm/utils/hf_transformers.py +4 -4
- guidellm/utils/imports.py +9 -0
- guidellm/utils/messaging.py +1118 -0
- guidellm/utils/mixins.py +115 -0
- guidellm/utils/pydantic_utils.py +411 -0
- guidellm/utils/random.py +3 -4
- guidellm/utils/registry.py +220 -0
- guidellm/utils/singleton.py +133 -0
- guidellm/{objects → utils}/statistics.py +341 -247
- guidellm/utils/synchronous.py +159 -0
- guidellm/utils/text.py +163 -50
- guidellm/utils/typing.py +41 -0
- guidellm/version.py +1 -1
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/METADATA +33 -10
- guidellm-0.4.0a155.dist-info/RECORD +96 -0
- guidellm/backend/__init__.py +0 -23
- guidellm/backend/backend.py +0 -259
- guidellm/backend/openai.py +0 -705
- guidellm/backend/response.py +0 -136
- guidellm/benchmark/aggregator.py +0 -760
- guidellm/benchmark/benchmark.py +0 -837
- guidellm/benchmark/scenario.py +0 -104
- guidellm/data/prideandprejudice.txt.gz +0 -0
- guidellm/dataset/__init__.py +0 -22
- guidellm/dataset/creator.py +0 -213
- guidellm/dataset/entrypoints.py +0 -42
- guidellm/dataset/file.py +0 -92
- guidellm/dataset/hf_datasets.py +0 -62
- guidellm/dataset/in_memory.py +0 -132
- guidellm/dataset/synthetic.py +0 -287
- guidellm/objects/__init__.py +0 -18
- guidellm/objects/pydantic.py +0 -89
- guidellm/request/__init__.py +0 -18
- guidellm/request/loader.py +0 -284
- guidellm/request/request.py +0 -79
- guidellm/request/types.py +0 -10
- guidellm/scheduler/queues.py +0 -25
- guidellm/scheduler/result.py +0 -155
- guidellm/scheduler/strategy.py +0 -495
- guidellm-0.4.0a21.dist-info/RECORD +0 -62
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/WHEEL +0 -0
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/entry_points.txt +0 -0
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/top_level.txt +0 -0
guidellm/request/request.py
DELETED
|
@@ -1,79 +0,0 @@
|
|
|
1
|
-
import uuid
|
|
2
|
-
from typing import Any, Literal, Optional
|
|
3
|
-
|
|
4
|
-
from pydantic import Field
|
|
5
|
-
|
|
6
|
-
from guidellm.objects.pydantic import StandardBaseModel
|
|
7
|
-
|
|
8
|
-
__all__ = ["GenerationRequest"]
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class GenerationRequest(StandardBaseModel):
|
|
12
|
-
"""
|
|
13
|
-
A class representing a request for generation.
|
|
14
|
-
This class is used to encapsulate the details of a generation request,
|
|
15
|
-
including the request ID, type, content, parameters, statistics, and constraints.
|
|
16
|
-
It is designed to be used with the BackendRequestsWorker class to handle
|
|
17
|
-
the generation process.
|
|
18
|
-
|
|
19
|
-
:param request_id: The unique identifier for the request.
|
|
20
|
-
:param request_type: The type of request (e.g., text, chat).
|
|
21
|
-
:param content: The content for the request to send to the backend.
|
|
22
|
-
If request_type is 'text', this should be a string or list of strings
|
|
23
|
-
which will be resolved by backend.text_completions.
|
|
24
|
-
If request_type is 'chat', this should be a string,
|
|
25
|
-
a list of (str, Dict[str, Union[str, Dict[str, str]], Path, Image]),
|
|
26
|
-
or Any raw content which will be resolved by backend.chat_completions.
|
|
27
|
-
If raw content, raw_content=True must be passed in the params.
|
|
28
|
-
:param params: Additional parameters for the request passed in as kwargs.
|
|
29
|
-
For an http backend, these are passed into the body of the request.
|
|
30
|
-
:param stats: Statistics for the request, such as the number of prompt tokens.
|
|
31
|
-
Used for tracking and reporting purposes.
|
|
32
|
-
:param constraints: Constraints for the request, such as the maximum number
|
|
33
|
-
of output tokens. Used for controlling the behavior of the backend.
|
|
34
|
-
"""
|
|
35
|
-
|
|
36
|
-
request_id: Optional[str] = Field(
|
|
37
|
-
default_factory=lambda: str(uuid.uuid4()),
|
|
38
|
-
description="The unique identifier for the request.",
|
|
39
|
-
)
|
|
40
|
-
request_type: Literal["text_completions", "chat_completions"] = Field(
|
|
41
|
-
default="text_completions",
|
|
42
|
-
description=(
|
|
43
|
-
"The type of request (e.g., text, chat). "
|
|
44
|
-
"If request_type='text_completions', resolved by backend.text_completions. "
|
|
45
|
-
"If request_typ='chat_completions', resolved by backend.chat_completions."
|
|
46
|
-
),
|
|
47
|
-
)
|
|
48
|
-
content: Any = Field(
|
|
49
|
-
description=(
|
|
50
|
-
"The content for the request to send to the backend. "
|
|
51
|
-
"If request_type is 'text', this should be a string or list of strings "
|
|
52
|
-
"which will be resolved by backend.text_completions. "
|
|
53
|
-
"If request_type is 'chat', this should be a string, "
|
|
54
|
-
"a list of (str, Dict[str, Union[str, Dict[str, str]], Path, Image]), "
|
|
55
|
-
"or Any raw content which will be resolved by backend.chat_completions. "
|
|
56
|
-
"If raw content, raw_content=True must be passed in the params."
|
|
57
|
-
)
|
|
58
|
-
)
|
|
59
|
-
params: dict[str, Any] = Field(
|
|
60
|
-
default_factory=dict,
|
|
61
|
-
description=(
|
|
62
|
-
"Additional parameters for the request that will be passed in as kwargs. "
|
|
63
|
-
"For an http backend, these are passed into the body of the request. "
|
|
64
|
-
),
|
|
65
|
-
)
|
|
66
|
-
stats: dict[Literal["prompt_tokens"], int] = Field(
|
|
67
|
-
default_factory=dict,
|
|
68
|
-
description=(
|
|
69
|
-
"Statistics for the request, such as the number of prompt tokens. "
|
|
70
|
-
"Used for tracking and reporting purposes."
|
|
71
|
-
),
|
|
72
|
-
)
|
|
73
|
-
constraints: dict[Literal["output_tokens"], int] = Field(
|
|
74
|
-
default_factory=dict,
|
|
75
|
-
description=(
|
|
76
|
-
"Constraints for the request, such as the maximum number of output tokens. "
|
|
77
|
-
"Used for controlling the behavior of the backend."
|
|
78
|
-
),
|
|
79
|
-
)
|
guidellm/request/types.py
DELETED
guidellm/scheduler/queues.py
DELETED
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Helper module for importing the correct queue types.
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
from dataclasses import dataclass
|
|
6
|
-
from queue import Empty as QueueEmpty
|
|
7
|
-
from queue import Full as QueueFull
|
|
8
|
-
from queue import Queue
|
|
9
|
-
from typing import Generic
|
|
10
|
-
|
|
11
|
-
from guidellm.request.types import RequestT, ResponseT
|
|
12
|
-
from guidellm.scheduler.result import WorkerProcessRequest, WorkerProcessResult
|
|
13
|
-
|
|
14
|
-
__all__ = [
|
|
15
|
-
"MPQueues",
|
|
16
|
-
"Queue",
|
|
17
|
-
"QueueEmpty",
|
|
18
|
-
"QueueFull",
|
|
19
|
-
]
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
@dataclass
|
|
23
|
-
class MPQueues(Generic[RequestT, ResponseT]):
|
|
24
|
-
requests: Queue[WorkerProcessRequest[RequestT, ResponseT]]
|
|
25
|
-
responses: Queue[WorkerProcessResult[RequestT, ResponseT]]
|
guidellm/scheduler/result.py
DELETED
|
@@ -1,155 +0,0 @@
|
|
|
1
|
-
from dataclasses import dataclass
|
|
2
|
-
from typing import (
|
|
3
|
-
Generic,
|
|
4
|
-
Literal,
|
|
5
|
-
Optional,
|
|
6
|
-
)
|
|
7
|
-
|
|
8
|
-
from guidellm.objects import StandardBaseModel
|
|
9
|
-
from guidellm.request.types import RequestT, ResponseT
|
|
10
|
-
from guidellm.scheduler.strategy import SchedulingStrategy
|
|
11
|
-
|
|
12
|
-
__all__ = [
|
|
13
|
-
"SchedulerRequestInfo",
|
|
14
|
-
"SchedulerRequestResult",
|
|
15
|
-
"SchedulerResult",
|
|
16
|
-
"SchedulerRunInfo",
|
|
17
|
-
"WorkerProcessRequest",
|
|
18
|
-
"WorkerProcessResult",
|
|
19
|
-
]
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
class SchedulerRunInfo(StandardBaseModel):
|
|
23
|
-
"""
|
|
24
|
-
Information about the current run of the scheduler.
|
|
25
|
-
This class holds metadata about the scheduling run,
|
|
26
|
-
including the start and end times, the number of processes,
|
|
27
|
-
and the scheduling strategy used.
|
|
28
|
-
It also tracks the number of requests created, queued, pending,
|
|
29
|
-
and completed during the run.
|
|
30
|
-
|
|
31
|
-
:param start_time: The start time of the scheduling run.
|
|
32
|
-
:param end_time: The end time of the scheduling run;
|
|
33
|
-
if None, then this will be math.inf.
|
|
34
|
-
:param end_number: The maximum number of requests to be processed;
|
|
35
|
-
if None, then this will be math.inf.
|
|
36
|
-
:param processes: The number of processes used in the scheduling run.
|
|
37
|
-
:param strategy: The scheduling strategy used in the run.
|
|
38
|
-
This should be an instance of SchedulingStrategy.
|
|
39
|
-
:param created_requests: The number of requests created during the run.
|
|
40
|
-
:param queued_requests: The number of requests queued during the run.
|
|
41
|
-
:param scheduled_requests: The number of requests scheduled during the run.
|
|
42
|
-
(requests pending being sent to the worker but recieved by a process)
|
|
43
|
-
:param processing_requests: The number of requests actively being run.
|
|
44
|
-
:param completed_requests: The number of requests completed during the run.
|
|
45
|
-
"""
|
|
46
|
-
|
|
47
|
-
start_time: float
|
|
48
|
-
end_time: float
|
|
49
|
-
end_number: float
|
|
50
|
-
processes: int
|
|
51
|
-
strategy: SchedulingStrategy
|
|
52
|
-
|
|
53
|
-
created_requests: int = 0
|
|
54
|
-
queued_requests: int = 0
|
|
55
|
-
scheduled_requests: int = 0
|
|
56
|
-
processing_requests: int = 0
|
|
57
|
-
completed_requests: int = 0
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
class SchedulerRequestInfo(StandardBaseModel):
|
|
61
|
-
"""
|
|
62
|
-
Information about a specific request run through the scheduler.
|
|
63
|
-
This class holds metadata about the request, including
|
|
64
|
-
the targeted start time, queued time, start time, end time,
|
|
65
|
-
and the process ID that handled the request.
|
|
66
|
-
|
|
67
|
-
:param targeted_start_time: The targeted start time for the request (time.time()).
|
|
68
|
-
:param queued_time: The time the request was queued (time.time()).
|
|
69
|
-
:param scheduled_time: The time the request was scheduled (time.time())
|
|
70
|
-
(any sleep time before the request was sent to the worker).
|
|
71
|
-
:param worker_start: The time the worker started processing request (time.time()).
|
|
72
|
-
:param worker_end: The time the worker finished processing request. (time.time()).
|
|
73
|
-
:param process_id: The ID of the underlying process that handled the request.
|
|
74
|
-
"""
|
|
75
|
-
|
|
76
|
-
requested: bool = False
|
|
77
|
-
completed: bool = False
|
|
78
|
-
errored: bool = False
|
|
79
|
-
canceled: bool = False
|
|
80
|
-
|
|
81
|
-
targeted_start_time: float = -1
|
|
82
|
-
queued_time: float = -1
|
|
83
|
-
dequeued_time: float = -1
|
|
84
|
-
scheduled_time: float = -1
|
|
85
|
-
worker_start: float = -1
|
|
86
|
-
request_start: float = -1
|
|
87
|
-
request_end: float = -1
|
|
88
|
-
worker_end: float = -1
|
|
89
|
-
process_id: int = -1
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
class SchedulerResult(StandardBaseModel):
|
|
93
|
-
"""
|
|
94
|
-
The yielded, iterative result for a scheduler run.
|
|
95
|
-
These are triggered on the start and end of the run,
|
|
96
|
-
as well as on the start and end of each request.
|
|
97
|
-
Depending on the type, it will hold the request and response
|
|
98
|
-
along with information and statistics about the request and general run.
|
|
99
|
-
|
|
100
|
-
:param type_: The type of the result, which can be one of:
|
|
101
|
-
- "run_start": Indicates the start of the run.
|
|
102
|
-
- "run_complete": Indicates the completion of the run (teardown happens after).
|
|
103
|
-
- "request_start": Indicates the start of a request.
|
|
104
|
-
- "request_complete": Indicates the completion of a request.
|
|
105
|
-
:param request: The request that was processed.
|
|
106
|
-
:param response: The response from the worker for the request.
|
|
107
|
-
:param request_info: Information about the request, including
|
|
108
|
-
the targeted start time, queued time, start time, end time,
|
|
109
|
-
and the process ID that handled the request.
|
|
110
|
-
:param run_info: Information about the current run of the scheduler,
|
|
111
|
-
including the start and end times, the number of processes,
|
|
112
|
-
and the scheduling strategy used.
|
|
113
|
-
It also tracks the number of requests created, queued, pending,
|
|
114
|
-
and completed during the run.
|
|
115
|
-
"""
|
|
116
|
-
|
|
117
|
-
pydantic_type: Literal["scheduler_result"] = "scheduler_result"
|
|
118
|
-
type_: Literal[
|
|
119
|
-
"run_start",
|
|
120
|
-
"run_complete",
|
|
121
|
-
"request_scheduled",
|
|
122
|
-
"request_start",
|
|
123
|
-
"request_complete",
|
|
124
|
-
]
|
|
125
|
-
run_info: SchedulerRunInfo
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
class SchedulerRequestResult(
|
|
129
|
-
SchedulerResult,
|
|
130
|
-
Generic[RequestT, ResponseT],
|
|
131
|
-
):
|
|
132
|
-
pydantic_type: Literal["scheduler_request_result"] = "scheduler_request_result" # type: ignore[assignment]
|
|
133
|
-
type_: Literal[
|
|
134
|
-
"request_scheduled",
|
|
135
|
-
"request_start",
|
|
136
|
-
"request_complete",
|
|
137
|
-
]
|
|
138
|
-
request: RequestT
|
|
139
|
-
request_info: SchedulerRequestInfo
|
|
140
|
-
response: Optional[ResponseT] = None
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
@dataclass
|
|
144
|
-
class WorkerProcessRequest(Generic[RequestT, ResponseT]):
|
|
145
|
-
request: RequestT
|
|
146
|
-
timeout_time: float
|
|
147
|
-
queued_time: float
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
@dataclass
|
|
151
|
-
class WorkerProcessResult(Generic[RequestT, ResponseT]):
|
|
152
|
-
type_: Literal["request_scheduled", "request_start", "request_complete"]
|
|
153
|
-
request: RequestT
|
|
154
|
-
response: Optional[ResponseT]
|
|
155
|
-
info: SchedulerRequestInfo
|