guidellm 0.4.0a18__py3-none-any.whl → 0.4.0a155__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of guidellm might be problematic. Click here for more details.

Files changed (116)
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +451 -252
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +110 -0
  5. guidellm/backends/openai.py +355 -0
  6. guidellm/backends/response_handlers.py +455 -0
  7. guidellm/benchmark/__init__.py +53 -39
  8. guidellm/benchmark/benchmarker.py +148 -317
  9. guidellm/benchmark/entrypoints.py +466 -128
  10. guidellm/benchmark/output.py +517 -771
  11. guidellm/benchmark/profile.py +580 -280
  12. guidellm/benchmark/progress.py +568 -549
  13. guidellm/benchmark/scenarios/__init__.py +40 -0
  14. guidellm/benchmark/scenarios/chat.json +6 -0
  15. guidellm/benchmark/scenarios/rag.json +6 -0
  16. guidellm/benchmark/schemas.py +2085 -0
  17. guidellm/data/__init__.py +28 -4
  18. guidellm/data/collators.py +16 -0
  19. guidellm/data/deserializers/__init__.py +53 -0
  20. guidellm/data/deserializers/deserializer.py +109 -0
  21. guidellm/data/deserializers/file.py +222 -0
  22. guidellm/data/deserializers/huggingface.py +94 -0
  23. guidellm/data/deserializers/memory.py +192 -0
  24. guidellm/data/deserializers/synthetic.py +346 -0
  25. guidellm/data/loaders.py +145 -0
  26. guidellm/data/preprocessors/__init__.py +25 -0
  27. guidellm/data/preprocessors/formatters.py +412 -0
  28. guidellm/data/preprocessors/mappers.py +198 -0
  29. guidellm/data/preprocessors/preprocessor.py +29 -0
  30. guidellm/data/processor.py +30 -0
  31. guidellm/data/schemas.py +13 -0
  32. guidellm/data/utils/__init__.py +10 -0
  33. guidellm/data/utils/dataset.py +94 -0
  34. guidellm/data/utils/functions.py +18 -0
  35. guidellm/extras/__init__.py +4 -0
  36. guidellm/extras/audio.py +215 -0
  37. guidellm/extras/vision.py +242 -0
  38. guidellm/logger.py +2 -2
  39. guidellm/mock_server/__init__.py +8 -0
  40. guidellm/mock_server/config.py +84 -0
  41. guidellm/mock_server/handlers/__init__.py +17 -0
  42. guidellm/mock_server/handlers/chat_completions.py +280 -0
  43. guidellm/mock_server/handlers/completions.py +280 -0
  44. guidellm/mock_server/handlers/tokenizer.py +142 -0
  45. guidellm/mock_server/models.py +510 -0
  46. guidellm/mock_server/server.py +168 -0
  47. guidellm/mock_server/utils.py +302 -0
  48. guidellm/preprocess/dataset.py +23 -26
  49. guidellm/presentation/builder.py +2 -2
  50. guidellm/presentation/data_models.py +25 -21
  51. guidellm/presentation/injector.py +2 -3
  52. guidellm/scheduler/__init__.py +65 -26
  53. guidellm/scheduler/constraints.py +1035 -0
  54. guidellm/scheduler/environments.py +252 -0
  55. guidellm/scheduler/scheduler.py +140 -368
  56. guidellm/scheduler/schemas.py +272 -0
  57. guidellm/scheduler/strategies.py +519 -0
  58. guidellm/scheduler/worker.py +391 -420
  59. guidellm/scheduler/worker_group.py +707 -0
  60. guidellm/schemas/__init__.py +31 -0
  61. guidellm/schemas/info.py +159 -0
  62. guidellm/schemas/request.py +216 -0
  63. guidellm/schemas/response.py +119 -0
  64. guidellm/schemas/stats.py +228 -0
  65. guidellm/{config.py → settings.py} +32 -21
  66. guidellm/utils/__init__.py +95 -8
  67. guidellm/utils/auto_importer.py +98 -0
  68. guidellm/utils/cli.py +46 -2
  69. guidellm/utils/console.py +183 -0
  70. guidellm/utils/encoding.py +778 -0
  71. guidellm/utils/functions.py +134 -0
  72. guidellm/utils/hf_datasets.py +1 -2
  73. guidellm/utils/hf_transformers.py +4 -4
  74. guidellm/utils/imports.py +9 -0
  75. guidellm/utils/messaging.py +1118 -0
  76. guidellm/utils/mixins.py +115 -0
  77. guidellm/utils/pydantic_utils.py +411 -0
  78. guidellm/utils/random.py +3 -4
  79. guidellm/utils/registry.py +220 -0
  80. guidellm/utils/singleton.py +133 -0
  81. guidellm/{objects → utils}/statistics.py +341 -247
  82. guidellm/utils/synchronous.py +159 -0
  83. guidellm/utils/text.py +163 -50
  84. guidellm/utils/typing.py +41 -0
  85. guidellm/version.py +1 -1
  86. {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/METADATA +33 -10
  87. guidellm-0.4.0a155.dist-info/RECORD +96 -0
  88. guidellm/backend/__init__.py +0 -23
  89. guidellm/backend/backend.py +0 -259
  90. guidellm/backend/openai.py +0 -705
  91. guidellm/backend/response.py +0 -136
  92. guidellm/benchmark/aggregator.py +0 -760
  93. guidellm/benchmark/benchmark.py +0 -837
  94. guidellm/benchmark/scenario.py +0 -104
  95. guidellm/data/prideandprejudice.txt.gz +0 -0
  96. guidellm/dataset/__init__.py +0 -22
  97. guidellm/dataset/creator.py +0 -213
  98. guidellm/dataset/entrypoints.py +0 -42
  99. guidellm/dataset/file.py +0 -92
  100. guidellm/dataset/hf_datasets.py +0 -62
  101. guidellm/dataset/in_memory.py +0 -132
  102. guidellm/dataset/synthetic.py +0 -287
  103. guidellm/objects/__init__.py +0 -18
  104. guidellm/objects/pydantic.py +0 -89
  105. guidellm/request/__init__.py +0 -18
  106. guidellm/request/loader.py +0 -284
  107. guidellm/request/request.py +0 -79
  108. guidellm/request/types.py +0 -10
  109. guidellm/scheduler/queues.py +0 -25
  110. guidellm/scheduler/result.py +0 -155
  111. guidellm/scheduler/strategy.py +0 -495
  112. guidellm-0.4.0a18.dist-info/RECORD +0 -62
  113. {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/WHEEL +0 -0
  114. {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/entry_points.txt +0 -0
  115. {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/licenses/LICENSE +0 -0
  116. {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,272 @@
1
+ """
2
+ Core data structures and interfaces for the GuideLLM scheduler system.
3
+
4
+ Provides type-safe abstractions for distributed request processing, timing
5
+ measurements, and backend interfaces for benchmarking operations. Central to
6
+ the scheduler architecture, enabling request lifecycle tracking, backend
7
+ coordination, and state management across distributed worker processes.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import time
13
+ from collections.abc import AsyncIterator
14
+ from typing import Any, Generic, Literal, Protocol, TypeVar
15
+
16
+ from pydantic import Field
17
+ from typing_extensions import TypeAliasType, TypedDict
18
+
19
+ from guidellm.schemas import RequestInfo
20
+ from guidellm.utils import RegistryMixin, StandardBaseModel
21
+ from guidellm.utils.registry import RegistryObjT
22
+
23
+ __all__ = [
24
+ "BackendInterface",
25
+ "BackendT",
26
+ "MultiTurnRequestT",
27
+ "RequestT",
28
+ "ResponseT",
29
+ "SchedulerMessagingPydanticRegistry",
30
+ "SchedulerState",
31
+ "SchedulerUpdateAction",
32
+ "SchedulerUpdateActionProgress",
33
+ ]
34
+
35
+ RequestT = TypeVar("RequestT")
36
+ "Generic request object type for scheduler processing"
37
+
38
+ ResponseT = TypeVar("ResponseT")
39
+ "Generic response object type returned by backend processing"
40
+
41
+ MultiTurnRequestT = TypeAliasType(
42
+ "MultiTurnRequestT",
43
+ list[RequestT | tuple[RequestT, float]] | tuple[RequestT | tuple[RequestT, float]],
44
+ type_params=(RequestT,),
45
+ )
46
+ "Multi-turn request structure supporting conversation history with optional delays"
47
+
48
+
49
class SchedulerMessagingPydanticRegistry(RegistryMixin[RegistryObjT]):
    """
    Registry of Pydantic types exchanged in scheduler inter-process messages.

    Adds no behavior of its own; everything comes from ``RegistryMixin``. It
    gives the scheduler a dedicated registry for the Pydantic class types used
    when distributed scheduler components and worker processes communicate.
    """
56
+
57
+
58
class BackendInterface(Protocol, Generic[RequestT, ResponseT]):
    """
    Protocol defining the interface for request processing backends.

    Establishes the contract for backend implementations that process requests
    within the scheduler system. Backends manage initialization, validation,
    processing, and shutdown lifecycle. All properties must be pickleable before
    process_startup is called for multi-process environments.

    Every member below is a declaration only: the bodies are docstrings and
    implementations are expected to provide the behavior.

    Example:
    ::
        class CustomBackend(BackendInterface):
            @property
            def processes_limit(self) -> int:
                return 4

            async def resolve(self, request, request_info, history=None):
                yield response, updated_request_info
    """

    @property
    def processes_limit(self) -> int | None:
        """
        :return: Maximum worker processes supported, or None if unlimited
        """

    @property
    def requests_limit(self) -> int | None:
        """
        :return: Maximum concurrent requests supported, or None if unlimited
        """

    @property
    def info(self) -> dict[str, Any]:
        """
        :return: Backend metadata including model initialization and configuration
        """

    async def process_startup(self) -> None:
        """
        Perform backend initialization and startup procedures.

        :raises Exception: Implementation-specific exceptions for startup failures
        """

    async def validate(self) -> None:
        """
        Validate backend configuration and operational status.

        :raises Exception: Implementation-specific exceptions for validation failures
        """

    async def process_shutdown(self) -> None:
        """
        Perform backend cleanup and shutdown procedures.

        :raises Exception: Implementation-specific exceptions for shutdown failures
        """

    # Deliberately a plain ``def``: implementations are async generators
    # (``async def`` with ``yield``), so calling them returns the async iterator
    # directly. Declaring this stub ``async def`` without a ``yield`` would make
    # type checkers treat it as a coroutine that *returns* an iterator, which
    # does not match ``async for ... in backend.resolve(...)`` usage.
    def resolve(
        self,
        request: RequestT,
        request_info: RequestInfo,
        history: list[tuple[RequestT, ResponseT]] | None = None,
    ) -> AsyncIterator[tuple[ResponseT, RequestInfo]]:
        """
        Process a request and yield incremental response updates.

        Implement this as an async generator (``async def`` containing
        ``yield``), as shown in the class-level example.

        :param request: The request object to process
        :param request_info: Scheduling metadata and timing information
        :param history: Conversation history for multi-turn requests
        :yield: Tuples of (response, updated_request_info) for each response chunk
        :raises Exception: Implementation-specific exceptions for processing failures
        """
132
+
133
+
134
# Upper-bounded type variable: it can be bound to any type that satisfies the
# BackendInterface protocol.
BackendT = TypeVar("BackendT", bound=BackendInterface)
"Generic backend interface type for request processing"
136
+
137
+
138
class SchedulerUpdateActionProgress(TypedDict, total=False):
    """
    Optional estimates of how much scheduler work is left.

    Declared with ``total=False``, so any subset of the keys (including none)
    forms a valid value, and each key that is present may also be ``None``.
    Used by constraints and monitoring systems to track execution progress and
    make termination decisions.
    """

    # Remaining work expressed as a fraction.
    remaining_fraction: float | None
    # Remaining work expressed as a request count.
    remaining_requests: float | None
    # Remaining work expressed as a duration.
    remaining_duration: float | None
150
+
151
+
152
class SchedulerUpdateAction(StandardBaseModel):
    """
    Directive describing how the scheduler should proceed.

    Bundles two independent control signals, one for request queuing and one
    for request processing, with free-form metadata and optional progress
    estimates. Both signals default to "continue". Used by constraints to
    communicate termination conditions and progress to scheduler components.

    Example:
    ::
        action = SchedulerUpdateAction(
            request_queuing="stop",
            request_processing="continue",
            metadata={"reason": "max_requests_reached"}
        )
    """

    request_queuing: Literal["continue", "stop"] = Field(
        description="Action to take for request queuing operations",
        default="continue",
    )
    request_processing: Literal["continue", "stop_local", "stop_all"] = Field(
        description="Action to take for request processing operations",
        default="continue",
    )
    metadata: dict[str, Any] = Field(
        description="Additional context and data for the scheduler action",
        default_factory=dict,
    )
    progress: SchedulerUpdateActionProgress = Field(
        description="Progress information for the scheduler action",
        # Factory so each instance gets its own empty progress mapping.
        default_factory=lambda: SchedulerUpdateActionProgress(),
    )
184
+
185
+
186
class SchedulerState(StandardBaseModel):
    """
    Snapshot of a scheduler run: identity, timing, progress and request counts.

    Holds the timestamps marking when the run started and when queuing and
    processing stopped, the constraint actions associated with those stops,
    optional remaining-work estimates, and one counter per request lifecycle
    stage. Every field has a default, so the model can be built with no
    arguments; ``start_time`` defaults to the time of construction.

    Example:
    ::
        state = SchedulerState(node_id=0, num_processes=4)
        state.created_requests += 1
        state.queued_requests += 1
        completion_rate = state.processed_requests / state.created_requests
    """

    # --- Identity ---
    node_id: int = Field(
        description="Unique identifier for this scheduler node",
        default=-1,
    )
    num_processes: int = Field(
        description="Number of worker processes in this scheduler",
        default=-1,
    )

    # --- Timing and termination causes ---
    start_time: float = Field(
        description="Unix timestamp when the scheduler started",
        default_factory=time.time,
    )
    end_time: float | None = Field(
        description="Unix timestamp when the scheduler stopped",
        default=None,
    )
    end_queuing_time: float | None = Field(
        description="Unix timestamp when request queuing stopped",
        default=None,
    )
    end_queuing_constraints: dict[str, SchedulerUpdateAction] = Field(
        description="Constraints that triggered queuing termination",
        default_factory=dict,
    )
    end_processing_time: float | None = Field(
        description="Unix timestamp when request processing stopped",
        default=None,
    )
    end_processing_constraints: dict[str, SchedulerUpdateAction] = Field(
        description="Constraints that triggered processing termination",
        default_factory=dict,
    )
    scheduler_constraints: dict[str, SchedulerUpdateAction] = Field(
        description="Latest state from all constraints applied during scheduler run",
        default_factory=dict,
    )

    # --- Remaining-work estimates (None when unknown) ---
    remaining_fraction: float | None = Field(
        description="Estimated fraction of remaining progress, if known",
        default=None,
    )
    remaining_requests: float | None = Field(
        description="Estimated number of remaining requests to process, if known",
        default=None,
    )
    remaining_duration: float | None = Field(
        description="Estimated remaining time in seconds for scheduler run, if known",
        default=None,
    )

    # --- Request counters ---
    created_requests: int = Field(
        description="Total number of requests created",
        default=0,
    )
    queued_requests: int = Field(
        description="Total number of requests queued for processing",
        default=0,
    )
    pending_requests: int = Field(
        description="Number of requests pending processing within a worker",
        default=0,
    )
    processing_requests: int = Field(
        description="Number of requests currently being processed",
        default=0,
    )
    processed_requests: int = Field(
        description="Number of requests that completed processing",
        default=0,
    )
    successful_requests: int = Field(
        description="Number of requests that completed successfully",
        default=0,
    )
    errored_requests: int = Field(
        description="Number of requests that failed with errors",
        default=0,
    )
    cancelled_requests: int = Field(
        description="Number of requests that were cancelled",
        default=0,
    )