guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +524 -255
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +109 -0
  5. guidellm/backends/openai.py +340 -0
  6. guidellm/backends/response_handlers.py +428 -0
  7. guidellm/benchmark/__init__.py +69 -39
  8. guidellm/benchmark/benchmarker.py +160 -316
  9. guidellm/benchmark/entrypoints.py +560 -127
  10. guidellm/benchmark/outputs/__init__.py +24 -0
  11. guidellm/benchmark/outputs/console.py +633 -0
  12. guidellm/benchmark/outputs/csv.py +721 -0
  13. guidellm/benchmark/outputs/html.py +473 -0
  14. guidellm/benchmark/outputs/output.py +169 -0
  15. guidellm/benchmark/outputs/serialized.py +69 -0
  16. guidellm/benchmark/profiles.py +718 -0
  17. guidellm/benchmark/progress.py +553 -556
  18. guidellm/benchmark/scenarios/__init__.py +40 -0
  19. guidellm/benchmark/scenarios/chat.json +6 -0
  20. guidellm/benchmark/scenarios/rag.json +6 -0
  21. guidellm/benchmark/schemas/__init__.py +66 -0
  22. guidellm/benchmark/schemas/base.py +402 -0
  23. guidellm/benchmark/schemas/generative/__init__.py +55 -0
  24. guidellm/benchmark/schemas/generative/accumulator.py +841 -0
  25. guidellm/benchmark/schemas/generative/benchmark.py +163 -0
  26. guidellm/benchmark/schemas/generative/entrypoints.py +381 -0
  27. guidellm/benchmark/schemas/generative/metrics.py +927 -0
  28. guidellm/benchmark/schemas/generative/report.py +158 -0
  29. guidellm/data/__init__.py +34 -4
  30. guidellm/data/builders.py +541 -0
  31. guidellm/data/collators.py +16 -0
  32. guidellm/data/config.py +120 -0
  33. guidellm/data/deserializers/__init__.py +49 -0
  34. guidellm/data/deserializers/deserializer.py +141 -0
  35. guidellm/data/deserializers/file.py +223 -0
  36. guidellm/data/deserializers/huggingface.py +94 -0
  37. guidellm/data/deserializers/memory.py +194 -0
  38. guidellm/data/deserializers/synthetic.py +246 -0
  39. guidellm/data/entrypoints.py +52 -0
  40. guidellm/data/loaders.py +190 -0
  41. guidellm/data/preprocessors/__init__.py +27 -0
  42. guidellm/data/preprocessors/formatters.py +410 -0
  43. guidellm/data/preprocessors/mappers.py +196 -0
  44. guidellm/data/preprocessors/preprocessor.py +30 -0
  45. guidellm/data/processor.py +29 -0
  46. guidellm/data/schemas.py +175 -0
  47. guidellm/data/utils/__init__.py +6 -0
  48. guidellm/data/utils/dataset.py +94 -0
  49. guidellm/extras/__init__.py +4 -0
  50. guidellm/extras/audio.py +220 -0
  51. guidellm/extras/vision.py +242 -0
  52. guidellm/logger.py +2 -2
  53. guidellm/mock_server/__init__.py +8 -0
  54. guidellm/mock_server/config.py +84 -0
  55. guidellm/mock_server/handlers/__init__.py +17 -0
  56. guidellm/mock_server/handlers/chat_completions.py +280 -0
  57. guidellm/mock_server/handlers/completions.py +280 -0
  58. guidellm/mock_server/handlers/tokenizer.py +142 -0
  59. guidellm/mock_server/models.py +510 -0
  60. guidellm/mock_server/server.py +238 -0
  61. guidellm/mock_server/utils.py +302 -0
  62. guidellm/scheduler/__init__.py +69 -26
  63. guidellm/scheduler/constraints/__init__.py +49 -0
  64. guidellm/scheduler/constraints/constraint.py +325 -0
  65. guidellm/scheduler/constraints/error.py +411 -0
  66. guidellm/scheduler/constraints/factory.py +182 -0
  67. guidellm/scheduler/constraints/request.py +312 -0
  68. guidellm/scheduler/constraints/saturation.py +722 -0
  69. guidellm/scheduler/environments.py +252 -0
  70. guidellm/scheduler/scheduler.py +137 -368
  71. guidellm/scheduler/schemas.py +358 -0
  72. guidellm/scheduler/strategies.py +617 -0
  73. guidellm/scheduler/worker.py +413 -419
  74. guidellm/scheduler/worker_group.py +712 -0
  75. guidellm/schemas/__init__.py +65 -0
  76. guidellm/schemas/base.py +417 -0
  77. guidellm/schemas/info.py +188 -0
  78. guidellm/schemas/request.py +235 -0
  79. guidellm/schemas/request_stats.py +349 -0
  80. guidellm/schemas/response.py +124 -0
  81. guidellm/schemas/statistics.py +1018 -0
  82. guidellm/{config.py → settings.py} +31 -24
  83. guidellm/utils/__init__.py +71 -8
  84. guidellm/utils/auto_importer.py +98 -0
  85. guidellm/utils/cli.py +132 -5
  86. guidellm/utils/console.py +566 -0
  87. guidellm/utils/encoding.py +778 -0
  88. guidellm/utils/functions.py +159 -0
  89. guidellm/utils/hf_datasets.py +1 -2
  90. guidellm/utils/hf_transformers.py +4 -4
  91. guidellm/utils/imports.py +9 -0
  92. guidellm/utils/messaging.py +1118 -0
  93. guidellm/utils/mixins.py +115 -0
  94. guidellm/utils/random.py +3 -4
  95. guidellm/utils/registry.py +220 -0
  96. guidellm/utils/singleton.py +133 -0
  97. guidellm/utils/synchronous.py +159 -0
  98. guidellm/utils/text.py +163 -50
  99. guidellm/utils/typing.py +41 -0
  100. guidellm/version.py +2 -2
  101. guidellm-0.6.0a5.dist-info/METADATA +364 -0
  102. guidellm-0.6.0a5.dist-info/RECORD +109 -0
  103. guidellm/backend/__init__.py +0 -23
  104. guidellm/backend/backend.py +0 -259
  105. guidellm/backend/openai.py +0 -708
  106. guidellm/backend/response.py +0 -136
  107. guidellm/benchmark/aggregator.py +0 -760
  108. guidellm/benchmark/benchmark.py +0 -837
  109. guidellm/benchmark/output.py +0 -997
  110. guidellm/benchmark/profile.py +0 -409
  111. guidellm/benchmark/scenario.py +0 -104
  112. guidellm/data/prideandprejudice.txt.gz +0 -0
  113. guidellm/dataset/__init__.py +0 -22
  114. guidellm/dataset/creator.py +0 -213
  115. guidellm/dataset/entrypoints.py +0 -42
  116. guidellm/dataset/file.py +0 -92
  117. guidellm/dataset/hf_datasets.py +0 -62
  118. guidellm/dataset/in_memory.py +0 -132
  119. guidellm/dataset/synthetic.py +0 -287
  120. guidellm/objects/__init__.py +0 -18
  121. guidellm/objects/pydantic.py +0 -89
  122. guidellm/objects/statistics.py +0 -953
  123. guidellm/preprocess/__init__.py +0 -3
  124. guidellm/preprocess/dataset.py +0 -374
  125. guidellm/presentation/__init__.py +0 -28
  126. guidellm/presentation/builder.py +0 -27
  127. guidellm/presentation/data_models.py +0 -232
  128. guidellm/presentation/injector.py +0 -66
  129. guidellm/request/__init__.py +0 -18
  130. guidellm/request/loader.py +0 -284
  131. guidellm/request/request.py +0 -79
  132. guidellm/request/types.py +0 -10
  133. guidellm/scheduler/queues.py +0 -25
  134. guidellm/scheduler/result.py +0 -155
  135. guidellm/scheduler/strategy.py +0 -495
  136. guidellm-0.3.1.dist-info/METADATA +0 -329
  137. guidellm-0.3.1.dist-info/RECORD +0 -62
  138. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/WHEEL +0 -0
  139. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/entry_points.txt +0 -0
  140. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/licenses/LICENSE +0 -0
  141. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,358 @@
1
+ """
2
+ Core data structures and interfaces for the GuideLLM scheduler system.
3
+
4
+ Provides type-safe abstractions for distributed request processing, timing
5
+ measurements, and backend interfaces for benchmarking operations. Central to
6
+ the scheduler architecture, enabling request lifecycle tracking, backend
7
+ coordination, and state management across distributed worker processes.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import time
13
+ from collections.abc import AsyncIterator
14
+ from typing import Any, Generic, Literal, Protocol, TypeVar
15
+
16
+ from pydantic import Field
17
+ from typing_extensions import TypeAliasType
18
+
19
+ from guidellm.schemas import RequestInfo, StandardBaseModel
20
+ from guidellm.utils import RegistryMixin
21
+ from guidellm.utils.registry import RegistryObjT
22
+
23
+ __all__ = [
24
+ "BackendInterface",
25
+ "BackendT",
26
+ "MultiTurnRequestT",
27
+ "RequestT",
28
+ "ResponseT",
29
+ "SchedulerMessagingPydanticRegistry",
30
+ "SchedulerProgress",
31
+ "SchedulerState",
32
+ "SchedulerUpdateAction",
33
+ ]
34
+
35
+ RequestT = TypeVar("RequestT")
36
+ "Generic request object type for scheduler processing"
37
+
38
+ ResponseT = TypeVar("ResponseT")
39
+ "Generic response object type returned by backend processing"
40
+
41
+ MultiTurnRequestT = TypeAliasType(
42
+ "MultiTurnRequestT",
43
+ list[RequestT | tuple[RequestT, float]] | tuple[RequestT | tuple[RequestT, float]],
44
+ type_params=(RequestT,),
45
+ )
46
+ "Multi-turn request structure supporting conversation history with optional delays"
47
+
48
+
49
class SchedulerMessagingPydanticRegistry(RegistryMixin[RegistryObjT]):
    """
    Registry of the Pydantic types exchanged in scheduler inter-process messages.

    Gives distributed scheduler components and worker processes one generic
    place to declare the Pydantic class types they communicate with. The class
    defines no members of its own; everything comes from ``RegistryMixin``.
    """
56
+
57
+
58
class BackendInterface(Protocol, Generic[RequestT, ResponseT]):
    """
    Structural contract for backends that process scheduler requests.

    A backend covers the full lifecycle: startup, validation, request
    resolution, and shutdown. In multi-process environments every property must
    be pickleable before ``process_startup`` is called.

    Example:
    ::
        class CustomBackend(BackendInterface):
            @property
            def processes_limit(self) -> int:
                return 4

            async def resolve(self, request, request_info, history=None):
                yield response, updated_request_info
    """

    @property
    def processes_limit(self) -> int | None:
        """
        :return: Upper bound on worker processes, or None when there is no limit
        """
        ...

    @property
    def requests_limit(self) -> int | None:
        """
        :return: Upper bound on concurrent requests, or None when there is no limit
        """
        ...

    @property
    def info(self) -> dict[str, Any]:
        """
        :return: Metadata describing the backend, including model initialization
            and configuration
        """
        ...

    async def process_startup(self) -> None:
        """
        Run the backend's initialization and startup procedures.

        :raises Exception: Implementation-specific exceptions for startup failures
        """
        ...

    async def validate(self) -> None:
        """
        Check the backend's configuration and operational status.

        :raises Exception: Implementation-specific exceptions for validation failures
        """
        ...

    async def process_shutdown(self) -> None:
        """
        Run the backend's cleanup and shutdown procedures.

        :raises Exception: Implementation-specific exceptions for shutdown failures
        """
        ...

    # NOTE(review): the class docstring example implements ``resolve`` as an
    # async generator (``async def`` + ``yield``), while this declaration is a
    # plain ``async def`` with no ``yield``. Static type checkers may treat the
    # two differently -- confirm the declaration matches what implementers write.
    async def resolve(
        self,
        request: RequestT,
        request_info: RequestInfo,
        history: list[tuple[RequestT, ResponseT]] | None = None,
    ) -> AsyncIterator[tuple[ResponseT, RequestInfo]]:
        """
        Handle one request, yielding incremental response updates.

        :param request: The request object to process
        :param request_info: Scheduling metadata and timing information
        :param history: Prior (request, response) pairs for multi-turn requests
        :yield: Tuples of (response, updated_request_info) for each response chunk
        :raises Exception: Implementation-specific exceptions for processing failures
        """
        ...
132
+
133
+
134
BackendT = TypeVar("BackendT", bound=BackendInterface)
"Type variable bounded by BackendInterface, for code generic over backends"
136
+
137
+
138
class SchedulerProgress(StandardBaseModel):
    """
    Progress tracking data for scheduler operations.

    Holds estimates of the work left in a scheduler run, expressed both as
    request counts and as durations, and derives "fraction remaining" values
    from them. Used by constraints and monitoring systems to track execution
    progress and make termination decisions.
    """

    remaining_requests: float | None = Field(
        default=None, description="Estimated number of remaining requests to process"
    )
    total_requests: float | None = Field(
        default=None, description="Total number of requests to process"
    )
    remaining_duration: float | None = Field(
        default=None, description="Estimated remaining duration in seconds"
    )
    total_duration: float | None = Field(
        default=None, description="Total duration in seconds to process for"
    )
    stop_time: float | None = Field(
        default=None, description="The timestamp the processing stopped at"
    )

    @staticmethod
    def _fraction_remaining(
        remaining: float | None, total: float | None
    ) -> float | None:
        """
        Shared helper behind the request and duration fraction properties.

        :param remaining: Amount of work still outstanding, if known
        :param total: Total amount of work, if known
        :return: ``remaining / total`` when both are set and ``total`` is
            positive, otherwise None
        """
        # The ``total > 0`` guard also prevents division by zero.
        if remaining is not None and total is not None and total > 0:
            return remaining / float(total)
        return None

    @property
    def remaining_fraction(self) -> float | None:
        """
        :return: The smaller of the request and duration remaining fractions,
            whichever are known; None when neither is known
        """
        known = [
            value
            for value in (
                self.remaining_requests_fraction,
                self.remaining_duration_fraction,
            )
            if value is not None
        ]
        return min(known) if known else None

    @property
    def remaining_requests_fraction(self) -> float | None:
        """
        :return: Estimated fraction of remaining requests, if known
        """
        return self._fraction_remaining(self.remaining_requests, self.total_requests)

    @property
    def remaining_duration_fraction(self) -> float | None:
        """
        :return: Estimated fraction of remaining duration, if known
        """
        return self._fraction_remaining(self.remaining_duration, self.total_duration)

    def combine(self, other: SchedulerProgress) -> SchedulerProgress:
        """
        Merge another progress estimate into this instance, in place.

        For requests and for duration independently, the remaining/total pair is
        copied from ``other`` when ``other`` has a known fraction and this
        instance either has none or has a larger one. ``stop_time`` is replaced
        when ``other`` has one and it is earlier (or this instance has none).

        :param other: Another progress instance to combine with
        :return: This same instance, mutated, so calls can be chained; no new
            object is created
        """
        other_requests = other.remaining_requests_fraction
        if other_requests is not None:
            own_requests = self.remaining_requests_fraction
            if own_requests is None or other_requests < own_requests:
                # Only update if the other is more advanced (lower fraction)
                self.remaining_requests = other.remaining_requests
                self.total_requests = other.total_requests

        other_duration = other.remaining_duration_fraction
        if other_duration is not None:
            own_duration = self.remaining_duration_fraction
            if own_duration is None or other_duration < own_duration:
                # Only update if the other is more advanced (lower fraction)
                self.remaining_duration = other.remaining_duration
                self.total_duration = other.total_duration

        if other.stop_time is not None and (
            self.stop_time is None or other.stop_time < self.stop_time
        ):
            # Only update if the other has an earlier stop time
            self.stop_time = other.stop_time

        return self
238
+
239
+
240
class SchedulerUpdateAction(StandardBaseModel):
    """
    Control signals telling the scheduler how to proceed.

    Carries separate directives for request queuing and request processing,
    plus free-form metadata and a progress estimate. Used by constraints to
    communicate termination conditions and progress to scheduler components.

    Example:
    ::
        action = SchedulerUpdateAction(
            request_queuing="stop",
            request_processing="continue",
            metadata={"reason": "max_requests_reached"}
        )
    """

    request_queuing: Literal["continue", "stop"] = Field(
        description="Action to take for request queuing operations",
        default="continue",
    )
    request_processing: Literal["continue", "stop_local", "stop_all"] = Field(
        description="Action to take for request processing operations",
        default="continue",
    )
    metadata: dict[str, Any] = Field(
        description="Additional context and data for the scheduler action",
        default_factory=dict,
    )
    progress: SchedulerProgress = Field(
        description="Progress information for the scheduler action",
        default_factory=lambda: SchedulerProgress(),
    )
272
+
273
+
274
class SchedulerState(StandardBaseModel):
    """
    State record for a scheduler run.

    Groups the node identity, lifecycle timestamps, the constraint actions that
    ended queuing or processing, overall progress, and per-status request
    counters. Central to scheduler coordination, providing real-time metrics
    for monitoring and decision-making across distributed worker processes.

    Example:
    ::
        state = SchedulerState(node_id=0, num_processes=4)
        state.created_requests += 1
        state.queued_requests += 1
        completion_rate = state.processed_requests / state.created_requests
    """

    # Identity of this scheduler node; both default to -1.
    node_id: int = Field(
        default=-1, description="Unique identifier for this scheduler node"
    )
    num_processes: int = Field(
        default=-1, description="Number of worker processes in this scheduler"
    )

    # Lifecycle timestamps; start_time defaults to time.time() at construction.
    start_time: float = Field(
        default_factory=time.time,
        description="Unix timestamp when the scheduler started",
    )
    end_time: float | None = Field(
        description="Unix timestamp when the scheduler stopped", default=None
    )
    start_requests_time: float | None = Field(
        description="Unix timestamp of the first sent request", default=None
    )
    end_requests_time: float | None = Field(
        description="Unix timestamp of the last finalized request", default=None
    )

    # Termination bookkeeping for queuing and processing.
    end_queuing_time: float | None = Field(
        description="Unix timestamp when request queuing stopped", default=None
    )
    end_queuing_constraints: dict[str, SchedulerUpdateAction] = Field(
        description="Constraints that triggered queuing termination",
        default_factory=dict,
    )
    end_processing_time: float | None = Field(
        description="Unix timestamp when request processing stopped", default=None
    )
    end_processing_constraints: dict[str, SchedulerUpdateAction] = Field(
        description="Constraints that triggered processing termination",
        default_factory=dict,
    )
    scheduler_constraints: dict[str, SchedulerUpdateAction] = Field(
        description="Latest state from all constraints applied during scheduler run",
        default_factory=dict,
    )

    progress: SchedulerProgress = Field(
        description="Overall progress information for the scheduler run",
        default_factory=lambda: SchedulerProgress(),
    )

    # Request counters; all start at zero.
    created_requests: int = Field(
        description="Total number of requests created", default=0
    )
    queued_requests: int = Field(
        description="Total number of requests queued for processing", default=0
    )
    pending_requests: int = Field(
        description="Number of requests pending processing within a worker",
        default=0,
    )
    processing_requests: int = Field(
        description="Number of requests currently being processed", default=0
    )
    processed_requests: int = Field(
        description="Number of requests that completed processing", default=0
    )
    successful_requests: int = Field(
        description="Number of requests that completed successfully", default=0
    )
    errored_requests: int = Field(
        description="Number of requests that failed with errors", default=0
    )
    cancelled_requests: int = Field(
        description="Number of requests that were cancelled", default=0
    )