guidellm 0.4.0a21__py3-none-any.whl → 0.4.0a169__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this release of guidellm has been flagged as potentially problematic; review the release details in the registry before upgrading.

Files changed (115)
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +452 -252
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +110 -0
  5. guidellm/backends/openai.py +355 -0
  6. guidellm/backends/response_handlers.py +455 -0
  7. guidellm/benchmark/__init__.py +53 -39
  8. guidellm/benchmark/benchmarker.py +150 -317
  9. guidellm/benchmark/entrypoints.py +467 -128
  10. guidellm/benchmark/output.py +519 -771
  11. guidellm/benchmark/profile.py +580 -280
  12. guidellm/benchmark/progress.py +568 -549
  13. guidellm/benchmark/scenarios/__init__.py +40 -0
  14. guidellm/benchmark/scenarios/chat.json +6 -0
  15. guidellm/benchmark/scenarios/rag.json +6 -0
  16. guidellm/benchmark/schemas.py +2086 -0
  17. guidellm/data/__init__.py +28 -4
  18. guidellm/data/collators.py +16 -0
  19. guidellm/data/deserializers/__init__.py +53 -0
  20. guidellm/data/deserializers/deserializer.py +144 -0
  21. guidellm/data/deserializers/file.py +222 -0
  22. guidellm/data/deserializers/huggingface.py +94 -0
  23. guidellm/data/deserializers/memory.py +194 -0
  24. guidellm/data/deserializers/synthetic.py +348 -0
  25. guidellm/data/loaders.py +149 -0
  26. guidellm/data/preprocessors/__init__.py +25 -0
  27. guidellm/data/preprocessors/formatters.py +404 -0
  28. guidellm/data/preprocessors/mappers.py +198 -0
  29. guidellm/data/preprocessors/preprocessor.py +31 -0
  30. guidellm/data/processor.py +31 -0
  31. guidellm/data/schemas.py +13 -0
  32. guidellm/data/utils/__init__.py +6 -0
  33. guidellm/data/utils/dataset.py +94 -0
  34. guidellm/extras/__init__.py +4 -0
  35. guidellm/extras/audio.py +215 -0
  36. guidellm/extras/vision.py +242 -0
  37. guidellm/logger.py +2 -2
  38. guidellm/mock_server/__init__.py +8 -0
  39. guidellm/mock_server/config.py +84 -0
  40. guidellm/mock_server/handlers/__init__.py +17 -0
  41. guidellm/mock_server/handlers/chat_completions.py +280 -0
  42. guidellm/mock_server/handlers/completions.py +280 -0
  43. guidellm/mock_server/handlers/tokenizer.py +142 -0
  44. guidellm/mock_server/models.py +510 -0
  45. guidellm/mock_server/server.py +168 -0
  46. guidellm/mock_server/utils.py +302 -0
  47. guidellm/preprocess/dataset.py +23 -26
  48. guidellm/presentation/builder.py +2 -2
  49. guidellm/presentation/data_models.py +25 -21
  50. guidellm/presentation/injector.py +2 -3
  51. guidellm/scheduler/__init__.py +65 -26
  52. guidellm/scheduler/constraints.py +1035 -0
  53. guidellm/scheduler/environments.py +252 -0
  54. guidellm/scheduler/scheduler.py +140 -368
  55. guidellm/scheduler/schemas.py +272 -0
  56. guidellm/scheduler/strategies.py +519 -0
  57. guidellm/scheduler/worker.py +391 -420
  58. guidellm/scheduler/worker_group.py +707 -0
  59. guidellm/schemas/__init__.py +31 -0
  60. guidellm/schemas/info.py +159 -0
  61. guidellm/schemas/request.py +226 -0
  62. guidellm/schemas/response.py +119 -0
  63. guidellm/schemas/stats.py +228 -0
  64. guidellm/{config.py → settings.py} +32 -21
  65. guidellm/utils/__init__.py +95 -8
  66. guidellm/utils/auto_importer.py +98 -0
  67. guidellm/utils/cli.py +71 -2
  68. guidellm/utils/console.py +183 -0
  69. guidellm/utils/encoding.py +778 -0
  70. guidellm/utils/functions.py +134 -0
  71. guidellm/utils/hf_datasets.py +1 -2
  72. guidellm/utils/hf_transformers.py +4 -4
  73. guidellm/utils/imports.py +9 -0
  74. guidellm/utils/messaging.py +1118 -0
  75. guidellm/utils/mixins.py +115 -0
  76. guidellm/utils/pydantic_utils.py +411 -0
  77. guidellm/utils/random.py +3 -4
  78. guidellm/utils/registry.py +220 -0
  79. guidellm/utils/singleton.py +133 -0
  80. guidellm/{objects → utils}/statistics.py +341 -247
  81. guidellm/utils/synchronous.py +159 -0
  82. guidellm/utils/text.py +163 -50
  83. guidellm/utils/typing.py +41 -0
  84. guidellm/version.py +1 -1
  85. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/METADATA +33 -10
  86. guidellm-0.4.0a169.dist-info/RECORD +95 -0
  87. guidellm/backend/__init__.py +0 -23
  88. guidellm/backend/backend.py +0 -259
  89. guidellm/backend/openai.py +0 -705
  90. guidellm/backend/response.py +0 -136
  91. guidellm/benchmark/aggregator.py +0 -760
  92. guidellm/benchmark/benchmark.py +0 -837
  93. guidellm/benchmark/scenario.py +0 -104
  94. guidellm/data/prideandprejudice.txt.gz +0 -0
  95. guidellm/dataset/__init__.py +0 -22
  96. guidellm/dataset/creator.py +0 -213
  97. guidellm/dataset/entrypoints.py +0 -42
  98. guidellm/dataset/file.py +0 -92
  99. guidellm/dataset/hf_datasets.py +0 -62
  100. guidellm/dataset/in_memory.py +0 -132
  101. guidellm/dataset/synthetic.py +0 -287
  102. guidellm/objects/__init__.py +0 -18
  103. guidellm/objects/pydantic.py +0 -89
  104. guidellm/request/__init__.py +0 -18
  105. guidellm/request/loader.py +0 -284
  106. guidellm/request/request.py +0 -79
  107. guidellm/request/types.py +0 -10
  108. guidellm/scheduler/queues.py +0 -25
  109. guidellm/scheduler/result.py +0 -155
  110. guidellm/scheduler/strategy.py +0 -495
  111. guidellm-0.4.0a21.dist-info/RECORD +0 -62
  112. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/WHEEL +0 -0
  113. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/entry_points.txt +0 -0
  114. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/licenses/LICENSE +0 -0
  115. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,252 @@
1
+ """
2
+ Environment abstractions for coordinating scheduler execution across distributed nodes.
3
+
4
+ Provides abstractions that handle synchronization, timing coordination, error
5
+ propagation, and lifecycle management for scheduler execution across single or
6
+ multiple nodes. The Environment protocol defines the interface for distributed
7
+ coordination while NonDistributedEnvironment provides a minimal implementation
8
+ for single-node execution. Environments manage the complete execution lifecycle
9
+ from parameter distribution through result aggregation.
10
+
11
+ Execution Flow:
12
+ 1. sync_run_params() - Distribute workload and synchronize parameters
13
+ 2. sync_run_start() - Coordinate synchronized start time
14
+ 3. update_run_iteration() - Update state after each request iteration
15
+ 4. sync_run_error() - Handle and propagate errors across nodes
16
+ 5. sync_run_end() - Aggregate results and finalize execution
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import time
22
+ from abc import ABC, abstractmethod
23
+ from collections.abc import AsyncIterator, Iterable
24
+ from typing import Generic
25
+
26
+ from guidellm.scheduler.constraints import Constraint
27
+ from guidellm.scheduler.schemas import (
28
+ MultiTurnRequestT,
29
+ RequestT,
30
+ ResponseT,
31
+ SchedulerState,
32
+ )
33
+ from guidellm.scheduler.strategies import SchedulingStrategy
34
+ from guidellm.schemas import RequestInfo
35
+ from guidellm.settings import settings
36
+ from guidellm.utils import InfoMixin
37
+
38
+ __all__ = ["Environment", "NonDistributedEnvironment"]
39
+
40
+
41
class Environment(ABC, Generic[RequestT, ResponseT], InfoMixin):
    """
    Contract for coordinating scheduler execution across one or more nodes.

    Implementations own the distributed-coordination concerns of a run:
    distributing workload parameters, agreeing on a shared start time,
    publishing per-iteration state, propagating failures between nodes,
    and aggregating results when the run finishes. Callers drive the
    lifecycle in order: sync_run_params -> sync_run_start ->
    update_run_iteration (per request) -> sync_run_error (on failure) ->
    sync_run_end.
    """

    @abstractmethod
    async def sync_run_params(
        self,
        requests: Iterable[RequestT | MultiTurnRequestT[RequestT]],
        strategy: SchedulingStrategy,
        constraints: dict[str, Constraint],
    ) -> tuple[
        Iterable[RequestT | MultiTurnRequestT[RequestT]],
        SchedulingStrategy,
        dict[str, Constraint],
    ]:
        """
        Agree on run parameters across nodes and return this node's share.

        :param requests: Full request set to be processed across all nodes
        :param strategy: Scheduling strategy to apply during the run
        :param constraints: Runtime constraints to enforce during the run
        :return: The (requests, strategy, constraints) scoped to this node
        :raises Exception: When synchronization fails or nodes disagree
        """
        ...

    @abstractmethod
    async def sync_run_start(self) -> float:
        """
        Agree on a common start time for every participating node.

        :return: Unix timestamp at which all nodes should begin processing
        :raises Exception: When startup synchronization cannot complete
        """
        ...

    @abstractmethod
    async def update_run_iteration(
        self,
        response: ResponseT | None,
        request: RequestT,
        request_info: RequestInfo,
        state: SchedulerState,
    ):
        """
        Publish the outcome of one request iteration to the environment.

        :param response: Response produced for the request, if any
        :param request: The request that was processed
        :param request_info: Processing metadata, including timing details
        :param state: Current scheduler state with progress and metrics
        :raises Exception: When the state update fails or signals a
            critical error
        """
        ...

    @abstractmethod
    async def sync_run_error(self, err: list[Exception] | Exception):
        """
        Propagate one or more errors to every active node.

        :param err: The exception(s) raised during execution
        """
        ...

    @abstractmethod
    async def sync_run_end(
        self,
    ) -> AsyncIterator[
        tuple[
            ResponseT | None,
            RequestT,
            RequestInfo,
            SchedulerState,
        ]
    ]:
        """
        Finalize the run and collect results from all nodes.

        :return: Iterator of (response, request, request_info, state)
            tuples received from remote nodes; empty when the environment
            is not distributed
        :raises Exception: Any error recorded while the run was executing
        """
        # Unreachable in subclasses; present so the abstract method is
        # recognized as an async generator.
        yield None  # type: ignore[misc]
130
+
131
+
132
class NonDistributedEnvironment(Environment[RequestT, ResponseT]):
    """
    Environment implementation for running the scheduler on a single node.

    Every synchronization hook is effectively a no-op: parameters pass
    through unchanged, the agreed start time is simply "now plus a
    configured delay", iteration updates do nothing, and errors are
    buffered locally so they can be re-raised when the run is finalized.
    Intended for local testing, development, and single-machine
    benchmarking.

    Example:
    ::
        from guidellm.scheduler import (
            MaxNumberConstraint,
            NonDistributedEnvironment,
            RequestInfo,
            SchedulerState,
            SynchronousStrategy,
        )

        env = NonDistributedEnvironment()
        requests = [f"req_{ind}" for ind in range(5)]
        strategy = SynchronousStrategy()
        constraints = {"max_num": MaxNumberConstraint(max_num=5)}
        state = SchedulerState()

        local_req, local_strat, local_const = await env.sync_run_params(
            requests, strategy, constraints
        )
        start_time = await env.sync_run_start()
        for req in local_req:
            state.processed_requests += 1
            await env.update_run_iteration(f"resp_{req}", req, RequestInfo(), state)
        async for nonlocal_req in env.sync_run_end():
            state.processed_requests += 1
    """

    def __init__(self):
        """
        Create the environment with an empty buffer for run errors.
        """
        # Errors recorded via sync_run_error; re-raised by sync_run_end.
        self.run_errors: list[Exception] = []

    async def sync_run_params(
        self,
        requests: Iterable[RequestT | MultiTurnRequestT[RequestT]],
        strategy: SchedulingStrategy,
        constraints: dict[str, Constraint],
    ) -> tuple[
        Iterable[RequestT | MultiTurnRequestT[RequestT]],
        SchedulingStrategy,
        dict[str, Constraint],
    ]:
        """
        Pass parameters through unchanged; there is only one node.

        :param requests: Requests to process locally
        :param strategy: Scheduling strategy to apply during the run
        :param constraints: Runtime constraints to enforce during the run
        :return: The original (requests, strategy, constraints) tuple
        """
        return requests, strategy, constraints

    async def sync_run_start(self) -> float:
        """
        Compute the local start time: now plus the configured delay.

        :return: Unix timestamp at which execution should begin
        """
        return time.time() + settings.scheduler_start_delay_non_distributed

    async def update_run_iteration(
        self,
        response: ResponseT | None,
        request: RequestT,
        request_info: RequestInfo,
        state: SchedulerState,
    ):
        """
        Do nothing; a single node has no remote state to synchronize.

        :param response: Response produced for the request, if any
        :param request: The request that was processed
        :param request_info: Processing metadata, including timing details
        :param state: Current scheduler state with progress and metrics
        """

    async def sync_run_error(self, err: Exception | list[Exception]):
        """
        Buffer the error(s) to be re-raised when the run is finalized.

        :param err: The exception(s) raised during execution
        """
        if isinstance(err, list):
            self.run_errors.extend(err)
        else:
            self.run_errors.append(err)

    async def sync_run_end(
        self,
    ) -> AsyncIterator[
        tuple[
            ResponseT | None,
            RequestT,
            RequestInfo,
            SchedulerState,
        ]
    ]:
        """
        Finalize the run, re-raising any buffered errors.

        :return: Empty iterator; a single node has no remote results
        :raises Exception: The single buffered error, or a RuntimeError
            wrapping all of them when more than one was recorded
        """
        if len(self.run_errors) == 1:
            raise self.run_errors[0]
        if self.run_errors:
            raise RuntimeError(
                f"Errors occurred during execution: {self.run_errors}"
            )

        if False:
            # Unreachable yield keeps this method an async generator.
            yield None  # type: ignore[misc]