guidellm 0.4.0a21__py3-none-any.whl → 0.4.0a155__py3-none-any.whl

This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as published in their respective public registries.

Potentially problematic release: this version of guidellm might be problematic.

Files changed (116)
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +451 -252
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +110 -0
  5. guidellm/backends/openai.py +355 -0
  6. guidellm/backends/response_handlers.py +455 -0
  7. guidellm/benchmark/__init__.py +53 -39
  8. guidellm/benchmark/benchmarker.py +148 -317
  9. guidellm/benchmark/entrypoints.py +466 -128
  10. guidellm/benchmark/output.py +517 -771
  11. guidellm/benchmark/profile.py +580 -280
  12. guidellm/benchmark/progress.py +568 -549
  13. guidellm/benchmark/scenarios/__init__.py +40 -0
  14. guidellm/benchmark/scenarios/chat.json +6 -0
  15. guidellm/benchmark/scenarios/rag.json +6 -0
  16. guidellm/benchmark/schemas.py +2085 -0
  17. guidellm/data/__init__.py +28 -4
  18. guidellm/data/collators.py +16 -0
  19. guidellm/data/deserializers/__init__.py +53 -0
  20. guidellm/data/deserializers/deserializer.py +109 -0
  21. guidellm/data/deserializers/file.py +222 -0
  22. guidellm/data/deserializers/huggingface.py +94 -0
  23. guidellm/data/deserializers/memory.py +192 -0
  24. guidellm/data/deserializers/synthetic.py +346 -0
  25. guidellm/data/loaders.py +145 -0
  26. guidellm/data/preprocessors/__init__.py +25 -0
  27. guidellm/data/preprocessors/formatters.py +412 -0
  28. guidellm/data/preprocessors/mappers.py +198 -0
  29. guidellm/data/preprocessors/preprocessor.py +29 -0
  30. guidellm/data/processor.py +30 -0
  31. guidellm/data/schemas.py +13 -0
  32. guidellm/data/utils/__init__.py +10 -0
  33. guidellm/data/utils/dataset.py +94 -0
  34. guidellm/data/utils/functions.py +18 -0
  35. guidellm/extras/__init__.py +4 -0
  36. guidellm/extras/audio.py +215 -0
  37. guidellm/extras/vision.py +242 -0
  38. guidellm/logger.py +2 -2
  39. guidellm/mock_server/__init__.py +8 -0
  40. guidellm/mock_server/config.py +84 -0
  41. guidellm/mock_server/handlers/__init__.py +17 -0
  42. guidellm/mock_server/handlers/chat_completions.py +280 -0
  43. guidellm/mock_server/handlers/completions.py +280 -0
  44. guidellm/mock_server/handlers/tokenizer.py +142 -0
  45. guidellm/mock_server/models.py +510 -0
  46. guidellm/mock_server/server.py +168 -0
  47. guidellm/mock_server/utils.py +302 -0
  48. guidellm/preprocess/dataset.py +23 -26
  49. guidellm/presentation/builder.py +2 -2
  50. guidellm/presentation/data_models.py +25 -21
  51. guidellm/presentation/injector.py +2 -3
  52. guidellm/scheduler/__init__.py +65 -26
  53. guidellm/scheduler/constraints.py +1035 -0
  54. guidellm/scheduler/environments.py +252 -0
  55. guidellm/scheduler/scheduler.py +140 -368
  56. guidellm/scheduler/schemas.py +272 -0
  57. guidellm/scheduler/strategies.py +519 -0
  58. guidellm/scheduler/worker.py +391 -420
  59. guidellm/scheduler/worker_group.py +707 -0
  60. guidellm/schemas/__init__.py +31 -0
  61. guidellm/schemas/info.py +159 -0
  62. guidellm/schemas/request.py +216 -0
  63. guidellm/schemas/response.py +119 -0
  64. guidellm/schemas/stats.py +228 -0
  65. guidellm/{config.py → settings.py} +32 -21
  66. guidellm/utils/__init__.py +95 -8
  67. guidellm/utils/auto_importer.py +98 -0
  68. guidellm/utils/cli.py +46 -2
  69. guidellm/utils/console.py +183 -0
  70. guidellm/utils/encoding.py +778 -0
  71. guidellm/utils/functions.py +134 -0
  72. guidellm/utils/hf_datasets.py +1 -2
  73. guidellm/utils/hf_transformers.py +4 -4
  74. guidellm/utils/imports.py +9 -0
  75. guidellm/utils/messaging.py +1118 -0
  76. guidellm/utils/mixins.py +115 -0
  77. guidellm/utils/pydantic_utils.py +411 -0
  78. guidellm/utils/random.py +3 -4
  79. guidellm/utils/registry.py +220 -0
  80. guidellm/utils/singleton.py +133 -0
  81. guidellm/{objects → utils}/statistics.py +341 -247
  82. guidellm/utils/synchronous.py +159 -0
  83. guidellm/utils/text.py +163 -50
  84. guidellm/utils/typing.py +41 -0
  85. guidellm/version.py +1 -1
  86. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/METADATA +33 -10
  87. guidellm-0.4.0a155.dist-info/RECORD +96 -0
  88. guidellm/backend/__init__.py +0 -23
  89. guidellm/backend/backend.py +0 -259
  90. guidellm/backend/openai.py +0 -705
  91. guidellm/backend/response.py +0 -136
  92. guidellm/benchmark/aggregator.py +0 -760
  93. guidellm/benchmark/benchmark.py +0 -837
  94. guidellm/benchmark/scenario.py +0 -104
  95. guidellm/data/prideandprejudice.txt.gz +0 -0
  96. guidellm/dataset/__init__.py +0 -22
  97. guidellm/dataset/creator.py +0 -213
  98. guidellm/dataset/entrypoints.py +0 -42
  99. guidellm/dataset/file.py +0 -92
  100. guidellm/dataset/hf_datasets.py +0 -62
  101. guidellm/dataset/in_memory.py +0 -132
  102. guidellm/dataset/synthetic.py +0 -287
  103. guidellm/objects/__init__.py +0 -18
  104. guidellm/objects/pydantic.py +0 -89
  105. guidellm/request/__init__.py +0 -18
  106. guidellm/request/loader.py +0 -284
  107. guidellm/request/request.py +0 -79
  108. guidellm/request/types.py +0 -10
  109. guidellm/scheduler/queues.py +0 -25
  110. guidellm/scheduler/result.py +0 -155
  111. guidellm/scheduler/strategy.py +0 -495
  112. guidellm-0.4.0a21.dist-info/RECORD +0 -62
  113. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/WHEEL +0 -0
  114. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/entry_points.txt +0 -0
  115. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/licenses/LICENSE +0 -0
  116. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/top_level.txt +0 -0
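
Note on the restructuring: the removed guidellm/backend/ package (files 88-91) is replaced by the new guidellm/backends/ package (files 3-6), and guidellm/config.py moves to guidellm/settings.py (file 65). Downstream imports will likely need updating; a minimal migration sketch, with the old export names inferred from the removed file paths rather than confirmed against 0.4.0a21::

    # Old layout (0.4.0a21, inferred from the removed guidellm/backend/ files):
    # from guidellm.backend import Backend, OpenAIHTTPBackend

    # New layout (0.4.0a155, per guidellm/backends/__init__.py shown below):
    from guidellm.backends import Backend, OpenAIHTTPBackend
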
guidellm/backends/__init__.py
@@ -0,0 +1,33 @@
+ """
+ Backend infrastructure for GuideLLM language model interactions.
+
+ Provides abstract base classes, concrete backend implementations, and response
+ handlers for standardized communication with generative AI model providers.
+ The backend system supports distributed execution across worker processes with
+ pluggable response handlers for different API formats. Key components include
+ the abstract Backend base class, OpenAI-compatible HTTP backend, and response
+ handlers for processing streaming and non-streaming API responses.
+ """
+
+ from __future__ import annotations
+
+ from .backend import Backend, BackendType
+ from .openai import OpenAIHTTPBackend
+ from .response_handlers import (
+     AudioResponseHandler,
+     ChatCompletionsResponseHandler,
+     GenerationResponseHandler,
+     GenerationResponseHandlerFactory,
+     TextCompletionsResponseHandler,
+ )
+
+ __all__ = [
+     "AudioResponseHandler",
+     "Backend",
+     "BackendType",
+     "ChatCompletionsResponseHandler",
+     "GenerationResponseHandler",
+     "GenerationResponseHandlerFactory",
+     "OpenAIHTTPBackend",
+     "TextCompletionsResponseHandler",
+ ]
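
The re-exports above make guidellm.backends the public entry point for the backend system. A minimal usage sketch of the registry-backed factory (the target URL is a placeholder; Backend.create is defined in backend.py below)::

    from guidellm.backends import Backend

    # Looks up the class registered under "openai_http" (OpenAIHTTPBackend,
    # registered in openai.py below) and instantiates it with the given kwargs.
    backend = Backend.create("openai_http", target="http://localhost:8000")
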
guidellm/backends/backend.py
@@ -0,0 +1,110 @@
+ """
+ Backend interface and registry for generative AI model interactions.
+
+ Provides the abstract base class for implementing backends that communicate with
+ generative AI models. Backends handle the lifecycle of generation requests and
+ provide a standard interface for distributed execution across worker processes.
+ """
+
+ from __future__ import annotations
+
+ from abc import abstractmethod
+ from typing import Literal
+
+ from guidellm.scheduler import BackendInterface
+ from guidellm.schemas import GenerationRequest, GenerationResponse
+ from guidellm.utils import RegistryMixin
+
+ __all__ = [
+     "Backend",
+     "BackendType",
+ ]
+
+
+ BackendType = Literal["openai_http"]
+
+
+ class Backend(
+     RegistryMixin["type[Backend]"],
+     BackendInterface[GenerationRequest, GenerationResponse],
+ ):
+     """
+     Base class for generative AI backends with registry and lifecycle management.
+
+     Provides a standard interface for backends that communicate with generative AI
+     models. Combines the registry pattern for automatic discovery with a defined
+     lifecycle for process-based distributed execution. Backend state must be
+     pickleable for distributed execution across process boundaries.
+
+     Backend lifecycle phases:
+     1. Creation and configuration
+     2. Process startup - Initialize resources in worker process
+     3. Validation - Verify backend readiness
+     4. Request resolution - Process generation requests
+     5. Process shutdown - Clean up resources
+
+     Example:
+     ::
+         @Backend.register("my_backend")
+         class MyBackend(Backend):
+             def __init__(self, api_key: str):
+                 super().__init__("my_backend")
+                 self.api_key = api_key
+
+             async def process_startup(self):
+                 self.client = MyAPIClient(self.api_key)
+
+         backend = Backend.create("my_backend", api_key="secret")
+     """
+
+     @classmethod
+     def create(cls, type_: BackendType, **kwargs) -> Backend:
+         """
+         Create a backend instance based on the backend type.
+
+         :param type_: The type of backend to create
+         :param kwargs: Additional arguments for backend initialization
+         :return: An instance of a subclass of Backend
+         :raises ValueError: If the backend type is not registered
+         """
+
+         backend = cls.get_registered_object(type_)
+
+         if backend is None:
+             raise ValueError(
+                 f"Backend type '{type_}' is not registered. "
+                 f"Available types: {list(cls.registry.keys()) if cls.registry else []}"
+             )
+
+         return backend(**kwargs)
+
+     def __init__(self, type_: BackendType):
+         """
+         Initialize a backend instance.
+
+         :param type_: The backend type identifier
+         """
+         self.type_ = type_
+
+     @property
+     def processes_limit(self) -> int | None:
+         """
+         :return: Maximum number of worker processes supported, None if unlimited
+         """
+         return None
+
+     @property
+     def requests_limit(self) -> int | None:
+         """
+         :return: Maximum number of concurrent requests supported globally,
+             None if unlimited
+         """
+         return None
+
+     @abstractmethod
+     async def default_model(self) -> str | None:
+         """
+         :return: The default model name or identifier for generation requests,
+             None if no default model is available
+         """
+         ...
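
The lifecycle phases listed in the Backend docstring map directly onto the async methods a worker process calls; the validate() and resolve() signatures are those of the OpenAI implementation shown next. A minimal driver sketch, assuming an already-constructed backend and a prepared GenerationRequest/RequestInfo pair (both elided here)::

    async def run_one(backend, request, request_info):
        # Phase 2: initialize resources inside the worker process
        await backend.process_startup()
        try:
            # Phase 3: verify the backend is reachable and configured
            await backend.validate()
            # Phase 4: each yield is a progressively updated response
            async for response, info in backend.resolve(request, request_info):
                ...
        finally:
            # Phase 5: always release resources, even on errors
            await backend.process_shutdown()
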
guidellm/backends/openai.py
@@ -0,0 +1,355 @@
+ """
+ OpenAI HTTP backend implementation for GuideLLM.
+
+ Provides HTTP-based backend for OpenAI-compatible servers including OpenAI API,
+ vLLM servers, and other compatible inference engines. Supports text and chat
+ completions with streaming, authentication, and multimodal capabilities.
+ Handles request formatting, response parsing, error handling, and token usage
+ tracking with flexible parameter customization.
+ """
+
+ from __future__ import annotations
+
+ import asyncio
+ import time
+ from collections.abc import AsyncIterator
+ from typing import Any
+
+ import httpx
+
+ from guidellm.backends.backend import Backend
+ from guidellm.backends.response_handlers import (
+     GenerationResponseHandler,
+     GenerationResponseHandlerFactory,
+ )
+ from guidellm.schemas import GenerationRequest, GenerationResponse, RequestInfo
+
+ __all__ = ["OpenAIHTTPBackend"]
+
+
+ @Backend.register("openai_http")
+ class OpenAIHTTPBackend(Backend):
+     """
+     HTTP backend for OpenAI-compatible servers.
+
+     Supports OpenAI API, vLLM servers, and other compatible endpoints with
+     text/chat completions, streaming, authentication, and multimodal inputs.
+     Handles request formatting, response parsing, error handling, and token
+     usage tracking with flexible parameter customization.
+
+     Example:
+     ::
+         backend = OpenAIHTTPBackend(
+             target="http://localhost:8000",
+             model="gpt-3.5-turbo",
+             api_key="your-api-key"
+         )
+
+         await backend.process_startup()
+         async for response, request_info in backend.resolve(request, info):
+             process_response(response)
+         await backend.process_shutdown()
+     """
+
+     def __init__(
+         self,
+         target: str,
+         model: str | None = None,
+         api_routes: dict[str, str] | None = None,
+         response_handlers: dict[str, Any] | None = None,
+         timeout: float = 60.0,
+         http2: bool = True,
+         follow_redirects: bool = True,
+         verify: bool = False,
+         validate_backend: bool | str | dict[str, Any] = True,
+     ):
+         """
+         Initialize OpenAI HTTP backend with server configuration.
+
+         :param target: Base URL of the OpenAI-compatible server
+         :param model: Model identifier for generation requests
+         :param api_routes: Custom API endpoint routes mapping
+         :param response_handlers: Custom response handlers for different request types
+         :param timeout: Request timeout in seconds
+         :param http2: Enable HTTP/2 protocol support
+         :param follow_redirects: Follow HTTP redirects automatically
+         :param verify: Enable SSL certificate verification
+         :param validate_backend: Backend validation configuration
+         """
+         super().__init__(type_="openai_http")
+
+         # Request Values
+         self.target = target.rstrip("/").removesuffix("/v1")
+         self.model = model
+
+         # Store configuration
+         self.api_routes = api_routes or {
+             "health": "health",
+             "models": "v1/models",
+             "text_completions": "v1/completions",
+             "chat_completions": "v1/chat/completions",
+             "audio_transcriptions": "v1/audio/transcriptions",
+             "audio_translations": "v1/audio/translations",
+         }
+         self.response_handlers = response_handlers
+         self.timeout = timeout
+         self.http2 = http2
+         self.follow_redirects = follow_redirects
+         self.verify = verify
+         self.validate_backend: dict[str, Any] | None = self._resolve_validate_kwargs(
+             validate_backend
+         )
+
+         # Runtime state
+         self._in_process = False
+         self._async_client: httpx.AsyncClient | None = None
+
+     @property
+     def info(self) -> dict[str, Any]:
+         """
+         Get backend configuration details.
+
+         :return: Dictionary containing backend configuration details
+         """
+         return {
+             "target": self.target,
+             "model": self.model,
+             "timeout": self.timeout,
+             "http2": self.http2,
+             "follow_redirects": self.follow_redirects,
+             "verify": self.verify,
+             "openai_paths": self.api_routes,
+             "validate_backend": self.validate_backend,
+         }
+
+     async def process_startup(self):
+         """
+         Initialize HTTP client and backend resources.
+
+         :raises RuntimeError: If backend is already initialized
+         :raises httpx.RequestError: If HTTP client cannot be created
+         """
+         if self._in_process:
+             raise RuntimeError("Backend already started up for process.")
+
+         self._async_client = httpx.AsyncClient(
+             http2=self.http2,
+             timeout=self.timeout,
+             follow_redirects=self.follow_redirects,
+             verify=self.verify,
+         )
+         self._in_process = True
+
+     async def process_shutdown(self):
+         """
+         Clean up HTTP client and backend resources.
+
+         :raises RuntimeError: If backend was not properly initialized
+         :raises httpx.RequestError: If HTTP client cannot be closed
+         """
+         if not self._in_process:
+             raise RuntimeError("Backend not started up for process.")
+
+         await self._async_client.aclose()  # type: ignore [union-attr]
+         self._async_client = None
+         self._in_process = False
+
+     async def validate(self):
+         """
+         Validate backend connectivity and configuration.
+
+         :raises RuntimeError: If backend cannot connect or validate configuration
+         """
+         if self._async_client is None:
+             raise RuntimeError("Backend not started up for process.")
+
+         if not self.validate_backend:
+             return
+
+         try:
+             response = await self._async_client.request(**self.validate_backend)
+             response.raise_for_status()
+         except Exception as exc:
+             raise RuntimeError(
+                 "Backend validation request failed. Could not connect to the server "
+                 "or validate the backend configuration."
+             ) from exc
+
+     async def available_models(self) -> list[str]:
+         """
+         Get available models from the target server.
+
+         :return: List of model identifiers
+         :raises httpx.HTTPError: If models endpoint returns an error
+         :raises RuntimeError: If backend is not initialized
+         """
+         if self._async_client is None:
+             raise RuntimeError("Backend not started up for process.")
+
+         target = f"{self.target}/{self.api_routes['models']}"
+         response = await self._async_client.get(target)
+         response.raise_for_status()
+
+         return [item["id"] for item in response.json()["data"]]
+
+     async def default_model(self) -> str | None:
+         """
+         Get the default model for this backend.
+
+         :return: Model name or None if no model is available
+         """
+         if self.model or not self._in_process:
+             return self.model
+
+         models = await self.available_models()
+         return models[0] if models else None
+
+     async def resolve(
+         self,
+         request: GenerationRequest,
+         request_info: RequestInfo,
+         history: list[tuple[GenerationRequest, GenerationResponse]] | None = None,
+     ) -> AsyncIterator[tuple[GenerationResponse, RequestInfo]]:
+         """
+         Process generation request and yield progressive responses.
+
+         Handles request formatting, timing tracking, API communication, and
+         response parsing with streaming support.
+
+         :param request: Generation request with content and parameters
+         :param request_info: Request tracking info updated with timing metadata
+         :param history: Conversation history (currently not supported)
+         :raises NotImplementedError: If history is provided
+         :raises RuntimeError: If backend is not initialized
+         :raises ValueError: If request type is unsupported
+         :yields: Tuples of (response, updated_request_info) as generation progresses
+         """
+         if self._async_client is None:
+             raise RuntimeError("Backend not started up for process.")
+
+         if history is not None:
+             raise NotImplementedError("Multi-turn requests not yet supported")
+
+         response_handler = self._resolve_response_handler(
+             request_type=request.request_type
+         )
+         if (request_path := self.api_routes.get(request.request_type)) is None:
+             raise ValueError(f"Unsupported request type '{request.request_type}'")
+         request_url = f"{self.target}/{request_path}"
+         request_files = (
+             {
+                 key: tuple(value) if isinstance(value, list) else value
+                 for key, value in request.arguments.files.items()
+             }
+             if request.arguments.files
+             else None
+         )
+         request_json = request.arguments.body if not request_files else None
+         request_data = request.arguments.body if request_files else None
+
+         if not request.arguments.stream:
+             request_info.timings.request_start = time.time()
+             response = await self._async_client.request(
+                 request.arguments.method or "POST",
+                 request_url,
+                 params=request.arguments.params,
+                 headers=request.arguments.headers,
+                 json=request_json,
+                 data=request_data,
+                 files=request_files,
+             )
+             request_info.timings.request_end = time.time()
+             response.raise_for_status()
+             data = response.json()
+             yield response_handler.compile_non_streaming(request, data), request_info
+             return
+
+         try:
+             request_info.timings.request_start = time.time()
+
+             async with self._async_client.stream(
+                 request.arguments.method or "POST",
+                 request_url,
+                 params=request.arguments.params,
+                 headers=request.arguments.headers,
+                 json=request_json,
+                 data=request_data,
+                 files=request_files,
+             ) as stream:
+                 stream.raise_for_status()
+                 end_reached = False
+
+                 async for chunk in stream.aiter_lines():
+                     iter_time = time.time()
+
+                     if (
+                         (iterations := response_handler.add_streaming_line(chunk))
+                         is None
+                         or iterations < 0
+                         or end_reached
+                     ):
+                         end_reached = end_reached or iterations is None
+                         continue
+
+                     if (
+                         request_info.timings.first_iteration is None
+                         or request_info.timings.iterations is None
+                     ):
+                         request_info.timings.first_iteration = iter_time
+                         request_info.timings.iterations = 0
+
+                     request_info.timings.last_iteration = iter_time
+                     request_info.timings.iterations += iterations
+
+             request_info.timings.request_end = time.time()
+             yield response_handler.compile_streaming(request), request_info
+         except asyncio.CancelledError as err:
+             # Yield current result to store iterative results before propagating
+             yield response_handler.compile_streaming(request), request_info
+             raise err
+
+     def _resolve_validate_kwargs(
+         self, validate_backend: bool | str | dict[str, Any]
+     ) -> dict[str, Any] | None:
+         if not (validate_kwargs := validate_backend):
+             return None
+
+         if validate_kwargs is True:
+             validate_kwargs = "health"
+
+         if isinstance(validate_kwargs, str) and validate_kwargs in self.api_routes:
+             validate_kwargs = f"{self.target}/{self.api_routes[validate_kwargs]}"
+
+         if isinstance(validate_kwargs, str):
+             validate_kwargs = {
+                 "method": "GET",
+                 "url": validate_kwargs,
+             }
+
+         if not isinstance(validate_kwargs, dict) or "url" not in validate_kwargs:
+             raise ValueError(
+                 "validate_backend must be a boolean, string, or dictionary and contain "
+                 f"a target URL. Got: {validate_kwargs}"
+             )
+
+         if "method" not in validate_kwargs:
+             validate_kwargs["method"] = "GET"
+
+         return validate_kwargs
+
+     def _resolve_response_handler(self, request_type: str) -> GenerationResponseHandler:
+         if (
+             self.response_handlers is not None
+             and (handler := self.response_handlers.get(request_type)) is not None
+         ):
+             return handler
+
+         handler_class = GenerationResponseHandlerFactory.get_registered_object(
+             request_type
+         )
+         if handler_class is None:
+             raise ValueError(
+                 f"No response handler registered for request type '{request_type}'"
+             )
+
+         return handler_class()
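
_resolve_validate_kwargs normalizes the validate_backend constructor argument into httpx request kwargs: True maps to the registered "health" route, a known route name maps to that route's URL, any other string is treated as a full URL (GET by default), a dict is used as-is (with "GET" filled in if no method is given), and falsy values disable validation entirely. A short sketch of the dict form, which the code above passes through unchanged::

    from guidellm.backends import OpenAIHTTPBackend

    backend = OpenAIHTTPBackend(
        target="http://localhost:8000",
        validate_backend={"url": "http://localhost:8000/health", "method": "HEAD"},
    )
    # validate() will issue: HEAD http://localhost:8000/health
    assert backend.validate_backend == {
        "url": "http://localhost:8000/health",
        "method": "HEAD",
    }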