guidellm 0.4.0a21__py3-none-any.whl → 0.4.0a169__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of guidellm might be problematic. Click here for more details.
- guidellm/__init__.py +5 -2
- guidellm/__main__.py +452 -252
- guidellm/backends/__init__.py +33 -0
- guidellm/backends/backend.py +110 -0
- guidellm/backends/openai.py +355 -0
- guidellm/backends/response_handlers.py +455 -0
- guidellm/benchmark/__init__.py +53 -39
- guidellm/benchmark/benchmarker.py +150 -317
- guidellm/benchmark/entrypoints.py +467 -128
- guidellm/benchmark/output.py +519 -771
- guidellm/benchmark/profile.py +580 -280
- guidellm/benchmark/progress.py +568 -549
- guidellm/benchmark/scenarios/__init__.py +40 -0
- guidellm/benchmark/scenarios/chat.json +6 -0
- guidellm/benchmark/scenarios/rag.json +6 -0
- guidellm/benchmark/schemas.py +2086 -0
- guidellm/data/__init__.py +28 -4
- guidellm/data/collators.py +16 -0
- guidellm/data/deserializers/__init__.py +53 -0
- guidellm/data/deserializers/deserializer.py +144 -0
- guidellm/data/deserializers/file.py +222 -0
- guidellm/data/deserializers/huggingface.py +94 -0
- guidellm/data/deserializers/memory.py +194 -0
- guidellm/data/deserializers/synthetic.py +348 -0
- guidellm/data/loaders.py +149 -0
- guidellm/data/preprocessors/__init__.py +25 -0
- guidellm/data/preprocessors/formatters.py +404 -0
- guidellm/data/preprocessors/mappers.py +198 -0
- guidellm/data/preprocessors/preprocessor.py +31 -0
- guidellm/data/processor.py +31 -0
- guidellm/data/schemas.py +13 -0
- guidellm/data/utils/__init__.py +6 -0
- guidellm/data/utils/dataset.py +94 -0
- guidellm/extras/__init__.py +4 -0
- guidellm/extras/audio.py +215 -0
- guidellm/extras/vision.py +242 -0
- guidellm/logger.py +2 -2
- guidellm/mock_server/__init__.py +8 -0
- guidellm/mock_server/config.py +84 -0
- guidellm/mock_server/handlers/__init__.py +17 -0
- guidellm/mock_server/handlers/chat_completions.py +280 -0
- guidellm/mock_server/handlers/completions.py +280 -0
- guidellm/mock_server/handlers/tokenizer.py +142 -0
- guidellm/mock_server/models.py +510 -0
- guidellm/mock_server/server.py +168 -0
- guidellm/mock_server/utils.py +302 -0
- guidellm/preprocess/dataset.py +23 -26
- guidellm/presentation/builder.py +2 -2
- guidellm/presentation/data_models.py +25 -21
- guidellm/presentation/injector.py +2 -3
- guidellm/scheduler/__init__.py +65 -26
- guidellm/scheduler/constraints.py +1035 -0
- guidellm/scheduler/environments.py +252 -0
- guidellm/scheduler/scheduler.py +140 -368
- guidellm/scheduler/schemas.py +272 -0
- guidellm/scheduler/strategies.py +519 -0
- guidellm/scheduler/worker.py +391 -420
- guidellm/scheduler/worker_group.py +707 -0
- guidellm/schemas/__init__.py +31 -0
- guidellm/schemas/info.py +159 -0
- guidellm/schemas/request.py +226 -0
- guidellm/schemas/response.py +119 -0
- guidellm/schemas/stats.py +228 -0
- guidellm/{config.py → settings.py} +32 -21
- guidellm/utils/__init__.py +95 -8
- guidellm/utils/auto_importer.py +98 -0
- guidellm/utils/cli.py +71 -2
- guidellm/utils/console.py +183 -0
- guidellm/utils/encoding.py +778 -0
- guidellm/utils/functions.py +134 -0
- guidellm/utils/hf_datasets.py +1 -2
- guidellm/utils/hf_transformers.py +4 -4
- guidellm/utils/imports.py +9 -0
- guidellm/utils/messaging.py +1118 -0
- guidellm/utils/mixins.py +115 -0
- guidellm/utils/pydantic_utils.py +411 -0
- guidellm/utils/random.py +3 -4
- guidellm/utils/registry.py +220 -0
- guidellm/utils/singleton.py +133 -0
- guidellm/{objects → utils}/statistics.py +341 -247
- guidellm/utils/synchronous.py +159 -0
- guidellm/utils/text.py +163 -50
- guidellm/utils/typing.py +41 -0
- guidellm/version.py +1 -1
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/METADATA +33 -10
- guidellm-0.4.0a169.dist-info/RECORD +95 -0
- guidellm/backend/__init__.py +0 -23
- guidellm/backend/backend.py +0 -259
- guidellm/backend/openai.py +0 -705
- guidellm/backend/response.py +0 -136
- guidellm/benchmark/aggregator.py +0 -760
- guidellm/benchmark/benchmark.py +0 -837
- guidellm/benchmark/scenario.py +0 -104
- guidellm/data/prideandprejudice.txt.gz +0 -0
- guidellm/dataset/__init__.py +0 -22
- guidellm/dataset/creator.py +0 -213
- guidellm/dataset/entrypoints.py +0 -42
- guidellm/dataset/file.py +0 -92
- guidellm/dataset/hf_datasets.py +0 -62
- guidellm/dataset/in_memory.py +0 -132
- guidellm/dataset/synthetic.py +0 -287
- guidellm/objects/__init__.py +0 -18
- guidellm/objects/pydantic.py +0 -89
- guidellm/request/__init__.py +0 -18
- guidellm/request/loader.py +0 -284
- guidellm/request/request.py +0 -79
- guidellm/request/types.py +0 -10
- guidellm/scheduler/queues.py +0 -25
- guidellm/scheduler/result.py +0 -155
- guidellm/scheduler/strategy.py +0 -495
- guidellm-0.4.0a21.dist-info/RECORD +0 -62
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/WHEEL +0 -0
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/entry_points.txt +0 -0
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,778 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Message encoding utilities for multiprocess communication with Pydantic model support.
|
|
3
|
+
|
|
4
|
+
Provides binary serialization and deserialization of Python objects using various
|
|
5
|
+
serialization formats and encoding packages to enable performance configurations
|
|
6
|
+
for distributed scheduler operations. Supports configurable two-stage processing
|
|
7
|
+
pipeline: object serialization (to dict/sequence) followed by binary encoding
|
|
8
|
+
(msgpack/msgspec) with specialized Pydantic model handling for type preservation.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from collections.abc import Mapping
|
|
14
|
+
from typing import Any, ClassVar, Generic, Literal, TypeVar, cast
|
|
15
|
+
|
|
16
|
+
try:
|
|
17
|
+
import msgpack # type: ignore[import-untyped] # Optional dependency
|
|
18
|
+
from msgpack import Packer, Unpacker
|
|
19
|
+
|
|
20
|
+
HAS_MSGPACK = True
|
|
21
|
+
except ImportError:
|
|
22
|
+
msgpack = Packer = Unpacker = None
|
|
23
|
+
HAS_MSGPACK = False
|
|
24
|
+
|
|
25
|
+
try:
|
|
26
|
+
from msgspec.msgpack import (
|
|
27
|
+
Decoder as MsgspecDecoder, # type: ignore[import-not-found] # Optional dependency
|
|
28
|
+
)
|
|
29
|
+
from msgspec.msgpack import (
|
|
30
|
+
Encoder as MsgspecEncoder, # type: ignore[import-not-found] # Optional dependency
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
HAS_MSGSPEC = True
|
|
34
|
+
except ImportError:
|
|
35
|
+
MsgspecDecoder = MsgspecEncoder = None # type: ignore[misc, assignment] # HAS_MSGSPEC will be checked at runtime
|
|
36
|
+
HAS_MSGSPEC = False
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
from pydantic import BaseModel
|
|
40
|
+
|
|
41
|
+
from guidellm.utils.imports import json
|
|
42
|
+
|
|
43
|
+
__all__ = [
|
|
44
|
+
"Encoder",
|
|
45
|
+
"EncodingTypesAlias",
|
|
46
|
+
"MessageEncoding",
|
|
47
|
+
"MsgT",
|
|
48
|
+
"ObjT",
|
|
49
|
+
"SerializationTypesAlias",
|
|
50
|
+
"Serializer",
|
|
51
|
+
]
|
|
52
|
+
|
|
53
|
+
ObjT = TypeVar("ObjT")
|
|
54
|
+
MsgT = TypeVar("MsgT")
|
|
55
|
+
|
|
56
|
+
# Type alias for available serialization strategies
|
|
57
|
+
SerializationTypesAlias = Literal["dict", "sequence"] | None
|
|
58
|
+
# Type alias for available binary encoding formats
|
|
59
|
+
EncodingTypesAlias = Literal["msgpack", "msgspec"] | None
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class MessageEncoding(Generic[ObjT, MsgT]):
    """
    Two-stage message encoder/decoder for multiprocess communication.

    Stage one (serialization) converts an object into a transport-friendly
    structure via a ``Serializer``; stage two (encoding) converts that structure
    into binary via an ``Encoder``. Either stage can be disabled, in which case
    the value is passed through that stage untouched. Decoding applies the two
    stages in reverse order.

    Example:
    ::
        from guidellm.utils.encoding import MessageEncoding
        from pydantic import BaseModel

        class DataModel(BaseModel):
            name: str
            value: int

        # Configure with dict serialization and msgpack encoding
        encoding = MessageEncoding(serialization="dict", encoding="msgpack")
        encoding.register_pydantic(DataModel)

        # Encode and decode objects
        data = DataModel(name="test", value=42)
        encoded_msg = encoding.encode(data)
        decoded_data = encoding.decode(encoded_msg)

    :cvar DEFAULT_ENCODING_PREFERENCE: Preferred encoding formats in priority order
    """

    DEFAULT_ENCODING_PREFERENCE: ClassVar[list[str]] = ["msgspec", "msgpack"]

    @classmethod
    def encode_message(
        cls,
        obj: ObjT,
        serializer: Serializer | None,
        encoder: Encoder | None,
    ) -> MsgT:
        """
        Run an object through the given serializer and then the given encoder.

        :param obj: Object to encode
        :param serializer: Serializer applied first; skipped when falsy
        :param encoder: Encoder applied second; skipped when falsy
        :return: Encoded message ready for transport
        """
        staged: Any = obj
        if serializer:
            staged = serializer.serialize(staged)
        if encoder:
            staged = encoder.encode(staged)

        return cast("MsgT", staged)

    @classmethod
    def decode_message(
        cls,
        message: MsgT,
        serializer: Serializer | None,
        encoder: Encoder | None,
    ) -> ObjT:
        """
        Reverse ``encode_message``: binary-decode first, then deserialize.

        The serializer and encoder must match the ones used when encoding.

        :param message: Encoded message to decode
        :param serializer: Serializer used for reconstruction; skipped when falsy
        :param encoder: Encoder used for binary decoding; skipped when falsy
        :return: Reconstructed object
        """
        staged: Any = message
        if encoder:
            staged = encoder.decode(staged)
        if serializer:
            staged = serializer.deserialize(staged)

        return cast("ObjT", staged)

    def __init__(
        self,
        serialization: SerializationTypesAlias = None,
        encoding: EncodingTypesAlias | list[EncodingTypesAlias] = None,
        pydantic_models: list[type[BaseModel]] | None = None,
    ) -> None:
        """
        Build the serializer/encoder pair used by ``encode`` and ``decode``.

        :param serialization: Serialization strategy (None, "dict", or "sequence")
        :param encoding: Encoding strategy (None, "msgpack", "msgspec", or
            preference list)
        :param pydantic_models: Optional Pydantic model classes forwarded to the
            serializer for registration
        """
        self.serializer = Serializer(serialization, pydantic_models)
        self.encoder = Encoder(encoding)

    def register_pydantic(self, model: type[BaseModel]) -> None:
        """
        Forward a Pydantic model class to the serializer's registry.

        :param model: Pydantic model class to register for type preservation
        """
        self.serializer.register_pydantic(model)

    def encode(self, obj: ObjT) -> MsgT:
        """
        Encode an object with this instance's serializer and encoder.

        :param obj: Object to encode using configured serialization and encoding
        :return: Encoded message ready for transport
        """
        return self.encode_message(
            obj=obj,
            serializer=self.serializer,
            encoder=self.encoder,
        )

    def decode(self, message: MsgT) -> ObjT:
        """
        Decode a message with this instance's serializer and encoder.

        :param message: Encoded message to decode using configured strategies
        :return: Reconstructed object
        """
        return self.decode_message(
            message=message,
            serializer=self.serializer,
            encoder=self.encoder,
        )
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
class Encoder:
    """
    Binary encoder/decoder backed by msgpack or msgspec.

    The encoding strategy is resolved once at construction time, together with a
    reusable encoder/decoder object pair. When no encoding is configured (or no
    entry of a preference list is available) values are passed through
    unchanged.
    """

    def __init__(
        self, encoding: EncodingTypesAlias | list[EncodingTypesAlias] = None
    ) -> None:
        """
        Resolve the encoding strategy and create its encoder/decoder objects.

        :param encoding: Encoding format preference (None, "msgpack", "msgspec", or
            preference list)
        """
        resolved = self._resolve_encoding(encoding)
        self.encoding, self.encoder, self.decoder = resolved

    def encode(self, obj: Any) -> bytes | Any:
        """
        Encode an object with the resolved encoding strategy.

        :param obj: Object to encode (must be serializable by chosen format)
        :return: Encoded bytes or original object if no encoding configured
        :raises ImportError: If required encoding library is not available
        """
        strategy = self.encoding

        if strategy == "msgpack":
            if not HAS_MSGPACK:
                raise ImportError("msgpack is not available")
            if self.encoder:
                return self.encoder.pack(obj)
            return msgpack.packb(obj)

        if strategy == "msgspec":
            if not HAS_MSGSPEC:
                raise ImportError("msgspec is not available")
            if self.encoder:
                return self.encoder.encode(obj)
            return MsgspecEncoder().encode(obj)

        # No encoding configured: hand the object back untouched.
        return obj

    def decode(self, data: bytes | Any) -> Any:
        """
        Decode data with the resolved encoding strategy.

        :param data: Binary data to decode or object if no encoding configured
        :return: Decoded Python object
        :raises ImportError: If required encoding library is not available
        """
        strategy = self.encoding

        if strategy == "msgpack":
            if not HAS_MSGPACK:
                raise ImportError("msgpack is not available")
            if self.decoder is None:
                return msgpack.unpackb(data, raw=False)
            # The stored Unpacker is fed the message and asked for one object.
            self.decoder.feed(data)
            return self.decoder.unpack()

        if strategy == "msgspec":
            if not HAS_MSGSPEC:
                raise ImportError("msgspec is not available")
            if self.decoder is None:
                return MsgspecDecoder().decode(data)
            return self.decoder.decode(data)

        # No encoding configured: hand the data back untouched.
        return data

    def _resolve_encoding(
        self, encoding: EncodingTypesAlias | list[EncodingTypesAlias] | None
    ) -> tuple[EncodingTypesAlias, Any, Any]:
        """
        Pick the encoding to use and build its encoder/decoder objects.

        A single name must be available, otherwise ImportError is raised. A list
        is treated as a preference order: the first available entry wins, and
        if none is available the result is "no encoding".

        :param encoding: Requested encoding name, preference list, or None
        :return: Tuple of (resolved encoding name, encoder object, decoder object)
        :raises ImportError: If a single requested encoding is not available
        """

        def build_pair(name: EncodingTypesAlias) -> tuple[Any, Any]:
            # Returns (None, None) when the name is unknown or its library is missing.
            if name == "msgpack" and HAS_MSGPACK:
                return Packer(), Unpacker(raw=False)
            if name == "msgspec" and HAS_MSGSPEC:
                return MsgspecEncoder(), MsgspecDecoder()
            return None, None

        if encoding is None:
            return None, None, None

        if isinstance(encoding, list):
            for candidate in encoding:
                packer, unpacker = build_pair(candidate)
                if packer is not None and unpacker is not None:
                    return candidate, packer, unpacker
            return None, None, None

        packer, unpacker = build_pair(encoding)
        if packer is None or unpacker is None:
            raise ImportError(f"Encoding '{encoding}' is not available.")

        return encoding, packer, unpacker
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
PayloadType = Literal[
|
|
295
|
+
"pydantic",
|
|
296
|
+
"python",
|
|
297
|
+
"collection_tuple",
|
|
298
|
+
"collection_sequence",
|
|
299
|
+
"collection_mapping",
|
|
300
|
+
]
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
class Serializer:
|
|
304
|
+
"""
|
|
305
|
+
Object serialization with specialized Pydantic model support.
|
|
306
|
+
|
|
307
|
+
Converts Python objects to serializable formats (dict/sequence) with type
|
|
308
|
+
preservation for Pydantic models. Maintains object integrity through
|
|
309
|
+
encoding/decoding cycles by storing class metadata and enabling proper
|
|
310
|
+
reconstruction of complex objects. Supports both dictionary-based and
|
|
311
|
+
sequence-based serialization strategies for different use cases.
|
|
312
|
+
"""
|
|
313
|
+
|
|
314
|
+
def __init__(
|
|
315
|
+
self,
|
|
316
|
+
serialization: SerializationTypesAlias = None,
|
|
317
|
+
pydantic_models: list[type[BaseModel]] | None = None,
|
|
318
|
+
):
|
|
319
|
+
"""
|
|
320
|
+
Initialize serializer with strategy and Pydantic registry.
|
|
321
|
+
|
|
322
|
+
:param serialization: Default serialization strategy for this instance
|
|
323
|
+
"""
|
|
324
|
+
self.serialization = serialization
|
|
325
|
+
self.pydantic_registry: dict[tuple[str, str], type[BaseModel]] = {}
|
|
326
|
+
if pydantic_models:
|
|
327
|
+
for model in pydantic_models:
|
|
328
|
+
self.register_pydantic(model)
|
|
329
|
+
|
|
330
|
+
def register_pydantic(self, model: type[BaseModel]) -> None:
    """
    Add a Pydantic model class to the registry.

    The class is stored under its ``(module, class name)`` pair, the same key
    ``load_pydantic`` builds when looking a class up.

    :param model: Pydantic model class to register for type preservation
    """
    module_and_name = (model.__module__, model.__name__)
    self.pydantic_registry.update({module_and_name: model})
|
|
338
|
+
|
|
339
|
+
def load_pydantic(self, type_name: str, module_name: str) -> type[BaseModel]:
    """
    Load Pydantic class by name with registry fallback to dynamic import.

    Registered classes are returned directly. Otherwise the module is imported
    dynamically, the attribute is looked up, checked to be a ``BaseModel``
    subclass, and cached in the registry for later calls.

    :param type_name: Class name to load
    :param module_name: Module containing the class
    :return: Loaded Pydantic model class
    :raises ImportError: If the module cannot be imported
    :raises AttributeError: If the module has no attribute named ``type_name``
    :raises TypeError: If the resolved attribute is not a Pydantic model class
    """
    key = (module_name, type_name)

    if key in self.pydantic_registry:
        return self.pydantic_registry[key]

    # NOTE(security): both names originate from the decoded message, so this
    # imports whatever module the sender names. Only decode messages from
    # trusted peers, or pre-register every expected model so that this
    # fallback is never reached.
    # Dynamic import fallback; need to update to better handle generics
    import importlib

    module = importlib.import_module(module_name)
    pydantic_class = getattr(module, type_name)

    # Refuse (and do not cache) anything that is not a Pydantic model class so
    # an arbitrary module attribute is never handed back to the deserializer.
    if not (isinstance(pydantic_class, type) and issubclass(pydantic_class, BaseModel)):
        raise TypeError(
            f"{module_name}.{type_name} is not a Pydantic BaseModel subclass"
        )

    self.pydantic_registry[key] = pydantic_class

    return pydantic_class
|
|
358
|
+
|
|
359
|
+
def serialize(self, obj: Any) -> Any:
|
|
360
|
+
"""
|
|
361
|
+
Serialize object using specified or configured strategy.
|
|
362
|
+
|
|
363
|
+
:param obj: Object to serialize
|
|
364
|
+
:return: Serialized representation (dict, str, or original object)
|
|
365
|
+
"""
|
|
366
|
+
if self.serialization == "dict":
|
|
367
|
+
return self.to_dict(obj)
|
|
368
|
+
elif self.serialization == "sequence":
|
|
369
|
+
return self.to_sequence(obj)
|
|
370
|
+
|
|
371
|
+
return obj
|
|
372
|
+
|
|
373
|
+
def deserialize(self, msg: Any) -> Any:
|
|
374
|
+
"""
|
|
375
|
+
Deserialize object using specified or configured strategy.
|
|
376
|
+
|
|
377
|
+
:param msg: Serialized message to deserialize
|
|
378
|
+
:return: Reconstructed object
|
|
379
|
+
"""
|
|
380
|
+
if self.serialization == "dict":
|
|
381
|
+
return self.from_dict(msg)
|
|
382
|
+
elif self.serialization == "sequence":
|
|
383
|
+
return self.from_sequence(msg)
|
|
384
|
+
|
|
385
|
+
return msg
|
|
386
|
+
|
|
387
|
+
def to_dict(self, obj: Any) -> Any:
    """
    Convert an object to its dict-strategy representation.

    A Pydantic model becomes a tagged dict. A list/tuple or dict that directly
    contains at least one model is rebuilt (as a list or dict respectively)
    with each model replaced by its tagged dict; only the first level of the
    container is inspected. Anything else is returned unchanged.

    :param obj: Object to convert (BaseModel, collections, or primitive)
    :return: Dictionary representation with type metadata for Pydantic models
    """

    def has_models(values: Any) -> bool:
        return any(isinstance(entry, BaseModel) for entry in values)

    def tag(entry: Any) -> Any:
        # Only model instances are converted; other entries pass through.
        if isinstance(entry, BaseModel):
            return self.to_dict_pydantic(entry)
        return entry

    if isinstance(obj, BaseModel):
        return self.to_dict_pydantic(obj)

    if isinstance(obj, (list, tuple)) and has_models(obj):
        return [tag(entry) for entry in obj]

    if isinstance(obj, dict) and has_models(obj.values()):
        return {name: tag(entry) for name, entry in obj.items()}

    return obj
|
|
416
|
+
|
|
417
|
+
def from_dict(self, data: Any) -> Any:
    """
    Rebuild an object from its dict-strategy representation.

    Dicts carrying the ``"*PYD*"`` marker are turned back into Pydantic models,
    both at the top level and one level deep inside lists/tuples and dicts.
    Lists and tuples always come back as lists.

    :param data: Dictionary representation possibly containing type metadata
    :return: Reconstructed object with proper types restored
    """

    def restore(candidate: Any) -> Any:
        tagged = isinstance(candidate, dict) and "*PYD*" in candidate
        return self.from_dict_pydantic(candidate) if tagged else candidate

    if isinstance(data, (list, tuple)):
        return [restore(entry) for entry in data]

    # Non-dicts and empty dicts are returned as they are.
    if not isinstance(data, dict) or not data:
        return data

    if "*PYD*" in data:
        return self.from_dict_pydantic(data)

    return {name: restore(entry) for name, entry in data.items()}
|
|
443
|
+
|
|
444
|
+
def to_dict_pydantic(self, item: Any) -> Any:
    """
    Wrap a model's dumped data in a dict carrying its class identity.

    :param item: Object exposing ``model_dump`` (a Pydantic model instance)
    :return: Dict with the ``"*PYD*"`` marker, class name (``"typ"``), module
        (``"mod"``), and the python-mode dump of the model (``"dat"``)
    """
    tagged: dict[str, Any] = {"*PYD*": True}
    tagged["typ"] = item.__class__.__name__
    tagged["mod"] = item.__class__.__module__
    tagged["dat"] = item.model_dump(mode="python")
    return tagged
|
|
457
|
+
|
|
458
|
+
def from_dict_pydantic(self, item: dict[str, Any]) -> Any:
    """
    Rebuild a Pydantic model from a tagged dict.

    The class is resolved through ``load_pydantic`` from the ``"typ"`` and
    ``"mod"`` entries, then validated against the ``"dat"`` entry.

    :param item: Dictionary containing type metadata and data
    :return: Model instance produced by ``model_validate``
    """
    model_class = self.load_pydantic(item["typ"], item["mod"])
    model_data = item["dat"]

    return model_class.model_validate(model_data)
|
|
470
|
+
|
|
471
|
+
def to_sequence(self, obj: Any) -> str | Any:
    """
    Convert object to sequence format with type-aware serialization.

    A Pydantic model is packed as a single "pydantic" entry. A list/tuple or
    mapping that directly contains at least one model is packed entry by
    entry as a "collection_sequence" / "collection_mapping"; mappings are
    prefixed with their comma-joined, stringified keys followed by ``|``.
    Everything else is packed as a single JSON "python" entry.

    :param obj: Object to serialize to sequence format
    :return: Serialized sequence string or bytes
    :raises ValueError: If a mapping key's string form contains "," or "|",
        which the comma-joined key header cannot represent
    """

    def pack_items(items: Any, packed: bytes | None) -> bytes | None:
        # Collection payloads are always binary: to_sequence_pydantic and the
        # mapping key header both produce bytes. to_sequence_python returns
        # whatever json.dumps returns, which may be str, so normalize it here;
        # otherwise pack_next_sequence rejects the str/bytes mix.
        for item in items:
            if isinstance(item, BaseModel):
                item_type: PayloadType = "pydantic"
                item_payload = self.to_sequence_pydantic(item)
            else:
                item_type = "python"
                item_payload = self.to_sequence_python(item)

            if isinstance(item_payload, str):
                item_payload = item_payload.encode()

            packed = self.pack_next_sequence(
                type_=item_type,
                payload=item_payload,
                current=packed,
            )
        return packed

    payload_type: PayloadType
    payload: str | bytes | None
    if isinstance(obj, BaseModel):
        payload_type = "pydantic"
        payload = self.to_sequence_pydantic(obj)
    elif isinstance(obj, list | tuple) and any(
        isinstance(item, BaseModel) for item in obj
    ):
        payload_type = "collection_sequence"
        payload = pack_items(obj, None)
    elif isinstance(obj, Mapping) and any(
        isinstance(value, BaseModel) for value in obj.values()
    ):
        payload_type = "collection_mapping"
        key_names = [str(key) for key in obj]
        for key_name in key_names:
            # The header is "k1,k2,...|"; a key containing either delimiter
            # would be split or truncated on decode, so fail loudly instead.
            if "," in key_name or "|" in key_name:
                raise ValueError(
                    f"Mapping key {key_name!r} contains a reserved delimiter "
                    "(',' or '|') and cannot be serialized to a sequence"
                )
        payload = pack_items(obj.values(), ",".join(key_names).encode() + b"|")
    else:
        payload_type = "python"
        payload = self.to_sequence_python(obj)

    return self.pack_next_sequence(
        payload_type, payload if payload is not None else "", None
    )
|
|
526
|
+
|
|
527
|
+
def from_sequence(self, data: str | Any) -> Any:  # noqa: C901, PLR0912
    """
    Rebuild an object from the sequence format produced by ``to_sequence``.

    The outer entry is unpacked first and must be the only one in ``data``.
    "pydantic" and "python" entries are decoded directly. Collection entries
    are decoded item by item into a list. Mapping entries are decoded into a
    dict keyed by the strings from the comma-separated key header.

    :param data: Serialized sequence data to reconstruct
    :return: Reconstructed object with proper types
    :raises ValueError: If sequence format is invalid or contains multiple
        packed sequences
    """

    def decode_entries(stream: Any, bad_type_message: str) -> Any:
        # Lazily unpack and decode packed entries until the stream is exhausted.
        while stream:
            entry_type, entry_payload, stream = self.unpack_next_sequence(stream)
            if entry_type == "pydantic":
                yield self.from_sequence_pydantic(entry_payload)
            elif entry_type == "python":
                yield self.from_sequence_python(entry_payload)
            else:
                raise ValueError(bad_type_message)

    body: str | bytes | None
    kind, body, leftover = self.unpack_next_sequence(data)
    if leftover is not None:
        raise ValueError("Data contains multiple packed sequences; expected one.")

    if kind == "pydantic":
        return self.from_sequence_pydantic(body)

    if kind == "python":
        return self.from_sequence_python(body)

    if kind in {"collection_sequence", "collection_tuple"}:
        return list(decode_entries(body, "Invalid type in collection sequence"))

    if kind != "collection_mapping":
        raise ValueError(f"Invalid type for mapping sequence: {kind}")

    # Mapping payload layout: "<key1>,<key2>,...|<packed entries>".
    if isinstance(body, bytes):
        header_end = body.index(b"|")
        keys = body[:header_end].decode().split(",")
    else:
        header_end = body.index("|")
        keys = body[:header_end].split(",")
    entries = body[header_end + 1 :]

    restored = {}
    # Entries are matched to keys by position.
    for position, value in enumerate(
        decode_entries(entries, "Invalid type in mapping sequence")
    ):
        restored[keys[position]] = value
    return restored
|
|
586
|
+
|
|
587
|
+
def to_sequence_pydantic(self, obj: BaseModel) -> str | bytes:
    """
    Serialize a Pydantic model as ``<class name>|<module>|<json>`` bytes.

    The JSON part comes from the model's ``__pydantic_serializer__.to_json``.

    :param obj: Pydantic model instance to serialize
    :return: Bytes containing class name, module, and JSON data joined by ``|``
    """
    model_type = obj.__class__
    name_text: str = model_type.__name__
    module_text: str = model_type.__module__
    json_bytes = obj.__pydantic_serializer__.to_json(obj)

    return b"|".join([name_text.encode(), module_text.encode(), json_bytes])
|
|
599
|
+
|
|
600
|
+
def from_sequence_pydantic(self, data: str | bytes) -> BaseModel:
    """
    Rebuild a Pydantic model from ``<class name>|<module>|<json>`` data.

    Only the first two ``|`` delimiters are structural; everything after the
    second one is passed to ``model_validate_json`` untouched.

    :param data: Sequence data containing class metadata and JSON
    :return: Reconstructed Pydantic model instance
    """
    json_data: str | bytes | bytearray
    binary = isinstance(data, bytes)
    delimiter = b"|" if binary else "|"

    def as_text(fragment: Any) -> Any:
        # Header fields of binary input are decoded; text input is used as-is.
        return fragment.decode() if binary else fragment

    name_end = data.index(delimiter)
    class_name = as_text(data[:name_end])
    module_end = data.index(delimiter, name_end + 1)
    module_name = as_text(data[name_end + 1 : module_end])
    json_data = data[module_end + 1 :]

    model_class = self.load_pydantic(class_name, module_name)

    return model_class.model_validate_json(json_data)
|
|
624
|
+
|
|
625
|
+
def to_sequence_python(self, obj: Any) -> str | bytes:
    """
    Serialize a plain Python object with the module's ``json`` implementation.

    :param obj: Python object to serialize
    :return: Whatever ``json.dumps`` returns for the object (string or bytes)
    """
    encoded = json.dumps(obj)
    return encoded
|
|
633
|
+
|
|
634
|
+
def from_sequence_python(self, data: str | bytes) -> Any:
    """
    Deserialize a plain Python object with the module's ``json`` implementation.

    :param data: JSON string or bytes to deserialize
    :return: Object produced by ``json.loads``
    """
    decoded = json.loads(data)
    return decoded
|
|
642
|
+
|
|
643
|
+
def pack_next_sequence(
    self,
    type_: PayloadType,
    payload: str | bytes,
    current: str | bytes | None,
) -> str | bytes:
    """
    Pack a payload as ``<type code>|<length>|<payload>`` and append it.

    Bytes payloads get a bytes header whose length field is the payload length
    as a minimal big-endian integer (one zero byte for an empty payload). Any
    other payload gets a text header with the length in decimal.

    NOTE(review): for bytes payloads the length field can itself contain the
    ``|`` byte (for example a payload of length 124); confirm that
    ``unpack_next_sequence`` copes with that.

    :param type_: Type identifier for the payload
    :param payload: Data to pack into sequence
    :param current: Existing packed data the new entry is appended to; None or
        empty returns just the new entry
    :return: Packed sequence with type, length, and payload
    :raises ValueError: If payload type doesn't match current type or unknown
        type specified
    """
    if current is not None and type(payload) is not type(current):
        raise ValueError("Payload and current must be of the same type")

    size = len(payload)

    # One-letter codes written in front of each packed entry.
    type_codes = (
        ("pydantic", "P"),
        ("python", "p"),
        ("collection_tuple", "T"),
        ("collection_sequence", "S"),
        ("collection_mapping", "M"),
    )
    letter = next((code for name, code in type_codes if type_ == name), None)
    if letter is None:
        raise ValueError(f"Unknown type for packing: {type_}")

    tag: str | bytes
    length_field: str | bytes
    delimiter: str | bytes
    if isinstance(payload, bytes):
        width = (size.bit_length() + 7) // 8 if size > 0 else 1
        length_field = size.to_bytes(length=width, byteorder="big")
        tag = letter.encode()
        delimiter = b"|"
    else:
        length_field = str(size)
        tag = letter
        delimiter = "|"

    # Type ignores because types are enforced at runtime
    packed = tag + delimiter + length_field + delimiter + payload  # type: ignore[operator]
    if current:
        return current + packed  # type: ignore[operator]
    return packed
|
|
709
|
+
|
|
710
|
+
def unpack_next_sequence( # noqa: C901, PLR0912
|
|
711
|
+
self, data: str | bytes
|
|
712
|
+
) -> tuple[
|
|
713
|
+
PayloadType,
|
|
714
|
+
str | bytes,
|
|
715
|
+
str | bytes | None,
|
|
716
|
+
]:
|
|
717
|
+
"""
|
|
718
|
+
Unpack sequence format to extract type, payload, and remaining data.
|
|
719
|
+
|
|
720
|
+
:param data: Packed sequence data to unpack
|
|
721
|
+
:return: Tuple of (type, payload, remaining_data)
|
|
722
|
+
:raises ValueError: If sequence format is invalid or unknown type character
|
|
723
|
+
"""
|
|
724
|
+
type_: PayloadType
|
|
725
|
+
if isinstance(data, bytes):
|
|
726
|
+
if len(data) < len(b"T|N") or data[1:2] != b"|":
|
|
727
|
+
raise ValueError("Invalid packed data format")
|
|
728
|
+
|
|
729
|
+
type_char_b = data[0:1]
|
|
730
|
+
if type_char_b == b"P":
|
|
731
|
+
type_ = "pydantic"
|
|
732
|
+
elif type_char_b == b"p":
|
|
733
|
+
type_ = "python"
|
|
734
|
+
elif type_char_b == b"T":
|
|
735
|
+
type_ = "collection_tuple"
|
|
736
|
+
elif type_char_b == b"S":
|
|
737
|
+
type_ = "collection_sequence"
|
|
738
|
+
elif type_char_b == b"M":
|
|
739
|
+
type_ = "collection_mapping"
|
|
740
|
+
else:
|
|
741
|
+
raise ValueError("Unknown type character in packed data")
|
|
742
|
+
|
|
743
|
+
len_end = data.index(b"|", 2)
|
|
744
|
+
payload_len = int.from_bytes(data[2:len_end], "big")
|
|
745
|
+
payload_b = data[len_end + 1 : len_end + 1 + payload_len]
|
|
746
|
+
remaining_b = (
|
|
747
|
+
data[len_end + 1 + payload_len :]
|
|
748
|
+
if len_end + 1 + payload_len < len(data)
|
|
749
|
+
else None
|
|
750
|
+
)
|
|
751
|
+
|
|
752
|
+
return type_, payload_b, remaining_b
|
|
753
|
+
|
|
754
|
+
if len(data) < len("T|N") or data[1] != "|":
|
|
755
|
+
raise ValueError("Invalid packed data format")
|
|
756
|
+
|
|
757
|
+
type_char_s = data[0]
|
|
758
|
+
if type_char_s == "P":
|
|
759
|
+
type_ = "pydantic"
|
|
760
|
+
elif type_char_s == "p":
|
|
761
|
+
type_ = "python"
|
|
762
|
+
elif type_char_s == "S":
|
|
763
|
+
type_ = "collection_sequence"
|
|
764
|
+
elif type_char_s == "M":
|
|
765
|
+
type_ = "collection_mapping"
|
|
766
|
+
else:
|
|
767
|
+
raise ValueError("Unknown type character in packed data")
|
|
768
|
+
|
|
769
|
+
len_end = data.index("|", 2)
|
|
770
|
+
payload_len = int(data[2:len_end])
|
|
771
|
+
payload_s = data[len_end + 1 : len_end + 1 + payload_len]
|
|
772
|
+
remaining_s = (
|
|
773
|
+
data[len_end + 1 + payload_len :]
|
|
774
|
+
if len_end + 1 + payload_len < len(data)
|
|
775
|
+
else None
|
|
776
|
+
)
|
|
777
|
+
|
|
778
|
+
return type_, payload_s, remaining_s
|