guidellm 0.4.0a21__py3-none-any.whl → 0.4.0a169__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of guidellm might be problematic. Click here for more details.

Files changed (115) hide show
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +452 -252
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +110 -0
  5. guidellm/backends/openai.py +355 -0
  6. guidellm/backends/response_handlers.py +455 -0
  7. guidellm/benchmark/__init__.py +53 -39
  8. guidellm/benchmark/benchmarker.py +150 -317
  9. guidellm/benchmark/entrypoints.py +467 -128
  10. guidellm/benchmark/output.py +519 -771
  11. guidellm/benchmark/profile.py +580 -280
  12. guidellm/benchmark/progress.py +568 -549
  13. guidellm/benchmark/scenarios/__init__.py +40 -0
  14. guidellm/benchmark/scenarios/chat.json +6 -0
  15. guidellm/benchmark/scenarios/rag.json +6 -0
  16. guidellm/benchmark/schemas.py +2086 -0
  17. guidellm/data/__init__.py +28 -4
  18. guidellm/data/collators.py +16 -0
  19. guidellm/data/deserializers/__init__.py +53 -0
  20. guidellm/data/deserializers/deserializer.py +144 -0
  21. guidellm/data/deserializers/file.py +222 -0
  22. guidellm/data/deserializers/huggingface.py +94 -0
  23. guidellm/data/deserializers/memory.py +194 -0
  24. guidellm/data/deserializers/synthetic.py +348 -0
  25. guidellm/data/loaders.py +149 -0
  26. guidellm/data/preprocessors/__init__.py +25 -0
  27. guidellm/data/preprocessors/formatters.py +404 -0
  28. guidellm/data/preprocessors/mappers.py +198 -0
  29. guidellm/data/preprocessors/preprocessor.py +31 -0
  30. guidellm/data/processor.py +31 -0
  31. guidellm/data/schemas.py +13 -0
  32. guidellm/data/utils/__init__.py +6 -0
  33. guidellm/data/utils/dataset.py +94 -0
  34. guidellm/extras/__init__.py +4 -0
  35. guidellm/extras/audio.py +215 -0
  36. guidellm/extras/vision.py +242 -0
  37. guidellm/logger.py +2 -2
  38. guidellm/mock_server/__init__.py +8 -0
  39. guidellm/mock_server/config.py +84 -0
  40. guidellm/mock_server/handlers/__init__.py +17 -0
  41. guidellm/mock_server/handlers/chat_completions.py +280 -0
  42. guidellm/mock_server/handlers/completions.py +280 -0
  43. guidellm/mock_server/handlers/tokenizer.py +142 -0
  44. guidellm/mock_server/models.py +510 -0
  45. guidellm/mock_server/server.py +168 -0
  46. guidellm/mock_server/utils.py +302 -0
  47. guidellm/preprocess/dataset.py +23 -26
  48. guidellm/presentation/builder.py +2 -2
  49. guidellm/presentation/data_models.py +25 -21
  50. guidellm/presentation/injector.py +2 -3
  51. guidellm/scheduler/__init__.py +65 -26
  52. guidellm/scheduler/constraints.py +1035 -0
  53. guidellm/scheduler/environments.py +252 -0
  54. guidellm/scheduler/scheduler.py +140 -368
  55. guidellm/scheduler/schemas.py +272 -0
  56. guidellm/scheduler/strategies.py +519 -0
  57. guidellm/scheduler/worker.py +391 -420
  58. guidellm/scheduler/worker_group.py +707 -0
  59. guidellm/schemas/__init__.py +31 -0
  60. guidellm/schemas/info.py +159 -0
  61. guidellm/schemas/request.py +226 -0
  62. guidellm/schemas/response.py +119 -0
  63. guidellm/schemas/stats.py +228 -0
  64. guidellm/{config.py → settings.py} +32 -21
  65. guidellm/utils/__init__.py +95 -8
  66. guidellm/utils/auto_importer.py +98 -0
  67. guidellm/utils/cli.py +71 -2
  68. guidellm/utils/console.py +183 -0
  69. guidellm/utils/encoding.py +778 -0
  70. guidellm/utils/functions.py +134 -0
  71. guidellm/utils/hf_datasets.py +1 -2
  72. guidellm/utils/hf_transformers.py +4 -4
  73. guidellm/utils/imports.py +9 -0
  74. guidellm/utils/messaging.py +1118 -0
  75. guidellm/utils/mixins.py +115 -0
  76. guidellm/utils/pydantic_utils.py +411 -0
  77. guidellm/utils/random.py +3 -4
  78. guidellm/utils/registry.py +220 -0
  79. guidellm/utils/singleton.py +133 -0
  80. guidellm/{objects → utils}/statistics.py +341 -247
  81. guidellm/utils/synchronous.py +159 -0
  82. guidellm/utils/text.py +163 -50
  83. guidellm/utils/typing.py +41 -0
  84. guidellm/version.py +1 -1
  85. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/METADATA +33 -10
  86. guidellm-0.4.0a169.dist-info/RECORD +95 -0
  87. guidellm/backend/__init__.py +0 -23
  88. guidellm/backend/backend.py +0 -259
  89. guidellm/backend/openai.py +0 -705
  90. guidellm/backend/response.py +0 -136
  91. guidellm/benchmark/aggregator.py +0 -760
  92. guidellm/benchmark/benchmark.py +0 -837
  93. guidellm/benchmark/scenario.py +0 -104
  94. guidellm/data/prideandprejudice.txt.gz +0 -0
  95. guidellm/dataset/__init__.py +0 -22
  96. guidellm/dataset/creator.py +0 -213
  97. guidellm/dataset/entrypoints.py +0 -42
  98. guidellm/dataset/file.py +0 -92
  99. guidellm/dataset/hf_datasets.py +0 -62
  100. guidellm/dataset/in_memory.py +0 -132
  101. guidellm/dataset/synthetic.py +0 -287
  102. guidellm/objects/__init__.py +0 -18
  103. guidellm/objects/pydantic.py +0 -89
  104. guidellm/request/__init__.py +0 -18
  105. guidellm/request/loader.py +0 -284
  106. guidellm/request/request.py +0 -79
  107. guidellm/request/types.py +0 -10
  108. guidellm/scheduler/queues.py +0 -25
  109. guidellm/scheduler/result.py +0 -155
  110. guidellm/scheduler/strategy.py +0 -495
  111. guidellm-0.4.0a21.dist-info/RECORD +0 -62
  112. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/WHEEL +0 -0
  113. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/entry_points.txt +0 -0
  114. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/licenses/LICENSE +0 -0
  115. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a169.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,6 @@
1
+ from .dataset import DEFAULT_SPLITS, resolve_dataset_split
2
+
3
+ __all__ = [
4
+ "DEFAULT_SPLITS",
5
+ "resolve_dataset_split",
6
+ ]
@@ -0,0 +1,94 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Literal
4
+
5
+ from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
6
+
7
+ __all__ = ["DEFAULT_SPLITS", "resolve_dataset_split"]
8
+
9
+
10
# Known aliases for each canonical dataset split. Order matters: when no split
# is requested, resolve_dataset_split walks the keys in declaration order and
# the aliases in list order, returning the first alias present in the dataset.
DEFAULT_SPLITS: dict[Literal["train", "calib", "val", "test"], list[str]] = {
    "train": [
        "train",
        "training",
        "train_set",
        "training_set",
        "train_dataset",
        "training_dataset",
        "train_data",
        "training_data",
        "pretrain",
        "pretrain_set",
        "pretrain_dataset",
        "pretrain_data",
        "pretraining",
    ],
    "calib": [
        "calibration",
        "calib",
        "cal",
        "calibration_set",
        "calib_set",
        "cal_set",
        "calibration_dataset",
        "calib_dataset",
        # Was a second "cal_set"; the "*_dataset" row needs its "cal" variant.
        "cal_dataset",
        "calibration_data",
        "calib_data",
        "cal_data",
    ],
    "val": [
        "validation",
        "val",
        "valid",
        "validation_set",
        "val_set",
        "validation_dataset",
        "val_dataset",
        "validation_data",
        "val_data",
        "dev",
        "dev_set",
        "dev_dataset",
        "dev_data",
    ],
    "test": [
        "test",
        "testing",
        "test_set",
        "testing_set",
        "test_dataset",
        "testing_dataset",
        "test_data",
        "testing_data",
        "eval",
        "eval_set",
        "eval_dataset",
        "eval_data",
    ],
}
70
+
71
+
72
def resolve_dataset_split(
    dataset: Dataset | IterableDataset | DatasetDict | IterableDatasetDict,
    split: str | None = None,
) -> Dataset | IterableDataset:
    """
    Pick a single split out of a dataset or dataset dictionary.

    :param dataset: A single dataset, or a dictionary of named splits.
    :param split: Name of the split to return. When None, a single dataset is
        returned unchanged; for a dictionary the first alias from DEFAULT_SPLITS
        that is present wins, falling back to the dictionary's first key.
    :return: The selected dataset.
    :raises ValueError: If ``split`` is given but the dataset has no splits, or
        the named split is not present.
    """
    if split is not None:
        # An explicit request is only meaningful for split dictionaries.
        if not isinstance(dataset, DatasetDict | IterableDatasetDict):
            raise ValueError(
                f"Requested split '{split}' but dataset has no splits: {dataset}."
            )
        if split not in dataset:
            raise ValueError(
                f"Requested split '{split}' not found in dataset: {dataset}."
            )
        return dataset[split]

    if isinstance(dataset, Dataset | IterableDataset):
        return dataset

    # No explicit split: try the well-known aliases in priority order.
    for aliases in DEFAULT_SPLITS.values():
        for alias in aliases:
            if alias in dataset:
                return dataset[alias]

    # Nothing matched; fall back to whichever split is listed first.
    first_key = list(dataset.keys())[0]
    return dataset[first_key]
@@ -0,0 +1,4 @@
1
+ """
2
+ Code that depends on optional dependencies.
3
+ Each submodule should be imported lazily (deferred).
4
+ """
@@ -0,0 +1,215 @@
1
+ from __future__ import annotations
2
+
3
+ import base64
4
+ from pathlib import Path
5
+ from typing import Any, Literal
6
+
7
+ import httpx
8
+ import numpy as np
9
+ import torch
10
+
11
+ try:
12
+ from torchcodec import AudioSamples
13
+ from torchcodec.decoders import AudioDecoder
14
+ from torchcodec.encoders import AudioEncoder
15
+ except ImportError as e:
16
+ raise ImportError("Please install guidellm[audio] to use audio features") from e
17
+
18
+ __all__ = [
19
+ "encode_audio",
20
+ "is_url",
21
+ ]
22
+
23
+
24
def is_url(text: Any) -> bool:
    """Return True when ``text`` is a string starting with http:// or https://."""
    if not isinstance(text, str):
        return False
    return text.startswith(("http://", "https://"))
26
+
27
+
28
def encode_audio(
    audio: AudioDecoder
    | bytes
    | str
    | Path
    | np.ndarray
    | torch.Tensor
    | dict[str, Any],
    b64encode: bool = False,
    sample_rate: int | None = None,
    file_name: str = "audio.wav",
    encode_sample_rate: int = 16000,
    max_duration: float | None = None,
    mono: bool = True,
    audio_format: str = "mp3",
    bitrate: str = "64k",
) -> dict[
    Literal[
        "type",
        "audio",
        "format",
        "mimetype",
        "audio_samples",
        "audio_seconds",
        "audio_bytes",
        "file_name",
    ],
    str | int | float | bytes | None,
]:
    """
    Decode audio (if necessary) and re-encode to specified format.

    :param audio: Audio source; any input type accepted by ``_decode_audio``.
    :param b64encode: When True the encoded audio is returned as a base64 string
        and ``type`` is "audio_base64"; otherwise raw bytes and "audio_file".
    :param sample_rate: Sample rate forwarded to ``_decode_audio``.
    :param file_name: Name reported when ``audio`` is not a str or Path.
    :param encode_sample_rate: Sample rate requested from the encoder.
    :param max_duration: Maximum number of seconds of audio to decode.
    :param mono: Whether to request a single output channel.
    :param audio_format: Target container/codec name; lower-cased for encoding.
    :param bitrate: Bit rate as a plain integer string or with a "k" suffix
        (for example "64k" is treated as 64000).
    :return: Dictionary with the encoded payload and its metadata.
    """
    decoded = _decode_audio(audio, sample_rate=sample_rate, max_duration=max_duration)

    # Accept both "64k" style and plain integer strings.
    if bitrate.endswith("k"):
        bits_per_second = int(bitrate.rstrip("k")) * 1000
    else:
        bits_per_second = int(bitrate)
    normalized_format = audio_format.lower()

    payload = _encode_audio(
        samples=decoded,
        resample_rate=encode_sample_rate,
        bitrate=bits_per_second,
        audio_format=normalized_format,
        mono=mono,
    )

    # Path-like inputs report their own file name; everything else uses the
    # caller-supplied default.
    if isinstance(audio, str | Path):
        reported_name = get_file_name(audio)
    else:
        reported_name = file_name

    if b64encode:
        kind = "audio_base64"
        body = base64.b64encode(payload).decode("utf-8")
    else:
        kind = "audio_file"
        body = payload

    return {
        "type": kind,
        "audio": body,
        "file_name": reported_name,
        "format": audio_format,
        "mimetype": f"audio/{normalized_format}",
        # NOTE(review): this reports the decoded sample *rate*, not a sample
        # count -- confirm that is what consumers of "audio_samples" expect.
        "audio_samples": decoded.sample_rate,
        "audio_seconds": decoded.duration_seconds,
        "audio_bytes": len(payload),
    }
89
+
90
+
91
+ def _decode_audio( # noqa: C901, PLR0912
92
+ audio: AudioDecoder
93
+ | bytes
94
+ | str
95
+ | Path
96
+ | np.ndarray
97
+ | torch.Tensor
98
+ | dict[str, Any],
99
+ sample_rate: int | None = None,
100
+ max_duration: float | None = None,
101
+ ) -> AudioSamples:
102
+ """Decode audio from various input types into AudioSamples."""
103
+ # If input is a dict, unwrap it into a function call
104
+ if isinstance(audio, dict):
105
+ sample_rate = audio.get("sample_rate", audio.get("sampling_rate", sample_rate))
106
+ if "data" not in audio and "url" not in audio:
107
+ raise ValueError(
108
+ f"Audio dict must contain either 'data' or 'url' keys, got {audio}"
109
+ )
110
+ return _decode_audio(
111
+ audio=audio.get("data") or audio.get("url"),
112
+ sample_rate=sample_rate,
113
+ max_duration=max_duration,
114
+ )
115
+
116
+ # Convert numpy array to torch tensor and re-call
117
+ if isinstance(audio, np.ndarray):
118
+ return _decode_audio(
119
+ audio=torch.from_numpy(audio),
120
+ sample_rate=sample_rate,
121
+ max_duration=max_duration,
122
+ )
123
+
124
+ samples: AudioSamples
125
+
126
+ data: torch.Tensor | bytes
127
+ # HF datasets return AudioDecoder for audio column
128
+ if isinstance(audio, AudioDecoder):
129
+ samples = audio.get_samples_played_in_range(stop_seconds=max_duration)
130
+ elif isinstance(audio, torch.Tensor):
131
+ # If float stream assume decoded audio
132
+ if torch.is_floating_point(audio):
133
+ if sample_rate is None:
134
+ raise ValueError("Sample rate must be set for decoded audio")
135
+
136
+ full_duration = audio.shape[1] / sample_rate
137
+ # If max_duration is set, trim the audio to that duration
138
+ if max_duration is not None:
139
+ num_samples = int(max_duration * sample_rate)
140
+ duration = min(max_duration, full_duration)
141
+ data = audio[:, :num_samples]
142
+ else:
143
+ duration = full_duration
144
+ data = audio
145
+
146
+ samples = AudioSamples(
147
+ data=data,
148
+ pts_seconds=0.0,
149
+ duration_seconds=duration,
150
+ sample_rate=sample_rate,
151
+ )
152
+ # If bytes tensor assume encoded audio
153
+ elif audio.dtype == torch.uint8:
154
+ decoder = AudioDecoder(
155
+ source=audio,
156
+ sample_rate=sample_rate,
157
+ )
158
+ samples = decoder.get_samples_played_in_range(stop_seconds=max_duration)
159
+
160
+ else:
161
+ raise ValueError(f"Unsupported audio type: {type(audio)}")
162
+
163
+ # If bytes, assume encoded audio
164
+ elif isinstance(audio, bytes):
165
+ decoder = AudioDecoder(
166
+ source=audio,
167
+ sample_rate=sample_rate,
168
+ )
169
+ samples = decoder.get_samples_played_in_range(stop_seconds=max_duration)
170
+
171
+ # If str or Path, assume file path or URL to encoded audio
172
+ elif isinstance(audio, str | Path):
173
+ if isinstance(audio, str) and is_url(audio):
174
+ response = httpx.get(audio)
175
+ response.raise_for_status()
176
+ data = response.content
177
+ else:
178
+ if not Path(audio).exists():
179
+ raise ValueError(f"Audio file does not exist: {audio}")
180
+ data = Path(audio).read_bytes()
181
+ decoder = AudioDecoder(
182
+ source=data,
183
+ )
184
+ samples = decoder.get_samples_played_in_range(stop_seconds=max_duration)
185
+ else:
186
+ raise ValueError(f"Unsupported audio type: {type(audio)}")
187
+
188
+ return samples
189
+
190
+
191
def _encode_audio(
    samples: AudioSamples,
    resample_rate: int | None = None,
    bitrate: int = 64000,
    audio_format: str = "mp3",
    mono: bool = True,
) -> bytes:
    """
    Encode decoded samples into the requested audio format.

    :param samples: Decoded audio; its ``data`` and ``sample_rate`` are used.
    :param resample_rate: Sample rate requested from the encoder.
    :param bitrate: Bit rate in bits per second; only forwarded for "mp3".
    :param audio_format: Format name passed to the encoder.
    :param mono: When True a single output channel is requested.
    :return: The encoded audio as raw bytes.
    """
    # The bit rate is only passed through for mp3; other formats get None.
    effective_bit_rate = bitrate if audio_format == "mp3" else None
    channel_count = 1 if mono else None

    encoded = AudioEncoder(
        samples=samples.data,
        sample_rate=samples.sample_rate,
    ).to_tensor(
        format=audio_format,
        bit_rate=effective_bit_rate,
        num_channels=channel_count,
        sample_rate=resample_rate,
    )
    return encoded.numpy().tobytes()
211
+
212
+
213
def get_file_name(path: Path | str) -> str:
    """Return the final component of ``path`` (the file name)."""
    as_path = Path(path)
    return as_path.name
@@ -0,0 +1,242 @@
1
+ from __future__ import annotations
2
+
3
+ import base64
4
+ import io
5
+ from pathlib import Path
6
+ from typing import Any, Literal
7
+
8
+ import httpx
9
+ import numpy as np
10
+
11
+ try:
12
+ from PIL import Image as PILImage
13
+ except ImportError as e:
14
+ raise ImportError(
15
+ "Please install guidellm[vision] to use image/video features"
16
+ ) from e
17
+
18
+ __all__ = [
19
+ "encode_image",
20
+ "encode_video",
21
+ "get_file_format",
22
+ "is_url",
23
+ "resize_image",
24
+ ]
25
+
26
+
27
def is_url(text: Any) -> bool:
    """Tell whether ``text`` is a str beginning with an http(s) scheme."""
    url_prefixes = ("http://", "https://")
    return text.startswith(url_prefixes) if isinstance(text, str) else False
29
+
30
+
31
def encode_image(
    image: bytes | str | Path | np.ndarray | PILImage.Image,
    width: int | None = None,
    height: int | None = None,
    max_size: int | None = None,
    max_width: int | None = None,
    max_height: int | None = None,
    encode_type: Literal["base64", "url"] | None = "base64",
) -> dict[Literal["type", "image", "image_pixels", "image_bytes"], str | int | None]:
    """
    Encode an image as a base64 JPEG data URI, or pass an image URL through.

    Input image types:
        - bytes: raw image bytes, decoded with Pillow
        - str: file path on disk, url, or "data:image/..." base64 string
        - pathlib.Path: file path on disk
        - np.ndarray: image array, converted with Pillow
        - PIL.Image.Image: Pillow image

    :param image: The image source.
    :param width: Target width passed to ``resize_image``.
    :param height: Target height passed to ``resize_image``.
    :param max_size: Maximum size of the longest edge of the image.
    :param max_width: Maximum width of the image.
    :param max_height: Maximum height of the image.
    :param encode_type: Only affects URL inputs. "base64" downloads the URL and
        encodes it; any other value returns the URL unchanged. Non-URL inputs
        are always base64 encoded.
    :return: A dict with keys "type", "image", "image_pixels" and
        "image_bytes". For a passed-through URL, "type" is "image_url" and the
        pixel/byte counts are None. Otherwise "type" is "image_base64" and
        "image" is a "data:image/jpeg;base64,{data}" string.
    :raises ValueError: If a resize is requested for a passed-through URL, or
        the input type is unsupported.
    """
    if isinstance(image, str) and is_url(image):
        if encode_type == "base64":
            response = httpx.get(image)
            response.raise_for_status()
            # Forward every resize option; width/height used to be dropped
            # here, so explicit sizes were silently ignored for URL inputs.
            return encode_image(
                image=response.content,
                width=width,
                height=height,
                max_size=max_size,
                max_width=max_width,
                max_height=max_height,
                encode_type="base64",
            )

        if any([width, height, max_size, max_width, max_height]):
            raise ValueError(f"Cannot resize image {image} when encode_type is 'url'")

        return {
            "type": "image_url",
            "image": image,
            "image_pixels": None,
            "image_bytes": None,
        }

    decoded_image: PILImage.Image

    if isinstance(image, bytes):
        decoded_image = PILImage.open(io.BytesIO(image))
    elif isinstance(image, str) and image.startswith("data:image/"):
        # Strip the "data:image/...;base64," header and decode the payload.
        _, encoded = image.split(",", 1)
        image_data = base64.b64decode(encoded)
        decoded_image = PILImage.open(io.BytesIO(image_data))
    elif isinstance(image, str | Path):
        decoded_image = PILImage.open(image)
    elif isinstance(image, np.ndarray):
        decoded_image = PILImage.fromarray(image)
    elif isinstance(image, PILImage.Image):
        decoded_image = image
    else:
        raise ValueError(f"Unsupported image type: {type(image)} for {image}")

    output_image = resize_image(
        decoded_image,
        width=width,
        height=height,
        max_width=max_width,
        max_height=max_height,
        max_size=max_size,
    )
    # The image is converted to RGB before being saved as JPEG.
    if output_image.mode != "RGB":
        output_image = output_image.convert("RGB")

    buffer = io.BytesIO()
    output_image.save(buffer, format="JPEG")
    image_bytes = buffer.getvalue()
    image_base64 = base64.b64encode(image_bytes).decode("utf-8")

    return {
        "type": "image_base64",
        "image": f"data:image/jpeg;base64,{image_base64}",
        "image_pixels": output_image.width * output_image.height,
        "image_bytes": len(image_bytes),
    }
123
+
124
+
125
def resize_image(
    image: PILImage.Image,
    width: int | None = None,
    height: int | None = None,
    max_width: int | None = None,
    max_height: int | None = None,
    max_size: int | None = None,
) -> PILImage.Image:
    """
    Resize a Pillow image to an explicit size and/or within maximum bounds.

    When both ``width`` and ``height`` are given the image is resized to exactly
    that size and the max constraints are not applied. When only one is given
    the other is derived from the aspect ratio. The max constraints then scale
    the result down (never up) while preserving the aspect ratio.

    :param image: The image to resize.
    :param width: Target width in pixels.
    :param height: Target height in pixels.
    :param max_width: Upper bound for the width.
    :param max_height: Upper bound for the height.
    :param max_size: Upper bound used for whichever of ``max_width`` /
        ``max_height`` is not set.
    :return: The resized image, or the original object when the size is
        unchanged.
    :raises ValueError: If ``image`` is not a Pillow image.
    """
    if not isinstance(image, PILImage.Image):
        raise ValueError(f"Unsupported image type: {type(image)}")

    if width is not None and height is not None:
        return image.resize((width, height), PILImage.Resampling.BILINEAR)

    orig_w, orig_h = image.size
    aspect = orig_w / orig_h

    # Derived dimensions are clamped to at least one pixel: on extreme aspect
    # ratios rounding can yield 0, which Pillow rejects.
    if width is not None:
        target_w = width
        target_h = max(1, round(width / aspect))
    elif height is not None:
        target_h = height
        target_w = max(1, round(height * aspect))
    else:
        target_w, target_h = orig_w, orig_h

    # Normalize max_size → max_width/max_height
    if max_size is not None:
        max_width = max_width or max_size
        max_height = max_height or max_size

    # Apply max constraints (preserve aspect ratio)
    if max_width or max_height:
        scale_w = max_width / target_w if max_width else 1.0
        scale_h = max_height / target_h if max_height else 1.0
        scale = min(scale_w, scale_h, 1.0)  # never upscale
        target_w = max(1, round(target_w * scale))
        target_h = max(1, round(target_h * scale))

    if (target_w, target_h) != (orig_w, orig_h):
        image = image.resize((target_w, target_h), PILImage.Resampling.BILINEAR)

    return image
168
+
169
+
170
+ def encode_video(
171
+ video: bytes | str | Path,
172
+ encode_type: Literal["base64", "url"] | None = "base64",
173
+ ) -> dict[
174
+ Literal["type", "video", "video_frames", "video_seconds", "video_bytes"],
175
+ str | int | float | None,
176
+ ]:
177
+ """
178
+ Input video types:
179
+ - bytes: raw video bytes
180
+ - str: file path on disk, url, or already base64 encoded video string
181
+ - pathlib.Path: file path on disk
182
+ - datasets.Video: HuggingFace datasets Video object
183
+
184
+ encode_type: None to return the supported format
185
+ (url for url, base64 string for others)
186
+ "base64" to return base64 encoded string (or download URL and encode)
187
+ "url" to return url (only if input is url, otherwise fails)
188
+
189
+ Returns a str of either:
190
+ - video url
191
+ - "data:video/{type};base64, {data}" string
192
+ """
193
+ if isinstance(video, str) and is_url(video):
194
+ if encode_type == "base64":
195
+ response = httpx.get(video)
196
+ response.raise_for_status()
197
+ return encode_video(video=response.content, encode_type="base64")
198
+
199
+ return {
200
+ "type": "video_url",
201
+ "video": video,
202
+ "video_frames": None,
203
+ "video_seconds": None,
204
+ "video_bytes": None,
205
+ }
206
+
207
+ if isinstance(video, str) and video.startswith("data:video/"):
208
+ data_str = video.split(",", 1)[1]
209
+
210
+ return {
211
+ "type": "video_base64",
212
+ "video": video,
213
+ "video_frames": None,
214
+ "video_seconds": None,
215
+ "video_bytes": len(data_str) * 3 // 4, # base64 to bytes
216
+ }
217
+
218
+ if isinstance(video, str | Path):
219
+ path = Path(video)
220
+ video_bytes = path.read_bytes()
221
+ video_format = get_file_format(path)
222
+ elif isinstance(video, bytes):
223
+ video_bytes = video
224
+ video_format = "unknown"
225
+ else:
226
+ raise ValueError(f"Unsupported video type: {type(video)} for {video}")
227
+
228
+ video_base64 = base64.b64encode(video_bytes).decode("utf-8")
229
+
230
+ return {
231
+ "type": "video_base64",
232
+ "video": f"data:video/{video_format};base64,{video_base64}",
233
+ "video_frames": None,
234
+ "video_seconds": None,
235
+ "video_bytes": len(video_bytes),
236
+ }
237
+
238
+
239
def get_file_format(path: Path | str) -> str:
    """Return the lower-cased extension of ``path`` without the dot, or "unknown"."""
    extension = Path(path).suffix.lower()
    if not extension.startswith("."):
        return "unknown"
    return extension[1:]
guidellm/logger.py CHANGED
@@ -41,7 +41,7 @@ import sys
41
41
 
42
42
  from loguru import logger
43
43
 
44
- from guidellm.config import LoggingSettings, settings
44
+ from guidellm.settings import LoggingSettings, settings
45
45
 
46
46
  __all__ = ["configure_logger", "logger"]
47
47
 
@@ -72,7 +72,7 @@ def configure_logger(config: LoggingSettings = settings.logging):
72
72
  sys.stdout,
73
73
  level=config.console_log_level.upper(),
74
74
  format="<green>{time:YY-MM-DD HH:mm:ss}</green>|<level>{level: <8}</level> \
75
- |<cyan>{name}:{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>"
75
+ |<cyan>{name}:{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>",
76
76
  )
77
77
 
78
78
  if config.log_file or config.log_file_level:
@@ -0,0 +1,8 @@
1
+ """
2
+ GuideLLM Mock Server for OpenAI and vLLM API compatibility.
3
+ """
4
+
5
+ from .config import MockServerConfig
6
+ from .server import MockServer
7
+
8
+ __all__ = ["MockServer", "MockServerConfig"]
@@ -0,0 +1,84 @@
1
+ """
2
+ Configuration settings for the mock server component.
3
+
4
+ Provides centralized configuration management for mock server behavior including
5
+ network binding, model identification, response timing characteristics, and token
6
+ generation parameters. Supports environment variable configuration for deployment
7
+ flexibility with automatic validation through Pydantic settings.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from pydantic import Field
13
+ from pydantic_settings import BaseSettings
14
+
15
+ __all__ = ["MockServerConfig"]
16
+
17
+
18
class MockServerConfig(BaseSettings):
    """
    Configuration settings for mock server behavior and deployment.

    Centralizes all configurable parameters for mock server operation including
    network settings, model identification, response timing characteristics, and
    token generation behavior. Environment variables with GUIDELLM_MOCK_SERVER_
    prefix override default values for deployment flexibility.

    Example:
    ::
        config = MockServerConfig(host="0.0.0.0", port=8080, model="custom-model")
        # Use with environment variables:
        # GUIDELLM_MOCK_SERVER_HOST=127.0.0.1 GUIDELLM_MOCK_SERVER_PORT=9000
    """

    # Settings are declared through model_config; the nested ``class Config``
    # form is deprecated in pydantic v2, which pydantic_settings builds on.
    model_config = {
        "env_prefix": "GUIDELLM_MOCK_SERVER_",
        "case_sensitive": False,
    }

    host: str = Field(
        default="127.0.0.1", description="Host address to bind the server to"
    )
    port: int = Field(default=8000, description="Port number to bind the server to")
    workers: int = Field(default=1, description="Number of worker processes to spawn")
    model: str = Field(
        default="llama-3.1-8b-instruct",
        description="Model name to present in API responses",
    )
    processor: str | None = Field(
        default=None,
        description=(
            "Processor type to use for token stats, tokenize, and detokenize. "
            "If None, a mock one is created."
        ),
    )
    request_latency: float = Field(
        default=3.0,
        description="Base request latency in seconds for non-streaming responses",
    )
    request_latency_std: float = Field(
        default=0.0,
        description="Standard deviation for request latency variation",
    )
    ttft_ms: float = Field(
        default=150.0,
        description="Time to first token in milliseconds for streaming responses",
    )
    ttft_ms_std: float = Field(
        default=0.0,
        description="Standard deviation for time to first token variation",
    )
    itl_ms: float = Field(
        default=10.0,
        description="Inter-token latency in milliseconds for streaming responses",
    )
    itl_ms_std: float = Field(
        default=0.0,
        description="Standard deviation for inter-token latency variation",
    )
    output_tokens: int = Field(
        default=128, description="Number of output tokens to generate in responses"
    )
    output_tokens_std: float = Field(
        default=0.0,
        description="Standard deviation for output token count variation",
    )
@@ -0,0 +1,17 @@
1
+ """
2
+ HTTP request handlers for the GuideLLM mock server.
3
+
4
+ This module exposes request handlers that implement OpenAI-compatible API endpoints
5
+ for the mock server. The handlers provide realistic LLM simulation capabilities
6
+ including chat completions, legacy completions, and tokenization services with
7
+ configurable timing characteristics, token counting, and proper error handling to
8
+ support comprehensive benchmarking and testing scenarios.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from .chat_completions import ChatCompletionsHandler
14
+ from .completions import CompletionsHandler
15
+ from .tokenizer import TokenizerHandler
16
+
17
+ __all__ = ["ChatCompletionsHandler", "CompletionsHandler", "TokenizerHandler"]