guidellm 0.3.1__py3-none-any.whl → 0.6.0a5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +524 -255
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +109 -0
  5. guidellm/backends/openai.py +340 -0
  6. guidellm/backends/response_handlers.py +428 -0
  7. guidellm/benchmark/__init__.py +69 -39
  8. guidellm/benchmark/benchmarker.py +160 -316
  9. guidellm/benchmark/entrypoints.py +560 -127
  10. guidellm/benchmark/outputs/__init__.py +24 -0
  11. guidellm/benchmark/outputs/console.py +633 -0
  12. guidellm/benchmark/outputs/csv.py +721 -0
  13. guidellm/benchmark/outputs/html.py +473 -0
  14. guidellm/benchmark/outputs/output.py +169 -0
  15. guidellm/benchmark/outputs/serialized.py +69 -0
  16. guidellm/benchmark/profiles.py +718 -0
  17. guidellm/benchmark/progress.py +553 -556
  18. guidellm/benchmark/scenarios/__init__.py +40 -0
  19. guidellm/benchmark/scenarios/chat.json +6 -0
  20. guidellm/benchmark/scenarios/rag.json +6 -0
  21. guidellm/benchmark/schemas/__init__.py +66 -0
  22. guidellm/benchmark/schemas/base.py +402 -0
  23. guidellm/benchmark/schemas/generative/__init__.py +55 -0
  24. guidellm/benchmark/schemas/generative/accumulator.py +841 -0
  25. guidellm/benchmark/schemas/generative/benchmark.py +163 -0
  26. guidellm/benchmark/schemas/generative/entrypoints.py +381 -0
  27. guidellm/benchmark/schemas/generative/metrics.py +927 -0
  28. guidellm/benchmark/schemas/generative/report.py +158 -0
  29. guidellm/data/__init__.py +34 -4
  30. guidellm/data/builders.py +541 -0
  31. guidellm/data/collators.py +16 -0
  32. guidellm/data/config.py +120 -0
  33. guidellm/data/deserializers/__init__.py +49 -0
  34. guidellm/data/deserializers/deserializer.py +141 -0
  35. guidellm/data/deserializers/file.py +223 -0
  36. guidellm/data/deserializers/huggingface.py +94 -0
  37. guidellm/data/deserializers/memory.py +194 -0
  38. guidellm/data/deserializers/synthetic.py +246 -0
  39. guidellm/data/entrypoints.py +52 -0
  40. guidellm/data/loaders.py +190 -0
  41. guidellm/data/preprocessors/__init__.py +27 -0
  42. guidellm/data/preprocessors/formatters.py +410 -0
  43. guidellm/data/preprocessors/mappers.py +196 -0
  44. guidellm/data/preprocessors/preprocessor.py +30 -0
  45. guidellm/data/processor.py +29 -0
  46. guidellm/data/schemas.py +175 -0
  47. guidellm/data/utils/__init__.py +6 -0
  48. guidellm/data/utils/dataset.py +94 -0
  49. guidellm/extras/__init__.py +4 -0
  50. guidellm/extras/audio.py +220 -0
  51. guidellm/extras/vision.py +242 -0
  52. guidellm/logger.py +2 -2
  53. guidellm/mock_server/__init__.py +8 -0
  54. guidellm/mock_server/config.py +84 -0
  55. guidellm/mock_server/handlers/__init__.py +17 -0
  56. guidellm/mock_server/handlers/chat_completions.py +280 -0
  57. guidellm/mock_server/handlers/completions.py +280 -0
  58. guidellm/mock_server/handlers/tokenizer.py +142 -0
  59. guidellm/mock_server/models.py +510 -0
  60. guidellm/mock_server/server.py +238 -0
  61. guidellm/mock_server/utils.py +302 -0
  62. guidellm/scheduler/__init__.py +69 -26
  63. guidellm/scheduler/constraints/__init__.py +49 -0
  64. guidellm/scheduler/constraints/constraint.py +325 -0
  65. guidellm/scheduler/constraints/error.py +411 -0
  66. guidellm/scheduler/constraints/factory.py +182 -0
  67. guidellm/scheduler/constraints/request.py +312 -0
  68. guidellm/scheduler/constraints/saturation.py +722 -0
  69. guidellm/scheduler/environments.py +252 -0
  70. guidellm/scheduler/scheduler.py +137 -368
  71. guidellm/scheduler/schemas.py +358 -0
  72. guidellm/scheduler/strategies.py +617 -0
  73. guidellm/scheduler/worker.py +413 -419
  74. guidellm/scheduler/worker_group.py +712 -0
  75. guidellm/schemas/__init__.py +65 -0
  76. guidellm/schemas/base.py +417 -0
  77. guidellm/schemas/info.py +188 -0
  78. guidellm/schemas/request.py +235 -0
  79. guidellm/schemas/request_stats.py +349 -0
  80. guidellm/schemas/response.py +124 -0
  81. guidellm/schemas/statistics.py +1018 -0
  82. guidellm/{config.py → settings.py} +31 -24
  83. guidellm/utils/__init__.py +71 -8
  84. guidellm/utils/auto_importer.py +98 -0
  85. guidellm/utils/cli.py +132 -5
  86. guidellm/utils/console.py +566 -0
  87. guidellm/utils/encoding.py +778 -0
  88. guidellm/utils/functions.py +159 -0
  89. guidellm/utils/hf_datasets.py +1 -2
  90. guidellm/utils/hf_transformers.py +4 -4
  91. guidellm/utils/imports.py +9 -0
  92. guidellm/utils/messaging.py +1118 -0
  93. guidellm/utils/mixins.py +115 -0
  94. guidellm/utils/random.py +3 -4
  95. guidellm/utils/registry.py +220 -0
  96. guidellm/utils/singleton.py +133 -0
  97. guidellm/utils/synchronous.py +159 -0
  98. guidellm/utils/text.py +163 -50
  99. guidellm/utils/typing.py +41 -0
  100. guidellm/version.py +2 -2
  101. guidellm-0.6.0a5.dist-info/METADATA +364 -0
  102. guidellm-0.6.0a5.dist-info/RECORD +109 -0
  103. guidellm/backend/__init__.py +0 -23
  104. guidellm/backend/backend.py +0 -259
  105. guidellm/backend/openai.py +0 -708
  106. guidellm/backend/response.py +0 -136
  107. guidellm/benchmark/aggregator.py +0 -760
  108. guidellm/benchmark/benchmark.py +0 -837
  109. guidellm/benchmark/output.py +0 -997
  110. guidellm/benchmark/profile.py +0 -409
  111. guidellm/benchmark/scenario.py +0 -104
  112. guidellm/data/prideandprejudice.txt.gz +0 -0
  113. guidellm/dataset/__init__.py +0 -22
  114. guidellm/dataset/creator.py +0 -213
  115. guidellm/dataset/entrypoints.py +0 -42
  116. guidellm/dataset/file.py +0 -92
  117. guidellm/dataset/hf_datasets.py +0 -62
  118. guidellm/dataset/in_memory.py +0 -132
  119. guidellm/dataset/synthetic.py +0 -287
  120. guidellm/objects/__init__.py +0 -18
  121. guidellm/objects/pydantic.py +0 -89
  122. guidellm/objects/statistics.py +0 -953
  123. guidellm/preprocess/__init__.py +0 -3
  124. guidellm/preprocess/dataset.py +0 -374
  125. guidellm/presentation/__init__.py +0 -28
  126. guidellm/presentation/builder.py +0 -27
  127. guidellm/presentation/data_models.py +0 -232
  128. guidellm/presentation/injector.py +0 -66
  129. guidellm/request/__init__.py +0 -18
  130. guidellm/request/loader.py +0 -284
  131. guidellm/request/request.py +0 -79
  132. guidellm/request/types.py +0 -10
  133. guidellm/scheduler/queues.py +0 -25
  134. guidellm/scheduler/result.py +0 -155
  135. guidellm/scheduler/strategy.py +0 -495
  136. guidellm-0.3.1.dist-info/METADATA +0 -329
  137. guidellm-0.3.1.dist-info/RECORD +0 -62
  138. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/WHEEL +0 -0
  139. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/entry_points.txt +0 -0
  140. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/licenses/LICENSE +0 -0
  141. {guidellm-0.3.1.dist-info → guidellm-0.6.0a5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,364 @@
1
+ Metadata-Version: 2.4
2
+ Name: guidellm
3
+ Version: 0.6.0a5
4
+ Summary: Guidance platform for deploying and managing large language models.
5
+ Author: Red Hat
6
+ License: Apache-2.0
7
+ Project-URL: homepage, https://github.com/vllm-project/guidellm
8
+ Project-URL: source, https://github.com/vllm-project/guidellm
9
+ Project-URL: issues, https://github.com/vllm-project/guidellm/issues
10
+ Project-URL: docs, https://github.com/vllm-project/guidellm/tree/main/docs
11
+ Keywords: ai,benchmarking,deep-learning,deployment,evaluation,guidance,inference,language-models,large-language-model,llm,machine-learning,model-benchmark,model-evaluation,nlp,performance,vllm
12
+ Requires-Python: <4.0,>=3.10.0
13
+ Description-Content-Type: text/markdown
14
+ License-File: LICENSE
15
+ Requires-Dist: click~=8.3.0
16
+ Requires-Dist: culsans~=0.10.0
17
+ Requires-Dist: datasets
18
+ Requires-Dist: eval_type_backport
19
+ Requires-Dist: faker
20
+ Requires-Dist: ftfy>=6.0.0
21
+ Requires-Dist: httpx[http2]<1.0.0
22
+ Requires-Dist: loguru
23
+ Requires-Dist: msgpack
24
+ Requires-Dist: numpy>=2.0.0
25
+ Requires-Dist: protobuf
26
+ Requires-Dist: pydantic>=2.11.7
27
+ Requires-Dist: pydantic-settings>=2.0.0
28
+ Requires-Dist: pyyaml>=6.0.0
29
+ Requires-Dist: rich
30
+ Requires-Dist: sanic
31
+ Requires-Dist: tabulate
32
+ Requires-Dist: transformers
33
+ Requires-Dist: uvloop>=0.18
34
+ Requires-Dist: torch
35
+ Provides-Extra: all
36
+ Requires-Dist: guidellm[audio,openai,perf,vision]; extra == "all"
37
+ Provides-Extra: recommended
38
+ Requires-Dist: guidellm[openai,perf]; extra == "recommended"
39
+ Provides-Extra: perf
40
+ Requires-Dist: orjson; extra == "perf"
41
+ Requires-Dist: msgpack; extra == "perf"
42
+ Requires-Dist: msgspec; extra == "perf"
43
+ Requires-Dist: uvloop; extra == "perf"
44
+ Provides-Extra: openai
45
+ Requires-Dist: tiktoken>=0.11.0; extra == "openai"
46
+ Requires-Dist: blobfile>=3.1.0; extra == "openai"
47
+ Provides-Extra: audio
48
+ Requires-Dist: datasets[audio]>=4.1.0; extra == "audio"
49
+ Requires-Dist: torch==2.9.*; extra == "audio"
50
+ Requires-Dist: torchcodec==0.8.*; extra == "audio"
51
+ Provides-Extra: vision
52
+ Requires-Dist: datasets[vision]; extra == "vision"
53
+ Requires-Dist: pillow; extra == "vision"
54
+ Provides-Extra: dev
55
+ Requires-Dist: guidellm[all]; extra == "dev"
56
+ Requires-Dist: build>=1.0.0; extra == "dev"
57
+ Requires-Dist: setuptools>=61.0; extra == "dev"
58
+ Requires-Dist: setuptools-git-versioning<3,>=2.0; extra == "dev"
59
+ Requires-Dist: pre-commit~=3.5.0; extra == "dev"
60
+ Requires-Dist: scipy~=1.10; extra == "dev"
61
+ Requires-Dist: sphinx~=7.1.2; extra == "dev"
62
+ Requires-Dist: tox~=4.16.0; extra == "dev"
63
+ Requires-Dist: lorem~=0.1.1; extra == "dev"
64
+ Requires-Dist: pytest~=8.2.2; extra == "dev"
65
+ Requires-Dist: pytest-asyncio~=1.1.0; extra == "dev"
66
+ Requires-Dist: pytest-cov~=5.0.0; extra == "dev"
67
+ Requires-Dist: pytest-mock~=3.14.0; extra == "dev"
68
+ Requires-Dist: pytest-rerunfailures~=14.0; extra == "dev"
69
+ Requires-Dist: pytest-timeout~=2.4.0; extra == "dev"
70
+ Requires-Dist: respx~=0.22.0; extra == "dev"
71
+ Requires-Dist: mypy~=1.15.0; extra == "dev"
72
+ Requires-Dist: ruff~=0.11.7; extra == "dev"
73
+ Requires-Dist: mdformat~=0.7.17; extra == "dev"
74
+ Requires-Dist: mdformat-footnote~=0.1.1; extra == "dev"
75
+ Requires-Dist: mdformat-frontmatter~=2.0.8; extra == "dev"
76
+ Requires-Dist: mdformat-gfm~=0.3.6; extra == "dev"
77
+ Requires-Dist: pandas-stubs; extra == "dev"
78
+ Requires-Dist: types-PyYAML~=6.0.1; extra == "dev"
79
+ Requires-Dist: types-requests~=2.32.0; extra == "dev"
80
+ Requires-Dist: types-toml; extra == "dev"
81
+ Requires-Dist: mkdocs-linkcheck~=1.0.6; extra == "dev"
82
+ Dynamic: license-file
83
+
84
+ <p align="center">
85
+ <picture>
86
+ <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/vllm-project/guidellm/main/docs/assets/guidellm-logo-light.png">
87
+ <img alt="GuideLLM Logo" src="https://raw.githubusercontent.com/vllm-project/guidellm/main/docs/assets/guidellm-logo-dark.png" width=55%>
88
+ </picture>
89
+ </p>
90
+
91
+ <h3 align="center">
92
+ SLO-aware Benchmarking and Evaluation Platform for Optimizing Real-World LLM Inference
93
+ </h3>
94
+
95
+ [![GitHub Release](https://img.shields.io/github/release/vllm-project/guidellm.svg?label=Version)](https://github.com/vllm-project/guidellm/releases) [![Documentation](https://img.shields.io/badge/Documentation-8A2BE2?logo=read-the-docs&logoColor=%23ffffff&color=%231BC070)](https://github.com/vllm-project/guidellm/tree/main/docs) [![License](https://img.shields.io/github/license/vllm-project/guidellm.svg)](https://github.com/vllm-project/guidellm/blob/main/LICENSE) [![PyPI Release](https://img.shields.io/pypi/v/guidellm.svg?label=PyPI%20Release)](https://pypi.python.org/pypi/guidellm) [![Python Versions](https://img.shields.io/badge/Python-3.10--3.13-orange)](https://pypi.python.org/pypi/guidellm) [![Nightly Build](https://img.shields.io/github/actions/workflow/status/vllm-project/guidellm/nightly.yml?branch=main&label=Nightly%20Build)](https://github.com/vllm-project/guidellm/actions/workflows/nightly.yml)
96
+
97
+ ## Overview
98
+
99
+ <p>
100
+ <picture>
101
+ <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/vllm-project/guidellm/main/docs/assets/guidellm-user-flows-dark.png">
102
+ <img alt="GuideLLM User Flows" src="https://raw.githubusercontent.com/vllm-project/guidellm/main/docs/assets/guidellm-user-flows-light.png">
103
+ </picture>
104
+ </p>
105
+
106
+ **GuideLLM** is a platform for evaluating how language models perform under real workloads and configurations. It simulates end-to-end interactions with OpenAI-compatible and vLLM-native servers, generates workload patterns that reflect production usage, and produces detailed reports that help teams understand system behavior, resource needs, and operational limits. GuideLLM supports real and synthetic datasets, multimodal inputs, and flexible execution profiles, giving engineering and ML teams a consistent framework for assessing model behavior, tuning deployments, and planning capacity as their systems evolve.
107
+
108
+ ### Why GuideLLM?
109
+
110
+ GuideLLM gives teams a clear picture of performance, efficiency, and reliability when deploying LLMs in production-like environments.
111
+
112
+ - **Captures complete latency and token-level statistics for SLO-driven evaluation**, including full distributions for TTFT, ITL, and end-to-end behavior.
113
+ - **Generates realistic, configurable traffic patterns** across synchronous, concurrent, and rate-based modes, including reproducible sweeps to identify safe operating ranges.
114
+ - **Supports both real and synthetic multimodal datasets**, enabling controlled experiments and production-style evaluations in one framework.
115
+ - **Produces standardized, exportable reports for dashboards, analysis, and regression tracking**, ensuring consistency across teams and workflows.
116
+ - **Delivers high-throughput, extensible benchmarking** with multiprocessing, threading, async execution, and a flexible CLI/API for customization or quickstarts.
117
+
118
+ ### Comparisons
119
+
120
+ Many tools benchmark endpoints, not models, and miss the details that matter for LLMs. GuideLLM focuses exclusively on LLM-specific workloads, measuring TTFT, ITL, output distributions, and dataset-driven variation. It fits into everyday engineering tasks by using standard Python interfaces and HuggingFace datasets instead of custom formats or research-only pipelines. It is also built for performance, supporting high-rate load generation and accurate scheduling far beyond simple scripts or example benchmarks. The table below highlights how this approach compares to other options.
121
+
122
+ | Tool | CLI | API | High Perf | Full Metrics | Data Modalities | Data Sources | Profiles | Backends | Endpoints | Output Types |
123
+ | ---------------------------------------------------------------------------- | --- | --- | --------- | ------------ | ------------------------------ | ------------------------------------- | ------------------------------------------------------------- | ------------------------------- | ------------------------------------------------------------------------- | ------------------------ |
124
+ | GuideLLM | ✅ | ✅ | ✅ | ✅ | Text, Image, Audio, Video | HuggingFace, Files, Synthetic, Custom | Synchronous, Concurrent, Throughput, Constant, Poisson, Sweep | OpenAI-compatible | /completions, /chat/completions, /audio/translation, /audio/transcription | console, json, csv, html |
125
+ | [inference-perf](https://github.com/kubernetes-sigs/inference-perf) | ✅ | ❌ | ✅ | ❌ | Text | Synthetic, Specific Datasets | Concurrent, Constant, Poisson, Sweep | OpenAI-compatible | /completions, /chat/completions | json, png |
126
+ | [genai-bench](https://github.com/sgl-project/genai-bench) | ✅ | ❌ | ❌ | ❌ | Text, Image, Embedding, ReRank | Synthetic, File | Concurrent | OpenAI-compatible, Hosted Cloud | /chat/completions, /embeddings | console, xlsx, png |
127
+ | [llm-perf](https://github.com/ray-project/llmperf) | ❌ | ❌ | ✅ | ❌ | Text | Synthetic | Concurrent | OpenAI-compatible, Hosted Cloud | /chat/completions | json |
128
+ | [ollama-benchmark](https://github.com/aidatatools/ollama-benchmark) | ✅ | ❌ | ❌ | ❌ | Text | Synthetic | Synchronous | Ollama | /completions | console, json |
129
+ | [vllm/benchmarks](https://github.com/vllm-project/vllm/tree/main/benchmarks) | ✅ | ❌ | ❌ | ❌ | Text | Synthetic, Specific Datasets | Synchronous, Throughput, Constant, Sweep | OpenAI-compatible, vLLM API | /completions, /chat/completions | console, png |
130
+
131
+ ## What's New
132
+
133
+ This section summarizes the newest capabilities available to users and outlines the current areas of development. It helps readers understand how the platform is evolving and what to expect next.
134
+
135
+ **Recent Additions**
136
+
137
+ - New refactored architecture enabling high-rate load generation at scale and a more extensible interface for additional backends, data pipelines, load generation schedules, benchmarking constraints, and output formats.
138
+ - Added multimodal benchmarking support for image, video, and audio workloads across chat completions, transcription, and translation APIs.
139
+ - Broader metrics collection, including richer statistics for visual, audio, and text inputs such as image sizes, audio lengths, video frame counts, and word-level data.
140
+
141
+ **Active Development**
142
+
143
+ - Generation of synthetic multimodal datasets for controlled experimentation across images, audio, and video.
144
+ - Extended prefixing options for testing system-prompt and user-prompt variations.
145
+ - Multi-turn conversation capabilities for benchmarking chat agents and dialogue systems.
146
+ - Speculative decoding specific views and outputs.
147
+
148
+ ## Quick Start
149
+
150
+ The Quick Start shows how to install GuideLLM, launch a server, and run your first benchmark in a few minutes.
151
+
152
+ ### Install GuideLLM
153
+
154
+ Before installing, ensure you have the following prerequisites:
155
+
156
+ - OS: Linux or MacOS
157
+ - Python: 3.10 - 3.13
158
+
159
+ Install the latest GuideLLM release from PyPI using `pip`:
160
+
161
+ ```bash
162
+ pip install guidellm[recommended]
163
+ ```
164
+
165
+ Or install from source:
166
+
167
+ ```bash
168
+ pip install git+https://github.com/vllm-project/guidellm.git
169
+ ```
170
+
171
+ Or run the latest container from [ghcr.io/vllm-project/guidellm](https://github.com/vllm-project/guidellm/pkgs/container/guidellm):
172
+
173
+ ```bash
174
+ podman run \
175
+ --rm -it \
176
+ -v "./results:/results:rw" \
177
+ -e GUIDELLM_TARGET=http://localhost:8000 \
178
+ -e GUIDELLM_PROFILE=sweep \
179
+ -e GUIDELLM_MAX_SECONDS=30 \
180
+ -e GUIDELLM_DATA="prompt_tokens=256,output_tokens=128" \
181
+ ghcr.io/vllm-project/guidellm:latest
182
+ ```
183
+
184
+ ### Launch an Inference Server
185
+
186
+ Start any OpenAI-compatible endpoint. For vLLM:
187
+
188
+ ```bash
189
+ vllm serve "neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16"
190
+ ```
191
+
192
+ Verify the server is running at `http://localhost:8000`.
193
+
194
+ ### Run Your First Benchmark
195
+
196
+ Run a sweep that identifies the maximum performance and maximum rates for the model:
197
+
198
+ ```bash
199
+ guidellm benchmark \
200
+ --target "http://localhost:8000" \
201
+ --profile sweep \
202
+ --max-seconds 30 \
203
+ --data "prompt_tokens=256,output_tokens=128"
204
+ ```
205
+
206
+ You will see progress updates and per-benchmark summaries during the run, as given below:
207
+
208
+ <img src="https://raw.githubusercontent.com/vllm-project/guidellm/main/docs/assets/sample-benchmarks.gif" />
209
+
210
+ ### Inspect Outputs
211
+
212
+ After the benchmark completes, GuideLLM saves all results into the output directory you specified (default: the current directory). You'll see a summary printed in the console along with a set of file locations (`.json`, `.csv`, `.html`) that contain the full results of the run.
213
+
214
+ The following section, **Output Files and Reports**, explains what each file contains and how to use them for analysis, visualization, or automation.
215
+
216
+ ## Output Files and Reports
217
+
218
+ After running the Quick Start benchmark, GuideLLM writes several output files to the directory you specified. Each one focuses on a different layer of analysis, ranging from a quick on-screen summary to fully structured data for dashboards and regression pipelines.
219
+
220
+ **Console Output**
221
+
222
+ The console provides a lightweight summary with high-level statistics for each benchmark in the run. It's useful for quick checks to confirm that the server responded correctly, the load sweep completed, and the system behaved as expected. Additionally, the output tables can be copied and pasted into spreadsheet software using `|` as the delimiter. The sections will look similar to the following:
223
+
224
+ <img alt="Sample GuideLLM benchmark output" src="https://raw.githubusercontent.com/vllm-project/guidellm/main/docs/assets/sample-output.png" />
225
+
226
+ **benchmarks.json**
227
+
228
+ This file is the authoritative record of the entire benchmark session. It includes configuration, metadata, per-benchmark statistics, and sample request entries with individual request timings. Use it for debugging, deeper analysis, or loading into Python with `GenerativeBenchmarksReport`.
229
+
230
+ Alternatively, a YAML version of this file can be generated for easier human readability with the same content as `benchmarks.json` using the `--outputs yaml` argument.
231
+
232
+ **benchmarks.csv**
233
+
234
+ This file provides a compact tabular view of each benchmark with the fields most commonly used for reporting—throughput, latency percentiles, token counts, and rate information. It opens cleanly in spreadsheets and BI tools and is well-suited for comparisons across runs.
235
+
236
+ **benchmarks.html**
237
+
238
+ The HTML report provides a visual summary of results, including charts of latency distributions, throughput behavior, and generation patterns. It's ideal for quick exploration or sharing with teammates without requiring them to parse JSON.
239
+
240
+ ## Common Use Cases and Configurations
241
+
242
+ GuideLLM supports a wide range of LLM benchmarking workflows. The examples below show how to run typical scenarios and highlight the parameters that matter most. For a complete list of arguments, details, and options, run `guidellm benchmark run --help`
243
+
244
+ ### Load Patterns
245
+
246
+ Simulating different applications requires different traffic shapes. This example demonstrates rate-based load testing using a constant profile at 10 requests per second, running for 20 seconds with synthetic data of 128 prompt tokens and 256 output tokens.
247
+
248
+ ```bash
249
+ guidellm benchmark \
250
+ --target http://localhost:8000 \
251
+ --profile constant \
252
+ --rate 10 \
253
+ --max-seconds 20 \
254
+ --data "prompt_tokens=128,output_tokens=256"
255
+ ```
256
+
257
+ **Key parameters:**
258
+
259
+ - `--profile`: Defines the traffic pattern - options include `synchronous` (sequential requests), `concurrent` (parallel users), `throughput` (maximum capacity), `constant` (fixed requests/sec), `poisson` (randomized requests/sec), or `sweep` (automatic rate exploration)
260
+ - `--rate`: The numeric rate value whose meaning depends on profile - for `sweep` it's the number of benchmarks, for `concurrent` it's simultaneous requests, for `constant`/`poisson` it's requests per second
261
+ - `--max-seconds`: Maximum duration in seconds for each benchmark run (can also use `--max-requests` to limit by request count instead)
262
+
263
+ ### Dataset Sources
264
+
265
+ GuideLLM supports HuggingFace datasets, local files, and synthetic data. This example loads the CNN DailyMail dataset from HuggingFace and maps the article column to prompts while using the summary token count column to determine output lengths.
266
+
267
+ ```bash
268
+ guidellm benchmark \
269
+ --target http://localhost:8000 \
270
+ --data "hf:cnn_dailymail" \
271
+ --data-args '{"prompt_column":"article","output_tokens_count_column":"summary_tokens"}'
272
+ ```
273
+
274
+ **Key parameters:**
275
+
276
+ - `--data`: Data source specification - accepts HuggingFace dataset IDs (prefix with `hf:`), local file paths (`.json`, `.csv`, `.jsonl`, `.txt`), or synthetic data configs (JSON object or `key=value` pairs like `prompt_tokens=256,output_tokens=128`)
277
+ - `--data-args`: JSON object of arguments for dataset creation - commonly used to specify column mappings like `prompt_column`, `output_tokens_count_column`, or HuggingFace dataset parameters
278
+ - `--data-samples`: Number of samples to use from the dataset - use `-1` (default) for all samples with dynamic generation, or specify a positive integer to limit sample count
279
+ - `--processor`: Tokenizer or processor name used for generating synthetic data - if not provided and required for the dataset, automatically loads from the model; accepts HuggingFace model IDs or local paths
280
+
281
+ ### Request Types and API Targets
282
+
283
+ You can benchmark chat completions, text completions, or other supported request types. This example configures the benchmark to test chat completions API using a custom dataset file, with GuideLLM automatically formatting requests to match the chat completions schema.
284
+
285
+ ```bash
286
+ guidellm benchmark \
287
+ --target http://localhost:8000 \
288
+ --request-type chat_completions \
289
+ --data path/to/data.json
290
+ ```
291
+
292
+ **Key parameters:**
293
+
294
+ - `--request-type`: Specifies the API endpoint format - options include `chat_completions` (chat API format), `completions` (text completion format), `audio_transcription` (audio transcription), and `audio_translation` (audio translation).
295
+
296
+ ### Using Scenarios
297
+
298
+ Built-in scenarios bundle schedules, dataset settings, and request formatting to standardize common testing patterns. This example uses the pre-configured chat scenario which includes appropriate defaults for chat model evaluation, with any additional CLI arguments overriding the scenario's settings.
299
+
300
+ ```bash
301
+ guidellm benchmark --scenario chat --target http://localhost:8000
302
+ ```
303
+
304
+ **Key parameters:**
305
+
306
+ - `--scenario`: Built-in scenario name or path to a custom scenario configuration file - built-in options include pre-configured testing patterns for common use cases; CLI options passed alongside this will override the scenario's default settings
307
+
308
+ ### Benchmark Controls
309
+
310
+ Warm-up, cooldown, and maximum limits help ensure stable, repeatable measurements. This example runs a concurrent benchmark with 16 parallel requests, using 10% warmup and cooldown periods to exclude initialization and shutdown effects, while limiting the test to stop if more than 5 errors occur.
311
+
312
+ ```bash
313
+ guidellm benchmark \
314
+ --target http://localhost:8000 \
315
+ --profile concurrent \
316
+ --rate 16 \
317
+ --warmup 0.1 \
318
+ --cooldown 0.1 \
319
+ --max-errors 5 \
320
+ --detect-saturation
321
+ ```
322
+
323
+ **Key parameters:**
324
+
325
+ - `--warmup`: Warm-up specification - values between 0 and 1 represent a percentage of total requests/time, values ≥1 represent absolute request or time units.
326
+ - `--cooldown`: Cool-down specification - same format as warmup, excludes final portion of benchmark from analysis to avoid shutdown effects
327
+ - `--max-seconds`: Maximum duration in seconds for each benchmark before automatic termination
328
+ - `--max-requests`: Maximum number of requests per benchmark before automatic termination
329
+ - `--max-errors`: Maximum number of individual errors before stopping the benchmark entirely
330
+ - `--detect-saturation`: Enable over-saturation detection to automatically stop benchmarks when the model becomes over-saturated (see also `--over-saturation` for more advanced control)
331
+
332
+ ## Development and Contribution
333
+
334
+ Developers interested in extending GuideLLM can use the project's established development workflow. Local setup, environment activation, and testing instructions are outlined in [DEVELOPING.md](https://github.com/vllm-project/guidellm/blob/main/DEVELOPING.md). This guide explains how to run the benchmark suite, validate changes, and work with the CLI or API during development. Contribution standards are documented in [CONTRIBUTING.md](https://github.com/vllm-project/guidellm/blob/main/CONTRIBUTING.md), including coding conventions, commit structure, and review guidelines. These standards help maintain stability as the platform evolves. The [CODE_OF_CONDUCT.md](https://github.com/vllm-project/guidellm/blob/main/CODE_OF_CONDUCT.md) outlines expectations for respectful and constructive participation across all project spaces. For contributors who want deeper reference material, the documentation covers installation, backends, datasets, metrics, output types, and architecture. Reviewing these topics is useful when adding new backends, request types, or data integrations. Release notes and changelogs are linked from the GitHub Releases page and provide historical context for ongoing work.
335
+
336
+ ## Documentation
337
+
338
+ The complete documentation provides the details that do not fit in this README. It includes installation steps, backend configuration, dataset handling, metrics definitions, output formats, tutorials, and an architecture overview. These references help you explore the platform more deeply or integrate it into existing workflows.
339
+
340
+ Notable docs are given below:
341
+
342
+ - [**Installation Guide**](https://github.com/vllm-project/guidellm/blob/main/docs/getting-started/install.md) - This guide provides step-by-step instructions for installing GuideLLM, including prerequisites and setup tips.
343
+ - [**Backends Guide**](https://github.com/vllm-project/guidellm/blob/main/docs/guides/backends.md) - A comprehensive overview of supported backends and how to set them up for use with GuideLLM.
344
+ - [**Data/Datasets Guide**](https://github.com/vllm-project/guidellm/blob/main/docs/guides/datasets.md) - Information on supported datasets, including how to use them for benchmarking.
345
+ - [**Metrics Guide**](https://github.com/vllm-project/guidellm/blob/main/docs/guides/metrics.md) - Detailed explanations of the metrics used in GuideLLM, including definitions and how to interpret them.
346
+ - [**Outputs Guide**](https://github.com/vllm-project/guidellm/blob/main/docs/guides/outputs.md) - Information on the different output formats supported by GuideLLM and how to use them.
347
+ - [**Architecture Overview**](https://github.com/vllm-project/guidellm/blob/main/docs/guides/architecture.md) - A detailed look at GuideLLM's design, components, and how they interact.
348
+
349
+ ## License
350
+
351
+ GuideLLM is licensed under the [Apache License 2.0](https://github.com/vllm-project/guidellm/blob/main/LICENSE).
352
+
353
+ ## Cite
354
+
355
+ If you find GuideLLM helpful in your research or projects, please consider citing it:
356
+
357
+ ```bibtex
358
+ @misc{guidellm2024,
359
+ title={GuideLLM: Scalable Inference and Optimization for Large Language Models},
360
+ author={Neural Magic, Inc.},
361
+ year={2024},
362
+ howpublished={\url{https://github.com/vllm-project/guidellm}},
363
+ }
364
+ ```
@@ -0,0 +1,109 @@
1
+ guidellm/__init__.py,sha256=1zl-PT9IZJvDfdLSMviPLzhVE3_ZXpizmc9s7UWa6kQ,1206
2
+ guidellm/__main__.py,sha256=CjqAH9qjN12oWO_1kAxl42IjpwcQa6S4fzoa3cejYzI,23262
3
+ guidellm/logger.py,sha256=6qGOeff8hOJF6p57Zietq6qr64N7E40CJSQSQcUFgKc,2912
4
+ guidellm/settings.py,sha256=leA-Z97R9XtKvpXzmNjrcSI2k9BMXfBwYP28DU8W3vw,7283
5
+ guidellm/version.py,sha256=sE6omcK4PAXqWb1yHw5AcKrzBnc7FiB1NrsGLlNv2So,127
6
+ guidellm/backends/__init__.py,sha256=Ou-SHPHLoHYfRFkaf_LocNxUUHI_DzmXXRGITyCasac,1101
7
+ guidellm/backends/backend.py,sha256=hVN0cIyQPvIEs-CER5tJWuN0wRdRKPp4g9lf3OlhGo4,3328
8
+ guidellm/backends/openai.py,sha256=nMOnTLBtglN-OpWdaX7WJSVriRCYj1LlFx6e73OJFKM,12783
9
+ guidellm/backends/response_handlers.py,sha256=DY_TbcMaIPJtkwt_dFUGgMnVi7FhImg5HMK_qfaA13g,16373
10
+ guidellm/benchmark/__init__.py,sha256=RBSV9SR1ZSJuNd2uzNAOEloyN41QStm9Vj0-lPhWncs,2863
11
+ guidellm/benchmark/benchmarker.py,sha256=-Gxn_m2n15uLKbFZyjwdAjCrJ5UVPH8a4iS6frAVWo4,6920
12
+ guidellm/benchmark/entrypoints.py,sha256=C9XYhDuk2TinDKeKHx0N-d-bxFBK3oEc6em5jdrsDGE,21911
13
+ guidellm/benchmark/profiles.py,sha256=8HDcgTcaJRLgBWaNv4-gcp__H7t6oxAVfqPwE0eJpqo,24462
14
+ guidellm/benchmark/progress.py,sha256=w_0wasSC8XkwQdrp8n-PiGIo5kZ3wedPyEh-_v1cBLk,25870
15
+ guidellm/benchmark/outputs/__init__.py,sha256=BgFAjCjYeXto7W6pmcUEhJUqhfDQoooI4ikUDymGg1c,845
16
+ guidellm/benchmark/outputs/console.py,sha256=23YqSR66KGJpolhdVym3JLuoC3h3q4pY8HYjhA_D308,22924
17
+ guidellm/benchmark/outputs/csv.py,sha256=Hrz0h4qPk0tNPNirmymr5e9N3dFOVd845jwk47E67L4,25616
18
+ guidellm/benchmark/outputs/html.py,sha256=aM0ku5oe2C3_q_Nz8xKPGm6DyxUbBk-_7bGHkPQg5BY,17245
19
+ guidellm/benchmark/outputs/output.py,sha256=NvFUqYRAJfdF5KfinH0jEQG4JICXt58w9vBp8fhV2NU,6850
20
+ guidellm/benchmark/outputs/serialized.py,sha256=sWz-kj0s2NV02blFQkga-FKrQkHRBMOYRakNcVtj47E,2553
21
+ guidellm/benchmark/scenarios/__init__.py,sha256=SmaYf8hfByJU4LVJ7pZKNxJPYBObl7UKpoaJEmLPdTI,1276
22
+ guidellm/benchmark/scenarios/chat.json,sha256=kTkbveBms8APH9bDogeFLYvm3a0-EQqnJpNNQvspLQM,227
23
+ guidellm/benchmark/scenarios/rag.json,sha256=J1-_gKbAZqNTAIpP3X4_gTDQNndwAypXfO1bXXzkjRQ,232
24
+ guidellm/benchmark/schemas/__init__.py,sha256=hv4xG3I8vdJABYdN-2hIDI4g_m2nhbC-O1Uq2BqijEY,2074
25
+ guidellm/benchmark/schemas/base.py,sha256=6geDHQIdjrHVA1hx_d6ixKMHhK6YpxTvWtTh9AeXxDo,15502
26
+ guidellm/benchmark/schemas/generative/__init__.py,sha256=bGIDG16ims22B7YW0_ELAjpkuH1aN8IqS5qwx9hWUV8,1959
27
+ guidellm/benchmark/schemas/generative/accumulator.py,sha256=P4RGjbeY57qUHPDfWp7XEK6erBKVArsJwtJf6dyGahk,33626
28
+ guidellm/benchmark/schemas/generative/benchmark.py,sha256=PuSHi0yJ3yYrZ1W71ilPv5bqpuGhB3jte5Jz4driQWM,5695
29
+ guidellm/benchmark/schemas/generative/entrypoints.py,sha256=jf-CtN6DELxckhVpMzVCyXxpXbYvaiL6lYB65CUayI0,14454
30
+ guidellm/benchmark/schemas/generative/metrics.py,sha256=FAXJ7sXD3k-ocQQXRDQc1kAAV38RfIWDluuyBr_Zz5E,36625
31
+ guidellm/benchmark/schemas/generative/report.py,sha256=f4G02IjR2IkJJzqJqnyNA69vX6aBL5r6YVScbT0J3NI,5924
32
+ guidellm/data/__init__.py,sha256=NY1zQgj6KD0rm73zi8PuLZe2COQ43RBH9KsZCih9EP0,922
33
+ guidellm/data/builders.py,sha256=fmSL3_94YNj88vb0sT4vgxoftQAMFBMY7gXdy5parPw,18349
34
+ guidellm/data/collators.py,sha256=j4OLGqwbt4sNnTqE8iSbe11qmgJBHnyWjipNeEz1SCk,445
35
+ guidellm/data/config.py,sha256=lihpmHwhWYWjfMqPYJ63f9HL5zOsUIGvPkMn3IfAdZE,3581
36
+ guidellm/data/entrypoints.py,sha256=W3SKTBEHkYJkLtzXYcdolz8xk53PA6p08SoYkDlJymM,2331
37
+ guidellm/data/loaders.py,sha256=z1vvfZJwZZTDF7KnxbcIwo555Xcf4Sa_zl5WHW8OYQs,6807
38
+ guidellm/data/processor.py,sha256=3PBGGx7JxhJr2o3hWTVNqrDPj3sri_YsPklhODGdBc8,873
39
+ guidellm/data/schemas.py,sha256=9I0NIRc6DGR2X9mDGjAXLQk4kQdpPbxd0B5pGvG1MZw,5450
40
+ guidellm/data/deserializers/__init__.py,sha256=7Nqte900b6rie3rEh71eEdVDt6l8a_lCxdHA_Y5Q5G0,1462
41
+ guidellm/data/deserializers/deserializer.py,sha256=enTVvOjhrLMZeY11goCfZkKnPyIFe1vZ4Xl9ZTHX1Ps,4915
42
+ guidellm/data/deserializers/file.py,sha256=D-IavmDXstdWI9tDGj7TobWW0cryzyZDJGUx5vUVYgM,7452
43
+ guidellm/data/deserializers/huggingface.py,sha256=uk2WBH9WJ8csaRrw6wXQ5Xiz_pj_fN5c4jDV9pWf1F0,2954
44
+ guidellm/data/deserializers/memory.py,sha256=F6o2JwIUgcZHdeRkT051AS76i6wWlIw-XGH09_pOqDs,6670
45
+ guidellm/data/deserializers/synthetic.py,sha256=6qOuQj6DpTu49Y0VMoXp5-1oCWng7u_UC5O7FrjoOuo,8372
46
+ guidellm/data/preprocessors/__init__.py,sha256=IPYBdHKpVIkVYyvvD7rmKYxxdbySO_CudAa-FDViboQ,803
47
+ guidellm/data/preprocessors/formatters.py,sha256=JIkrXOT4JYSOOp6YwMDaW1bSlgdEZwZzyUjDf6e4mKc,14357
48
+ guidellm/data/preprocessors/mappers.py,sha256=LL0zSz_n1qXeuvqQoJXgpYrMMDCg9jhQPVi9EFqtTXQ,6701
49
+ guidellm/data/preprocessors/preprocessor.py,sha256=60RGkvJtkfUSeRnntFGO3msD-f1ehEl9SJOJTY8UFOM,801
50
+ guidellm/data/utils/__init__.py,sha256=A8cJqUC7UVSr3upkkD6PzONGQUkjSV9HhMp31NJbI7E,125
51
+ guidellm/data/utils/dataset.py,sha256=ZaKlgGM_L6gcHghAfo5vG0NaHzPtpDWHddD1KX7E7_c,2323
52
+ guidellm/extras/__init__.py,sha256=bNtt6CNDhwMM5XlL1q74j_df-1xoXavTShB05LjDYMw,96
53
+ guidellm/extras/audio.py,sha256=bO5MowS0-W9Ewm1sn6sIfXtrS5ElNYTi8eQnmvVMET4,6629
54
+ guidellm/extras/vision.py,sha256=hU8e7ryUnMZOT6_utR9GKhayvCXYPljeSwCx8S4-nIQ,7691
55
+ guidellm/mock_server/__init__.py,sha256=oRvGpE8a2U9CUdGnza4GDbShT96NfjOW-cAoh0xDR84,183
56
+ guidellm/mock_server/config.py,sha256=t67sJjFV1aO0YMVrRJEm5ysFe1SzYCm-XVSlgmALPdA,2988
57
+ guidellm/mock_server/models.py,sha256=7CPhbQNSLhRJ4XlAXRIHn0_9yQ8IrGWgoImK-8oUHnk,18946
58
+ guidellm/mock_server/server.py,sha256=M1jscwHSqhA-0alljo0luH1TgDsql3elVV0M7H50JFY,9167
59
+ guidellm/mock_server/utils.py,sha256=NYhLj2dJ4EfC2UQIhgzhr_LInAUi_lYqth7A5QK2Djw,10336
60
+ guidellm/mock_server/handlers/__init__.py,sha256=GX2KD41Uc3H-b338mI4Cf1tK-TZcpTKqKdZH9CPgy8Y,698
61
+ guidellm/mock_server/handlers/chat_completions.py,sha256=xrC0OnPDa5V393u2QmUCIWXVgzqc72YiiyxSMwyZv18,10386
62
+ guidellm/mock_server/handlers/completions.py,sha256=BGTI9tJ1PrldT-Nzz_e7KjEtPFpc0NmLvr3nF-tEYKk,10024
63
+ guidellm/mock_server/handlers/tokenizer.py,sha256=OJAILmsk1tvYfHmdP6iuTf8Fg2gDm2_JyPZH-U-pxFE,5243
64
+ guidellm/scheduler/__init__.py,sha256=nVAvOy_cib8PyRNyAHqoSLPysJL-uzm90VuqwPZGj_k,2633
65
+ guidellm/scheduler/environments.py,sha256=eRQ9eLvQ61Yyo3nN_leYHTerla9mPuhr8tJD6_30XtM,8889
66
+ guidellm/scheduler/scheduler.py,sha256=kgENzLt0CudKFIP51KAPAuQyAXqz1pMNtuyvaVYgLwk,6841
67
+ guidellm/scheduler/schemas.py,sha256=OREp0uZVVMTBBgVHkQjZyjYbGOFwMnFJOasaIL359CA,12706
68
+ guidellm/scheduler/strategies.py,sha256=tYRVdOVjntTIAZD1m035ritUsUG-GhZvmeIH7qH98aI,22318
69
+ guidellm/scheduler/worker.py,sha256=wGixmyKqggvZ__tn1kPhWALzRak0xQ8exw-ggYyXa0Y,18041
70
+ guidellm/scheduler/worker_group.py,sha256=e7RSKgOVeRaq2tqFDtnIo4zbBVfPyOxsHoFFeFtnow0,28890
71
+ guidellm/scheduler/constraints/__init__.py,sha256=wCa9FipbgpXFw0FlmqUPGfH1MfqrcS28QNjZHnGryCs,1518
72
+ guidellm/scheduler/constraints/constraint.py,sha256=Lt_9Wz_rmI-nVIRToDS34gVxAWxhVLrujebNAKqnVl0,12493
73
+ guidellm/scheduler/constraints/error.py,sha256=A0onKXMI_99B-Sw79RjGhEPSVvpf9UQMCqB6FC82ij8,15608
74
+ guidellm/scheduler/constraints/factory.py,sha256=zC8g8uPZJyqciDDtTcGmT6RI3kIX9vx1raE-pyiPKv0,7035
75
+ guidellm/scheduler/constraints/request.py,sha256=O1RfaL87YKVi4VRKFJ-XYGWsB_SACIzPkQZnmmm24dQ,11876
76
+ guidellm/scheduler/constraints/saturation.py,sha256=1qDfPZHvDagPf7MSqSqH__b778N75MxgneNLTTYVaE4,28546
77
+ guidellm/schemas/__init__.py,sha256=k0yL0U-Tz0FkdU6a9VI_HjsPZYNjpRaxr51sdQwqnn8,1587
78
+ guidellm/schemas/base.py,sha256=qve9r_ExrPcsQgJQ5_JZaOfZnm4PUOB1qsPzQIJLmJ4,14850
79
+ guidellm/schemas/info.py,sha256=L1NX7woxznVLuJT4X6mpjZcLj04fD_CJY3GPycxZDL8,6421
80
+ guidellm/schemas/request.py,sha256=x120DkiT22lWRcQpSipjQHteeQ6KmO0BmUD34vQ3PNA,8196
81
+ guidellm/schemas/request_stats.py,sha256=6ZX5F20XfES4wfRj90ki9FugF7gErU62D_MZsnYT1p8,11975
82
+ guidellm/schemas/response.py,sha256=uwU0PLpeSgDSpMMQXMAyVTSEurNxUblNE3TSZyYRKBk,4781
83
+ guidellm/schemas/statistics.py,sha256=o_TVBYNtZxvDdpV0SIM2bfTQyj3lt_QG-aEHn7r7UKY,40328
84
+ guidellm/utils/__init__.py,sha256=F1w4qJQHERIUVGDO5Ij7x8xlofEkxM62oTet2UEQkI4,2501
85
+ guidellm/utils/auto_importer.py,sha256=rkraMx815TasixoFn0bwtp--7V7TxuEvfZUVFB8V5L0,3658
86
+ guidellm/utils/cli.py,sha256=vIK3u-h0poxz9tovbXCl3ndVRuOrn-CSHzZZYA6nlzM,6021
87
+ guidellm/utils/colors.py,sha256=D0IGz8A346-Pt5qgnP3S5uV-VgngJoXbfToVCOna41k,175
88
+ guidellm/utils/console.py,sha256=1zTBkAMU0qRVKOvNth7St7a-Ho4txfYSpzstZ0C_hMs,19227
89
+ guidellm/utils/default_group.py,sha256=iZ47bwRcUCxkX04Zdg0qpmqKtFg4P7lt5_hpw1CnKkA,4167
90
+ guidellm/utils/dict.py,sha256=oogh34_NznFEn1L6NKY2RDVBm7TUK9LOZfMc-rquNw8,673
91
+ guidellm/utils/encoding.py,sha256=hf__oDU-iQYKg_FDqYptyV4BxVC6RseIn3OBgaZXBzE,27821
92
+ guidellm/utils/functions.py,sha256=bp2dEDcv4NFlGClAm-i_DlgPYDhezGT6ewslf_nAdrA,5028
93
+ guidellm/utils/hf_datasets.py,sha256=M4uESvC08SzOekQUzUFRcnS49qBYJCAubElqURucU8w,992
94
+ guidellm/utils/hf_transformers.py,sha256=EXOctGUB-ZkwS1yrhIpSOK9IGJTaxXXZO0kUR1qXhp4,992
95
+ guidellm/utils/imports.py,sha256=Ch7TCnsvvLW-2ExAERxj2DUG1Dthl4KefYj1bYSAlTs,179
96
+ guidellm/utils/messaging.py,sha256=OLGC6aunhulC0-aKj6aI5VKlRoXQETulcO-XGqjlQg8,45566
97
+ guidellm/utils/mixins.py,sha256=i48rD2FVm-2qXKq7ENCTnvYZgr0IXOrVSJpkh-8-a_Q,4202
98
+ guidellm/utils/random.py,sha256=rDy1lpJ9vYMM59DYgKeHT8IG_I7fnjoHjNfD8QIF03k,1273
99
+ guidellm/utils/registry.py,sha256=1yS3_4s4Zkvq0L2bnouTIwxhUT2nZwJ021LcVuRooaE,7644
100
+ guidellm/utils/singleton.py,sha256=yjpUPAtRRvOh63Ubg9ivjQjLdwsExOQM6U1nSQvZuTc,5026
101
+ guidellm/utils/synchronous.py,sha256=rRkWwbDf1ty607KUhDKsqV4HcdKU5o0-1s5hwdG-Hak,5209
102
+ guidellm/utils/text.py,sha256=0K8yUEB4gzztevxzuiMXossSoHhvzcHoKqRhQYQdOrg,11644
103
+ guidellm/utils/typing.py,sha256=jt0o7SRbDhnvrifR3l4hN8oL3uJNxl8aMnvaoABb-MU,1235
104
+ guidellm-0.6.0a5.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
105
+ guidellm-0.6.0a5.dist-info/METADATA,sha256=OHjbypVjnwhdwyrsM9ZKkym6POsuKVFGaHmwMBA0qCs,24139
106
+ guidellm-0.6.0a5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
107
+ guidellm-0.6.0a5.dist-info/entry_points.txt,sha256=DzLFEg47fF7qY1b-9laPz9jg0KSKJ1_D9TbF93kLz_E,51
108
+ guidellm-0.6.0a5.dist-info/top_level.txt,sha256=EXRGjnvFtL6MeZTe0tnHRMYcEWUW3vEqoG2zO7vFOtk,9
109
+ guidellm-0.6.0a5.dist-info/RECORD,,
@@ -1,23 +0,0 @@
1
- from .backend import (
2
- Backend,
3
- BackendType,
4
- )
5
- from .openai import CHAT_COMPLETIONS_PATH, TEXT_COMPLETIONS_PATH, OpenAIHTTPBackend
6
- from .response import (
7
- RequestArgs,
8
- ResponseSummary,
9
- StreamingResponseType,
10
- StreamingTextResponse,
11
- )
12
-
13
- __all__ = [
14
- "CHAT_COMPLETIONS_PATH",
15
- "TEXT_COMPLETIONS_PATH",
16
- "Backend",
17
- "BackendType",
18
- "OpenAIHTTPBackend",
19
- "RequestArgs",
20
- "ResponseSummary",
21
- "StreamingResponseType",
22
- "StreamingTextResponse",
23
- ]