guidellm 0.3.0a26__tar.gz → 0.3.0a29__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of guidellm might be problematic; see the registry's advisory page for more details.

Files changed (63)
  1. {guidellm-0.3.0a26/src/guidellm.egg-info → guidellm-0.3.0a29}/PKG-INFO +7 -7
  2. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/README.md +6 -6
  3. guidellm-0.3.0a29/src/guidellm/version.py +6 -0
  4. {guidellm-0.3.0a26 → guidellm-0.3.0a29/src/guidellm.egg-info}/PKG-INFO +7 -7
  5. guidellm-0.3.0a26/src/guidellm/version.py +0 -6
  6. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/LICENSE +0 -0
  7. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/MANIFEST.in +0 -0
  8. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/pyproject.toml +0 -0
  9. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/setup.cfg +0 -0
  10. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/setup.py +0 -0
  11. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/__init__.py +0 -0
  12. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/__main__.py +0 -0
  13. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/backend/__init__.py +0 -0
  14. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/backend/backend.py +0 -0
  15. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/backend/openai.py +0 -0
  16. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/backend/response.py +0 -0
  17. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/benchmark/__init__.py +0 -0
  18. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/benchmark/aggregator.py +0 -0
  19. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/benchmark/benchmark.py +0 -0
  20. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/benchmark/benchmarker.py +0 -0
  21. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/benchmark/entrypoints.py +0 -0
  22. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/benchmark/output.py +0 -0
  23. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/benchmark/profile.py +0 -0
  24. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/benchmark/progress.py +0 -0
  25. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/benchmark/scenario.py +0 -0
  26. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/benchmark/scenarios/__init__.py +0 -0
  27. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/config.py +0 -0
  28. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/data/__init__.py +0 -0
  29. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/data/prideandprejudice.txt.gz +0 -0
  30. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/dataset/__init__.py +0 -0
  31. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/dataset/creator.py +0 -0
  32. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/dataset/entrypoints.py +0 -0
  33. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/dataset/file.py +0 -0
  34. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/dataset/hf_datasets.py +0 -0
  35. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/dataset/in_memory.py +0 -0
  36. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/dataset/synthetic.py +0 -0
  37. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/logger.py +0 -0
  38. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/objects/__init__.py +0 -0
  39. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/objects/pydantic.py +0 -0
  40. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/objects/statistics.py +0 -0
  41. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/preprocess/__init__.py +0 -0
  42. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/preprocess/dataset.py +0 -0
  43. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/request/__init__.py +0 -0
  44. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/request/loader.py +0 -0
  45. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/request/request.py +0 -0
  46. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/scheduler/__init__.py +0 -0
  47. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/scheduler/result.py +0 -0
  48. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/scheduler/scheduler.py +0 -0
  49. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/scheduler/strategy.py +0 -0
  50. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/scheduler/types.py +0 -0
  51. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/scheduler/worker.py +0 -0
  52. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/utils/__init__.py +0 -0
  53. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/utils/cli.py +0 -0
  54. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/utils/colors.py +0 -0
  55. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/utils/hf_datasets.py +0 -0
  56. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/utils/hf_transformers.py +0 -0
  57. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/utils/random.py +0 -0
  58. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm/utils/text.py +0 -0
  59. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm.egg-info/SOURCES.txt +0 -0
  60. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm.egg-info/dependency_links.txt +0 -0
  61. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm.egg-info/entry_points.txt +0 -0
  62. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm.egg-info/requires.txt +0 -0
  63. {guidellm-0.3.0a26 → guidellm-0.3.0a29}/src/guidellm.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: guidellm
3
- Version: 0.3.0a26
3
+ Version: 0.3.0a29
4
4
  Summary: Guidance platform for deploying and managing large language models.
5
5
  Author: Red Hat
6
6
  License-Expression: Apache-2.0
@@ -119,7 +119,7 @@ vllm serve "neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16"
119
119
 
120
120
  For more information on starting a vLLM server, see the [vLLM Documentation](https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html).
121
121
 
122
- For information on starting other supported inference servers or platforms, see the [Supported Backends documentation](https://github.com/neuralmagic/guidellm/blob/main/docs/backends.md).
122
+ For information on starting other supported inference servers or platforms, see the [Supported Backends Documentation](https://github.com/neuralmagic/guidellm/blob/main/docs/backends.md).
123
123
 
124
124
  #### 2. Run a GuideLLM Benchmark
125
125
 
@@ -147,13 +147,13 @@ After the evaluation is completed, GuideLLM will summarize the results into thre
147
147
 
148
148
  The sections will look similar to the following: <img alt="Sample GuideLLM benchmark output" src="https://raw.githubusercontent.com/neuralmagic/guidellm/main/docs/assets/sample-output.png" />
149
149
 
150
- For more details about the metrics and definitions, please refer to the [Metrics documentation](https://github.com/neuralmagic/guidellm/blob/main/docs/metrics.md).
150
+ For more details about the metrics and definitions, please refer to the [Metrics Documentation](https://github.com/neuralmagic/guidellm/blob/main/docs/metrics.md).
151
151
 
152
152
  #### 4. Explore the Results File
153
153
 
154
154
  By default, the full results, including complete statistics and request data, are saved to a file `benchmarks.json` in the current working directory. This file can be used for further analysis or reporting, and additionally can be reloaded into Python for further analysis using the `guidellm.benchmark.GenerativeBenchmarksReport` class. You can specify a different file name and extension with the `--output` argument.
155
155
 
156
- For more details about the supported output file types, please take a look at the [Outputs documentation](https://github.com/neuralmagic/guidellm/blob/main/docs/outputs.md).
156
+ For more details about the supported output file types, please take a look at the [Outputs Documentation](https://github.com/neuralmagic/guidellm/blob/main/docs/outputs.md).
157
157
 
158
158
  #### 5. Use the Results
159
159
 
@@ -161,7 +161,7 @@ The results from GuideLLM are used to optimize your LLM deployment for performan
161
161
 
162
162
  For example, when deploying a chat application, we likely want to ensure that our time to first token (TTFT) and inter-token latency (ITL) are under certain thresholds to meet our service level objectives (SLOs) or service level agreements (SLAs). For example, setting TTFT to 200ms and ITL 25ms for the sample data provided in the example above, we can see that even though the server is capable of handling up to 13 requests per second, we would only be able to meet our SLOs for 99% of users at a request rate of 3.5 requests per second. If we relax our constraints on ITL to 50 ms, then we can meet the TTFT SLA for 99% of users at a request rate of approximately 10 requests per second.
163
163
 
164
- For further details on determining the optimal request rate and SLOs, refer to the [SLOs documentation](https://github.com/neuralmagic/guidellm/blob/main/docs/service_level_objectives.md).
164
+ For further details on determining the optimal request rate and SLOs, refer to the [SLOs Documentation](https://github.com/neuralmagic/guidellm/blob/main/docs/service_level_objectives.md).
165
165
 
166
166
  ### Configurations
167
167
 
@@ -254,7 +254,7 @@ In the future this will be replaced by a configurable untracked file for dev use
254
254
 
255
255
  ### Documentation
256
256
 
257
- Our comprehensive documentation offers detailed guides and resources to help you maximize the benefits of GuideLLM. Whether just getting started or looking to dive deeper into advanced topics, you can find what you need in our [documentation](https://github.com/neuralmagic/guidellm/blob/main/docs).
257
+ Our comprehensive documentation offers detailed guides and resources to help you maximize the benefits of GuideLLM. Whether just getting started or looking to dive deeper into advanced topics, you can find what you need in our [Documentation](https://github.com/neuralmagic/guidellm/blob/main/docs).
258
258
 
259
259
  ### Core Docs
260
260
 
@@ -279,7 +279,7 @@ We appreciate contributions to the code, examples, integrations, documentation,
279
279
 
280
280
  ### Releases
281
281
 
282
- Visit our [GitHub Releases page](https://github.com/neuralmagic/guidellm/releases) and review the release notes to stay updated with the latest releases.
282
+ Visit our [GitHub Releases Page](https://github.com/neuralmagic/guidellm/releases) and review the release notes to stay updated with the latest releases.
283
283
 
284
284
  ### License
285
285
 
@@ -64,7 +64,7 @@ vllm serve "neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16"
64
64
 
65
65
  For more information on starting a vLLM server, see the [vLLM Documentation](https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html).
66
66
 
67
- For information on starting other supported inference servers or platforms, see the [Supported Backends documentation](https://github.com/neuralmagic/guidellm/blob/main/docs/backends.md).
67
+ For information on starting other supported inference servers or platforms, see the [Supported Backends Documentation](https://github.com/neuralmagic/guidellm/blob/main/docs/backends.md).
68
68
 
69
69
  #### 2. Run a GuideLLM Benchmark
70
70
 
@@ -92,13 +92,13 @@ After the evaluation is completed, GuideLLM will summarize the results into thre
92
92
 
93
93
  The sections will look similar to the following: <img alt="Sample GuideLLM benchmark output" src="https://raw.githubusercontent.com/neuralmagic/guidellm/main/docs/assets/sample-output.png" />
94
94
 
95
- For more details about the metrics and definitions, please refer to the [Metrics documentation](https://github.com/neuralmagic/guidellm/blob/main/docs/metrics.md).
95
+ For more details about the metrics and definitions, please refer to the [Metrics Documentation](https://github.com/neuralmagic/guidellm/blob/main/docs/metrics.md).
96
96
 
97
97
  #### 4. Explore the Results File
98
98
 
99
99
  By default, the full results, including complete statistics and request data, are saved to a file `benchmarks.json` in the current working directory. This file can be used for further analysis or reporting, and additionally can be reloaded into Python for further analysis using the `guidellm.benchmark.GenerativeBenchmarksReport` class. You can specify a different file name and extension with the `--output` argument.
100
100
 
101
- For more details about the supported output file types, please take a look at the [Outputs documentation](https://github.com/neuralmagic/guidellm/blob/main/docs/outputs.md).
101
+ For more details about the supported output file types, please take a look at the [Outputs Documentation](https://github.com/neuralmagic/guidellm/blob/main/docs/outputs.md).
102
102
 
103
103
  #### 5. Use the Results
104
104
 
@@ -106,7 +106,7 @@ The results from GuideLLM are used to optimize your LLM deployment for performan
106
106
 
107
107
  For example, when deploying a chat application, we likely want to ensure that our time to first token (TTFT) and inter-token latency (ITL) are under certain thresholds to meet our service level objectives (SLOs) or service level agreements (SLAs). For example, setting TTFT to 200ms and ITL 25ms for the sample data provided in the example above, we can see that even though the server is capable of handling up to 13 requests per second, we would only be able to meet our SLOs for 99% of users at a request rate of 3.5 requests per second. If we relax our constraints on ITL to 50 ms, then we can meet the TTFT SLA for 99% of users at a request rate of approximately 10 requests per second.
108
108
 
109
- For further details on determining the optimal request rate and SLOs, refer to the [SLOs documentation](https://github.com/neuralmagic/guidellm/blob/main/docs/service_level_objectives.md).
109
+ For further details on determining the optimal request rate and SLOs, refer to the [SLOs Documentation](https://github.com/neuralmagic/guidellm/blob/main/docs/service_level_objectives.md).
110
110
 
111
111
  ### Configurations
112
112
 
@@ -199,7 +199,7 @@ In the future this will be replaced by a configurable untracked file for dev use
199
199
 
200
200
  ### Documentation
201
201
 
202
- Our comprehensive documentation offers detailed guides and resources to help you maximize the benefits of GuideLLM. Whether just getting started or looking to dive deeper into advanced topics, you can find what you need in our [documentation](https://github.com/neuralmagic/guidellm/blob/main/docs).
202
+ Our comprehensive documentation offers detailed guides and resources to help you maximize the benefits of GuideLLM. Whether just getting started or looking to dive deeper into advanced topics, you can find what you need in our [Documentation](https://github.com/neuralmagic/guidellm/blob/main/docs).
203
203
 
204
204
  ### Core Docs
205
205
 
@@ -224,7 +224,7 @@ We appreciate contributions to the code, examples, integrations, documentation,
224
224
 
225
225
  ### Releases
226
226
 
227
- Visit our [GitHub Releases page](https://github.com/neuralmagic/guidellm/releases) and review the release notes to stay updated with the latest releases.
227
+ Visit our [GitHub Releases Page](https://github.com/neuralmagic/guidellm/releases) and review the release notes to stay updated with the latest releases.
228
228
 
229
229
  ### License
230
230
 
@@ -0,0 +1,6 @@
1
+ version = "0.3.0a29"
2
+ build_type = "nightly"
3
+ build_iteration = "29"
4
+ git_commit = "d74ff213b56e37e5e7d552becdeb2ea6955a0f58"
5
+ git_branch = "main"
6
+ git_last_tag = "v0.2.1"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: guidellm
3
- Version: 0.3.0a26
3
+ Version: 0.3.0a29
4
4
  Summary: Guidance platform for deploying and managing large language models.
5
5
  Author: Red Hat
6
6
  License-Expression: Apache-2.0
@@ -119,7 +119,7 @@ vllm serve "neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16"
119
119
 
120
120
  For more information on starting a vLLM server, see the [vLLM Documentation](https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html).
121
121
 
122
- For information on starting other supported inference servers or platforms, see the [Supported Backends documentation](https://github.com/neuralmagic/guidellm/blob/main/docs/backends.md).
122
+ For information on starting other supported inference servers or platforms, see the [Supported Backends Documentation](https://github.com/neuralmagic/guidellm/blob/main/docs/backends.md).
123
123
 
124
124
  #### 2. Run a GuideLLM Benchmark
125
125
 
@@ -147,13 +147,13 @@ After the evaluation is completed, GuideLLM will summarize the results into thre
147
147
 
148
148
  The sections will look similar to the following: <img alt="Sample GuideLLM benchmark output" src="https://raw.githubusercontent.com/neuralmagic/guidellm/main/docs/assets/sample-output.png" />
149
149
 
150
- For more details about the metrics and definitions, please refer to the [Metrics documentation](https://github.com/neuralmagic/guidellm/blob/main/docs/metrics.md).
150
+ For more details about the metrics and definitions, please refer to the [Metrics Documentation](https://github.com/neuralmagic/guidellm/blob/main/docs/metrics.md).
151
151
 
152
152
  #### 4. Explore the Results File
153
153
 
154
154
  By default, the full results, including complete statistics and request data, are saved to a file `benchmarks.json` in the current working directory. This file can be used for further analysis or reporting, and additionally can be reloaded into Python for further analysis using the `guidellm.benchmark.GenerativeBenchmarksReport` class. You can specify a different file name and extension with the `--output` argument.
155
155
 
156
- For more details about the supported output file types, please take a look at the [Outputs documentation](https://github.com/neuralmagic/guidellm/blob/main/docs/outputs.md).
156
+ For more details about the supported output file types, please take a look at the [Outputs Documentation](https://github.com/neuralmagic/guidellm/blob/main/docs/outputs.md).
157
157
 
158
158
  #### 5. Use the Results
159
159
 
@@ -161,7 +161,7 @@ The results from GuideLLM are used to optimize your LLM deployment for performan
161
161
 
162
162
  For example, when deploying a chat application, we likely want to ensure that our time to first token (TTFT) and inter-token latency (ITL) are under certain thresholds to meet our service level objectives (SLOs) or service level agreements (SLAs). For example, setting TTFT to 200ms and ITL 25ms for the sample data provided in the example above, we can see that even though the server is capable of handling up to 13 requests per second, we would only be able to meet our SLOs for 99% of users at a request rate of 3.5 requests per second. If we relax our constraints on ITL to 50 ms, then we can meet the TTFT SLA for 99% of users at a request rate of approximately 10 requests per second.
163
163
 
164
- For further details on determining the optimal request rate and SLOs, refer to the [SLOs documentation](https://github.com/neuralmagic/guidellm/blob/main/docs/service_level_objectives.md).
164
+ For further details on determining the optimal request rate and SLOs, refer to the [SLOs Documentation](https://github.com/neuralmagic/guidellm/blob/main/docs/service_level_objectives.md).
165
165
 
166
166
  ### Configurations
167
167
 
@@ -254,7 +254,7 @@ In the future this will be replaced by a configurable untracked file for dev use
254
254
 
255
255
  ### Documentation
256
256
 
257
- Our comprehensive documentation offers detailed guides and resources to help you maximize the benefits of GuideLLM. Whether just getting started or looking to dive deeper into advanced topics, you can find what you need in our [documentation](https://github.com/neuralmagic/guidellm/blob/main/docs).
257
+ Our comprehensive documentation offers detailed guides and resources to help you maximize the benefits of GuideLLM. Whether just getting started or looking to dive deeper into advanced topics, you can find what you need in our [Documentation](https://github.com/neuralmagic/guidellm/blob/main/docs).
258
258
 
259
259
  ### Core Docs
260
260
 
@@ -279,7 +279,7 @@ We appreciate contributions to the code, examples, integrations, documentation,
279
279
 
280
280
  ### Releases
281
281
 
282
- Visit our [GitHub Releases page](https://github.com/neuralmagic/guidellm/releases) and review the release notes to stay updated with the latest releases.
282
+ Visit our [GitHub Releases Page](https://github.com/neuralmagic/guidellm/releases) and review the release notes to stay updated with the latest releases.
283
283
 
284
284
  ### License
285
285
 
@@ -1,6 +0,0 @@
1
- version = "0.3.0a26"
2
- build_type = "nightly"
3
- build_iteration = "26"
4
- git_commit = "0e78c65948eab356f2f846a0d5ae609ab650c290"
5
- git_branch = "main"
6
- git_last_tag = "v0.2.1"
File without changes
File without changes
File without changes
File without changes
File without changes