guidellm 0.1.0__py3-none-any.whl → 0.2.0rc20250418__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of guidellm might be problematic. Click here for more details.

Files changed (69)
  1. guidellm/__init__.py +38 -6
  2. guidellm/__main__.py +294 -0
  3. guidellm/backend/__init__.py +19 -6
  4. guidellm/backend/backend.py +238 -0
  5. guidellm/backend/openai.py +532 -122
  6. guidellm/backend/response.py +132 -0
  7. guidellm/benchmark/__init__.py +73 -0
  8. guidellm/benchmark/aggregator.py +760 -0
  9. guidellm/benchmark/benchmark.py +838 -0
  10. guidellm/benchmark/benchmarker.py +334 -0
  11. guidellm/benchmark/entrypoints.py +141 -0
  12. guidellm/benchmark/output.py +946 -0
  13. guidellm/benchmark/profile.py +409 -0
  14. guidellm/benchmark/progress.py +720 -0
  15. guidellm/config.py +34 -56
  16. guidellm/data/__init__.py +4 -0
  17. guidellm/data/prideandprejudice.txt.gz +0 -0
  18. guidellm/dataset/__init__.py +22 -0
  19. guidellm/dataset/creator.py +213 -0
  20. guidellm/dataset/entrypoints.py +42 -0
  21. guidellm/dataset/file.py +90 -0
  22. guidellm/dataset/hf_datasets.py +62 -0
  23. guidellm/dataset/in_memory.py +132 -0
  24. guidellm/dataset/synthetic.py +262 -0
  25. guidellm/objects/__init__.py +18 -0
  26. guidellm/objects/pydantic.py +60 -0
  27. guidellm/objects/statistics.py +947 -0
  28. guidellm/request/__init__.py +12 -10
  29. guidellm/request/loader.py +281 -0
  30. guidellm/request/request.py +79 -0
  31. guidellm/scheduler/__init__.py +51 -3
  32. guidellm/scheduler/result.py +137 -0
  33. guidellm/scheduler/scheduler.py +382 -0
  34. guidellm/scheduler/strategy.py +493 -0
  35. guidellm/scheduler/types.py +7 -0
  36. guidellm/scheduler/worker.py +511 -0
  37. guidellm/utils/__init__.py +16 -29
  38. guidellm/utils/colors.py +8 -0
  39. guidellm/utils/hf_transformers.py +35 -0
  40. guidellm/utils/random.py +43 -0
  41. guidellm/utils/text.py +118 -357
  42. {guidellm-0.1.0.dist-info → guidellm-0.2.0rc20250418.dist-info}/METADATA +96 -79
  43. guidellm-0.2.0rc20250418.dist-info/RECORD +48 -0
  44. {guidellm-0.1.0.dist-info → guidellm-0.2.0rc20250418.dist-info}/WHEEL +1 -1
  45. guidellm-0.2.0rc20250418.dist-info/entry_points.txt +2 -0
  46. guidellm/backend/base.py +0 -320
  47. guidellm/core/__init__.py +0 -24
  48. guidellm/core/distribution.py +0 -190
  49. guidellm/core/report.py +0 -321
  50. guidellm/core/request.py +0 -44
  51. guidellm/core/result.py +0 -545
  52. guidellm/core/serializable.py +0 -169
  53. guidellm/executor/__init__.py +0 -10
  54. guidellm/executor/base.py +0 -213
  55. guidellm/executor/profile_generator.py +0 -343
  56. guidellm/main.py +0 -336
  57. guidellm/request/base.py +0 -194
  58. guidellm/request/emulated.py +0 -391
  59. guidellm/request/file.py +0 -76
  60. guidellm/request/transformers.py +0 -100
  61. guidellm/scheduler/base.py +0 -374
  62. guidellm/scheduler/load_generator.py +0 -196
  63. guidellm/utils/injector.py +0 -70
  64. guidellm/utils/progress.py +0 -196
  65. guidellm/utils/transformers.py +0 -151
  66. guidellm-0.1.0.dist-info/RECORD +0 -35
  67. guidellm-0.1.0.dist-info/entry_points.txt +0 -3
  68. {guidellm-0.1.0.dist-info → guidellm-0.2.0rc20250418.dist-info/licenses}/LICENSE +0 -0
  69. {guidellm-0.1.0.dist-info → guidellm-0.2.0rc20250418.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,132 @@
1
+ from typing import Any, Literal, Optional
2
+
3
+ from pydantic import computed_field
4
+
5
+ from guidellm.config import settings
6
+ from guidellm.objects.pydantic import StandardBaseModel
7
+
8
# Explicit public API of this module.
__all__ = [
    "StreamingResponseType",
    "StreamingTextResponse",
    "RequestArgs",
    "ResponseSummary",
]


# Discriminator for streaming responses: "start" marks the beginning of a
# request's stream, "iter" marks each subsequent streamed chunk.
StreamingResponseType = Literal["start", "iter"]
17
+
18
+
19
class StreamingTextResponse(StandardBaseModel):
    """
    A model representing the response content for a streaming text request.

    :param type_: The type of the response; either 'start' or 'iter'.
    :param value: The value of the response up to this iteration.
    :param start_time: The time.time() the request started.
    :param first_iter_time: The time.time() the first 'iter' was received,
        or None if no iteration has been received yet.
    :param iter_count: The iteration count for the response. For 'start' this is 0
        and for the first 'iter' it is 1.
    :param delta: The text delta added to the response for this stream iteration.
    :param time: If 'start', the time.time() the request started.
        If 'iter', the time.time() the iteration was received.
    :param request_id: The unique identifier for the request, if any.
    """

    type_: StreamingResponseType
    value: str
    start_time: float
    first_iter_time: Optional[float]
    iter_count: int
    delta: str
    time: float
    request_id: Optional[str] = None
42
+
43
+
44
class RequestArgs(StandardBaseModel):
    """
    A model representing the arguments for a request to a backend.
    Biases towards an HTTP request, but can be used for other types of backends.

    :param target: The target URL or function for the request.
    :param headers: The headers, if any, included in the request such as authorization.
    :param payload: The payload / arguments for the request including the prompt /
        content and other configurations.
    :param timeout: The timeout for the request in seconds, if any.
    :param http2: Whether HTTP/2 was used for the request, if applicable.
    """

    target: str
    headers: dict[str, str]
    payload: dict[str, Any]
    timeout: Optional[float] = None
    http2: Optional[bool] = None
62
+
63
+
64
class ResponseSummary(StandardBaseModel):
    """
    A model representing a summary of a backend request.
    Always returned as the final iteration of a streaming request.

    :param value: The final value returned from the request.
    :param request_args: The arguments used to make the request.
    :param iterations: The number of iterations in the request.
    :param start_time: The time the request started.
    :param end_time: The time the request ended.
    :param first_iter_time: The time the first iteration was received.
    :param last_iter_time: The time the last iteration was received.
    :param request_prompt_tokens: The number of tokens measured in the prompt
        for the request, if any.
    :param request_output_tokens: The number of tokens enforced for the output
        for the request, if any.
    :param response_prompt_tokens: The number of tokens measured in the prompt
        for the response, if any.
    :param response_output_tokens: The number of tokens measured in the output
        for the response, if any.
    :param request_id: The unique identifier for the request, if any.
    :param error: The error message, if any, returned from making the request.
    """

    value: str
    request_args: RequestArgs
    iterations: int = 0
    start_time: float
    end_time: float
    first_iter_time: Optional[float]
    last_iter_time: Optional[float]
    request_prompt_tokens: Optional[int] = None
    request_output_tokens: Optional[int] = None
    response_prompt_tokens: Optional[int] = None
    response_output_tokens: Optional[int] = None
    request_id: Optional[str] = None
    error: Optional[str] = None

    @computed_field  # type: ignore[misc]
    @property
    def prompt_tokens(self) -> Optional[int]:
        """
        The number of tokens measured in the prompt based on preferences
        for trusting the input or response.

        :return: The number of tokens in the prompt, if any.
        """
        if settings.preferred_prompt_tokens_source == "request":
            # Prefer the request-side measurement, fall back to the response.
            return self.request_prompt_tokens or self.response_prompt_tokens

        return self.response_prompt_tokens or self.request_prompt_tokens

    @computed_field  # type: ignore[misc]
    @property
    def output_tokens(self) -> Optional[int]:
        """
        The number of tokens measured in the output based on preferences
        for trusting the input or response.

        :return: The number of tokens in the output, if any.
        """
        if self.error is not None:
            # An error occurred, so the request's enforced output token count
            # cannot be trusted to have been fully generated; use only what the
            # response actually reported.
            # BUGFIX: previously returned response_prompt_tokens (prompt-side
            # count) from this output-side property.
            return self.response_output_tokens

        if settings.preferred_output_tokens_source == "request":
            # Prefer the request-enforced count, fall back to the response.
            return self.request_output_tokens or self.response_output_tokens

        return self.response_output_tokens or self.request_output_tokens
@@ -0,0 +1,73 @@
1
"""Package entry point re-exporting the public benchmark API from submodules."""

from .aggregator import AggregatorT, BenchmarkAggregator, GenerativeBenchmarkAggregator
from .benchmark import (
    Benchmark,
    BenchmarkArgs,
    BenchmarkMetrics,
    BenchmarkRunStats,
    BenchmarkT,
    GenerativeBenchmark,
    GenerativeMetrics,
    GenerativeTextErrorStats,
    GenerativeTextResponseStats,
    StatusBreakdown,
)
from .benchmarker import Benchmarker, BenchmarkerResult, GenerativeBenchmarker
from .entrypoints import benchmark_generative_text
from .output import GenerativeBenchmarksConsole, GenerativeBenchmarksReport
from .profile import (
    AsyncProfile,
    ConcurrentProfile,
    Profile,
    ProfileType,
    SweepProfile,
    SynchronousProfile,
    ThroughputProfile,
    create_profile,
)
from .progress import (
    BenchmarkerProgressDisplay,
    BenchmarkerTaskProgressState,
    GenerativeTextBenchmarkerProgressDisplay,
    GenerativeTextBenchmarkerTaskProgressState,
)

# Explicit public surface of the package; grouped to mirror the source modules
# imported above.
__all__ = [
    # Aggregator
    "AggregatorT",
    "BenchmarkAggregator",
    "GenerativeBenchmarkAggregator",
    # Benchmark
    "Benchmark",
    "BenchmarkArgs",
    "BenchmarkMetrics",
    "BenchmarkRunStats",
    "BenchmarkT",
    "GenerativeBenchmark",
    "GenerativeMetrics",
    "GenerativeTextErrorStats",
    "GenerativeTextResponseStats",
    "StatusBreakdown",
    # Benchmarker
    "Benchmarker",
    "BenchmarkerResult",
    "GenerativeBenchmarker",
    # Entry points
    "benchmark_generative_text",
    # Output
    "GenerativeBenchmarksConsole",
    "GenerativeBenchmarksReport",
    # Profile
    "AsyncProfile",
    "ConcurrentProfile",
    "Profile",
    "ProfileType",
    "SweepProfile",
    "SynchronousProfile",
    "ThroughputProfile",
    "create_profile",
    # Progress
    "BenchmarkerProgressDisplay",
    "BenchmarkerTaskProgressState",
    "GenerativeTextBenchmarkerProgressDisplay",
    "GenerativeTextBenchmarkerTaskProgressState",
]