guidellm 0.4.0a21__py3-none-any.whl → 0.4.0a155__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of guidellm might be problematic. Click here for more details.

Files changed (116)
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +451 -252
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +110 -0
  5. guidellm/backends/openai.py +355 -0
  6. guidellm/backends/response_handlers.py +455 -0
  7. guidellm/benchmark/__init__.py +53 -39
  8. guidellm/benchmark/benchmarker.py +148 -317
  9. guidellm/benchmark/entrypoints.py +466 -128
  10. guidellm/benchmark/output.py +517 -771
  11. guidellm/benchmark/profile.py +580 -280
  12. guidellm/benchmark/progress.py +568 -549
  13. guidellm/benchmark/scenarios/__init__.py +40 -0
  14. guidellm/benchmark/scenarios/chat.json +6 -0
  15. guidellm/benchmark/scenarios/rag.json +6 -0
  16. guidellm/benchmark/schemas.py +2085 -0
  17. guidellm/data/__init__.py +28 -4
  18. guidellm/data/collators.py +16 -0
  19. guidellm/data/deserializers/__init__.py +53 -0
  20. guidellm/data/deserializers/deserializer.py +109 -0
  21. guidellm/data/deserializers/file.py +222 -0
  22. guidellm/data/deserializers/huggingface.py +94 -0
  23. guidellm/data/deserializers/memory.py +192 -0
  24. guidellm/data/deserializers/synthetic.py +346 -0
  25. guidellm/data/loaders.py +145 -0
  26. guidellm/data/preprocessors/__init__.py +25 -0
  27. guidellm/data/preprocessors/formatters.py +412 -0
  28. guidellm/data/preprocessors/mappers.py +198 -0
  29. guidellm/data/preprocessors/preprocessor.py +29 -0
  30. guidellm/data/processor.py +30 -0
  31. guidellm/data/schemas.py +13 -0
  32. guidellm/data/utils/__init__.py +10 -0
  33. guidellm/data/utils/dataset.py +94 -0
  34. guidellm/data/utils/functions.py +18 -0
  35. guidellm/extras/__init__.py +4 -0
  36. guidellm/extras/audio.py +215 -0
  37. guidellm/extras/vision.py +242 -0
  38. guidellm/logger.py +2 -2
  39. guidellm/mock_server/__init__.py +8 -0
  40. guidellm/mock_server/config.py +84 -0
  41. guidellm/mock_server/handlers/__init__.py +17 -0
  42. guidellm/mock_server/handlers/chat_completions.py +280 -0
  43. guidellm/mock_server/handlers/completions.py +280 -0
  44. guidellm/mock_server/handlers/tokenizer.py +142 -0
  45. guidellm/mock_server/models.py +510 -0
  46. guidellm/mock_server/server.py +168 -0
  47. guidellm/mock_server/utils.py +302 -0
  48. guidellm/preprocess/dataset.py +23 -26
  49. guidellm/presentation/builder.py +2 -2
  50. guidellm/presentation/data_models.py +25 -21
  51. guidellm/presentation/injector.py +2 -3
  52. guidellm/scheduler/__init__.py +65 -26
  53. guidellm/scheduler/constraints.py +1035 -0
  54. guidellm/scheduler/environments.py +252 -0
  55. guidellm/scheduler/scheduler.py +140 -368
  56. guidellm/scheduler/schemas.py +272 -0
  57. guidellm/scheduler/strategies.py +519 -0
  58. guidellm/scheduler/worker.py +391 -420
  59. guidellm/scheduler/worker_group.py +707 -0
  60. guidellm/schemas/__init__.py +31 -0
  61. guidellm/schemas/info.py +159 -0
  62. guidellm/schemas/request.py +216 -0
  63. guidellm/schemas/response.py +119 -0
  64. guidellm/schemas/stats.py +228 -0
  65. guidellm/{config.py → settings.py} +32 -21
  66. guidellm/utils/__init__.py +95 -8
  67. guidellm/utils/auto_importer.py +98 -0
  68. guidellm/utils/cli.py +46 -2
  69. guidellm/utils/console.py +183 -0
  70. guidellm/utils/encoding.py +778 -0
  71. guidellm/utils/functions.py +134 -0
  72. guidellm/utils/hf_datasets.py +1 -2
  73. guidellm/utils/hf_transformers.py +4 -4
  74. guidellm/utils/imports.py +9 -0
  75. guidellm/utils/messaging.py +1118 -0
  76. guidellm/utils/mixins.py +115 -0
  77. guidellm/utils/pydantic_utils.py +411 -0
  78. guidellm/utils/random.py +3 -4
  79. guidellm/utils/registry.py +220 -0
  80. guidellm/utils/singleton.py +133 -0
  81. guidellm/{objects → utils}/statistics.py +341 -247
  82. guidellm/utils/synchronous.py +159 -0
  83. guidellm/utils/text.py +163 -50
  84. guidellm/utils/typing.py +41 -0
  85. guidellm/version.py +1 -1
  86. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/METADATA +33 -10
  87. guidellm-0.4.0a155.dist-info/RECORD +96 -0
  88. guidellm/backend/__init__.py +0 -23
  89. guidellm/backend/backend.py +0 -259
  90. guidellm/backend/openai.py +0 -705
  91. guidellm/backend/response.py +0 -136
  92. guidellm/benchmark/aggregator.py +0 -760
  93. guidellm/benchmark/benchmark.py +0 -837
  94. guidellm/benchmark/scenario.py +0 -104
  95. guidellm/data/prideandprejudice.txt.gz +0 -0
  96. guidellm/dataset/__init__.py +0 -22
  97. guidellm/dataset/creator.py +0 -213
  98. guidellm/dataset/entrypoints.py +0 -42
  99. guidellm/dataset/file.py +0 -92
  100. guidellm/dataset/hf_datasets.py +0 -62
  101. guidellm/dataset/in_memory.py +0 -132
  102. guidellm/dataset/synthetic.py +0 -287
  103. guidellm/objects/__init__.py +0 -18
  104. guidellm/objects/pydantic.py +0 -89
  105. guidellm/request/__init__.py +0 -18
  106. guidellm/request/loader.py +0 -284
  107. guidellm/request/request.py +0 -79
  108. guidellm/request/types.py +0 -10
  109. guidellm/scheduler/queues.py +0 -25
  110. guidellm/scheduler/result.py +0 -155
  111. guidellm/scheduler/strategy.py +0 -495
  112. guidellm-0.4.0a21.dist-info/RECORD +0 -62
  113. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/WHEEL +0 -0
  114. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/entry_points.txt +0 -0
  115. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/licenses/LICENSE +0 -0
  116. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/top_level.txt +0 -0
guidellm/request/request.py DELETED
@@ -1,79 +0,0 @@
1
- import uuid
2
- from typing import Any, Literal, Optional
3
-
4
- from pydantic import Field
5
-
6
- from guidellm.objects.pydantic import StandardBaseModel
7
-
8
- __all__ = ["GenerationRequest"]
9
-
10
-
11
- class GenerationRequest(StandardBaseModel):
12
- """
13
- A class representing a request for generation.
14
- This class is used to encapsulate the details of a generation request,
15
- including the request ID, type, content, parameters, statistics, and constraints.
16
- It is designed to be used with the BackendRequestsWorker class to handle
17
- the generation process.
18
-
19
- :param request_id: The unique identifier for the request.
20
- :param request_type: The type of request (e.g., text, chat).
21
- :param content: The content for the request to send to the backend.
22
- If request_type is 'text', this should be a string or list of strings
23
- which will be resolved by backend.text_completions.
24
- If request_type is 'chat', this should be a string,
25
- a list of (str, Dict[str, Union[str, Dict[str, str]], Path, Image]),
26
- or Any raw content which will be resolved by backend.chat_completions.
27
- If raw content, raw_content=True must be passed in the params.
28
- :param params: Additional parameters for the request passed in as kwargs.
29
- For an http backend, these are passed into the body of the request.
30
- :param stats: Statistics for the request, such as the number of prompt tokens.
31
- Used for tracking and reporting purposes.
32
- :param constraints: Constraints for the request, such as the maximum number
33
- of output tokens. Used for controlling the behavior of the backend.
34
- """
35
-
36
- request_id: Optional[str] = Field(
37
- default_factory=lambda: str(uuid.uuid4()),
38
- description="The unique identifier for the request.",
39
- )
40
- request_type: Literal["text_completions", "chat_completions"] = Field(
41
- default="text_completions",
42
- description=(
43
- "The type of request (e.g., text, chat). "
44
- "If request_type='text_completions', resolved by backend.text_completions. "
45
- "If request_typ='chat_completions', resolved by backend.chat_completions."
46
- ),
47
- )
48
- content: Any = Field(
49
- description=(
50
- "The content for the request to send to the backend. "
51
- "If request_type is 'text', this should be a string or list of strings "
52
- "which will be resolved by backend.text_completions. "
53
- "If request_type is 'chat', this should be a string, "
54
- "a list of (str, Dict[str, Union[str, Dict[str, str]], Path, Image]), "
55
- "or Any raw content which will be resolved by backend.chat_completions. "
56
- "If raw content, raw_content=True must be passed in the params."
57
- )
58
- )
59
- params: dict[str, Any] = Field(
60
- default_factory=dict,
61
- description=(
62
- "Additional parameters for the request that will be passed in as kwargs. "
63
- "For an http backend, these are passed into the body of the request. "
64
- ),
65
- )
66
- stats: dict[Literal["prompt_tokens"], int] = Field(
67
- default_factory=dict,
68
- description=(
69
- "Statistics for the request, such as the number of prompt tokens. "
70
- "Used for tracking and reporting purposes."
71
- ),
72
- )
73
- constraints: dict[Literal["output_tokens"], int] = Field(
74
- default_factory=dict,
75
- description=(
76
- "Constraints for the request, such as the maximum number of output tokens. "
77
- "Used for controlling the behavior of the backend."
78
- ),
79
- )
guidellm/request/types.py DELETED
@@ -1,10 +0,0 @@
1
- from typing import TypeVar
2
-
3
- __all__ = [
4
- "RequestT",
5
- "ResponseT",
6
- ]
7
-
8
-
9
- RequestT = TypeVar("RequestT")
10
- ResponseT = TypeVar("ResponseT")
guidellm/scheduler/queues.py DELETED
@@ -1,25 +0,0 @@
1
- """
2
- Helper module for importing the correct queue types.
3
- """
4
-
5
- from dataclasses import dataclass
6
- from queue import Empty as QueueEmpty
7
- from queue import Full as QueueFull
8
- from queue import Queue
9
- from typing import Generic
10
-
11
- from guidellm.request.types import RequestT, ResponseT
12
- from guidellm.scheduler.result import WorkerProcessRequest, WorkerProcessResult
13
-
14
- __all__ = [
15
- "MPQueues",
16
- "Queue",
17
- "QueueEmpty",
18
- "QueueFull",
19
- ]
20
-
21
-
22
- @dataclass
23
- class MPQueues(Generic[RequestT, ResponseT]):
24
- requests: Queue[WorkerProcessRequest[RequestT, ResponseT]]
25
- responses: Queue[WorkerProcessResult[RequestT, ResponseT]]
guidellm/scheduler/result.py DELETED
@@ -1,155 +0,0 @@
1
- from dataclasses import dataclass
2
- from typing import (
3
- Generic,
4
- Literal,
5
- Optional,
6
- )
7
-
8
- from guidellm.objects import StandardBaseModel
9
- from guidellm.request.types import RequestT, ResponseT
10
- from guidellm.scheduler.strategy import SchedulingStrategy
11
-
12
- __all__ = [
13
- "SchedulerRequestInfo",
14
- "SchedulerRequestResult",
15
- "SchedulerResult",
16
- "SchedulerRunInfo",
17
- "WorkerProcessRequest",
18
- "WorkerProcessResult",
19
- ]
20
-
21
-
22
- class SchedulerRunInfo(StandardBaseModel):
23
- """
24
- Information about the current run of the scheduler.
25
- This class holds metadata about the scheduling run,
26
- including the start and end times, the number of processes,
27
- and the scheduling strategy used.
28
- It also tracks the number of requests created, queued, pending,
29
- and completed during the run.
30
-
31
- :param start_time: The start time of the scheduling run.
32
- :param end_time: The end time of the scheduling run;
33
- if None, then this will be math.inf.
34
- :param end_number: The maximum number of requests to be processed;
35
- if None, then this will be math.inf.
36
- :param processes: The number of processes used in the scheduling run.
37
- :param strategy: The scheduling strategy used in the run.
38
- This should be an instance of SchedulingStrategy.
39
- :param created_requests: The number of requests created during the run.
40
- :param queued_requests: The number of requests queued during the run.
41
- :param scheduled_requests: The number of requests scheduled during the run.
42
- (requests pending being sent to the worker but recieved by a process)
43
- :param processing_requests: The number of requests actively being run.
44
- :param completed_requests: The number of requests completed during the run.
45
- """
46
-
47
- start_time: float
48
- end_time: float
49
- end_number: float
50
- processes: int
51
- strategy: SchedulingStrategy
52
-
53
- created_requests: int = 0
54
- queued_requests: int = 0
55
- scheduled_requests: int = 0
56
- processing_requests: int = 0
57
- completed_requests: int = 0
58
-
59
-
60
- class SchedulerRequestInfo(StandardBaseModel):
61
- """
62
- Information about a specific request run through the scheduler.
63
- This class holds metadata about the request, including
64
- the targeted start time, queued time, start time, end time,
65
- and the process ID that handled the request.
66
-
67
- :param targeted_start_time: The targeted start time for the request (time.time()).
68
- :param queued_time: The time the request was queued (time.time()).
69
- :param scheduled_time: The time the request was scheduled (time.time())
70
- (any sleep time before the request was sent to the worker).
71
- :param worker_start: The time the worker started processing request (time.time()).
72
- :param worker_end: The time the worker finished processing request. (time.time()).
73
- :param process_id: The ID of the underlying process that handled the request.
74
- """
75
-
76
- requested: bool = False
77
- completed: bool = False
78
- errored: bool = False
79
- canceled: bool = False
80
-
81
- targeted_start_time: float = -1
82
- queued_time: float = -1
83
- dequeued_time: float = -1
84
- scheduled_time: float = -1
85
- worker_start: float = -1
86
- request_start: float = -1
87
- request_end: float = -1
88
- worker_end: float = -1
89
- process_id: int = -1
90
-
91
-
92
- class SchedulerResult(StandardBaseModel):
93
- """
94
- The yielded, iterative result for a scheduler run.
95
- These are triggered on the start and end of the run,
96
- as well as on the start and end of each request.
97
- Depending on the type, it will hold the request and response
98
- along with information and statistics about the request and general run.
99
-
100
- :param type_: The type of the result, which can be one of:
101
- - "run_start": Indicates the start of the run.
102
- - "run_complete": Indicates the completion of the run (teardown happens after).
103
- - "request_start": Indicates the start of a request.
104
- - "request_complete": Indicates the completion of a request.
105
- :param request: The request that was processed.
106
- :param response: The response from the worker for the request.
107
- :param request_info: Information about the request, including
108
- the targeted start time, queued time, start time, end time,
109
- and the process ID that handled the request.
110
- :param run_info: Information about the current run of the scheduler,
111
- including the start and end times, the number of processes,
112
- and the scheduling strategy used.
113
- It also tracks the number of requests created, queued, pending,
114
- and completed during the run.
115
- """
116
-
117
- pydantic_type: Literal["scheduler_result"] = "scheduler_result"
118
- type_: Literal[
119
- "run_start",
120
- "run_complete",
121
- "request_scheduled",
122
- "request_start",
123
- "request_complete",
124
- ]
125
- run_info: SchedulerRunInfo
126
-
127
-
128
- class SchedulerRequestResult(
129
- SchedulerResult,
130
- Generic[RequestT, ResponseT],
131
- ):
132
- pydantic_type: Literal["scheduler_request_result"] = "scheduler_request_result" # type: ignore[assignment]
133
- type_: Literal[
134
- "request_scheduled",
135
- "request_start",
136
- "request_complete",
137
- ]
138
- request: RequestT
139
- request_info: SchedulerRequestInfo
140
- response: Optional[ResponseT] = None
141
-
142
-
143
- @dataclass
144
- class WorkerProcessRequest(Generic[RequestT, ResponseT]):
145
- request: RequestT
146
- timeout_time: float
147
- queued_time: float
148
-
149
-
150
- @dataclass
151
- class WorkerProcessResult(Generic[RequestT, ResponseT]):
152
- type_: Literal["request_scheduled", "request_start", "request_complete"]
153
- request: RequestT
154
- response: Optional[ResponseT]
155
- info: SchedulerRequestInfo