guidellm 0.4.0a21__py3-none-any.whl → 0.4.0a155__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of guidellm might be problematic. Click here for more details.

Files changed (116)
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +451 -252
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +110 -0
  5. guidellm/backends/openai.py +355 -0
  6. guidellm/backends/response_handlers.py +455 -0
  7. guidellm/benchmark/__init__.py +53 -39
  8. guidellm/benchmark/benchmarker.py +148 -317
  9. guidellm/benchmark/entrypoints.py +466 -128
  10. guidellm/benchmark/output.py +517 -771
  11. guidellm/benchmark/profile.py +580 -280
  12. guidellm/benchmark/progress.py +568 -549
  13. guidellm/benchmark/scenarios/__init__.py +40 -0
  14. guidellm/benchmark/scenarios/chat.json +6 -0
  15. guidellm/benchmark/scenarios/rag.json +6 -0
  16. guidellm/benchmark/schemas.py +2085 -0
  17. guidellm/data/__init__.py +28 -4
  18. guidellm/data/collators.py +16 -0
  19. guidellm/data/deserializers/__init__.py +53 -0
  20. guidellm/data/deserializers/deserializer.py +109 -0
  21. guidellm/data/deserializers/file.py +222 -0
  22. guidellm/data/deserializers/huggingface.py +94 -0
  23. guidellm/data/deserializers/memory.py +192 -0
  24. guidellm/data/deserializers/synthetic.py +346 -0
  25. guidellm/data/loaders.py +145 -0
  26. guidellm/data/preprocessors/__init__.py +25 -0
  27. guidellm/data/preprocessors/formatters.py +412 -0
  28. guidellm/data/preprocessors/mappers.py +198 -0
  29. guidellm/data/preprocessors/preprocessor.py +29 -0
  30. guidellm/data/processor.py +30 -0
  31. guidellm/data/schemas.py +13 -0
  32. guidellm/data/utils/__init__.py +10 -0
  33. guidellm/data/utils/dataset.py +94 -0
  34. guidellm/data/utils/functions.py +18 -0
  35. guidellm/extras/__init__.py +4 -0
  36. guidellm/extras/audio.py +215 -0
  37. guidellm/extras/vision.py +242 -0
  38. guidellm/logger.py +2 -2
  39. guidellm/mock_server/__init__.py +8 -0
  40. guidellm/mock_server/config.py +84 -0
  41. guidellm/mock_server/handlers/__init__.py +17 -0
  42. guidellm/mock_server/handlers/chat_completions.py +280 -0
  43. guidellm/mock_server/handlers/completions.py +280 -0
  44. guidellm/mock_server/handlers/tokenizer.py +142 -0
  45. guidellm/mock_server/models.py +510 -0
  46. guidellm/mock_server/server.py +168 -0
  47. guidellm/mock_server/utils.py +302 -0
  48. guidellm/preprocess/dataset.py +23 -26
  49. guidellm/presentation/builder.py +2 -2
  50. guidellm/presentation/data_models.py +25 -21
  51. guidellm/presentation/injector.py +2 -3
  52. guidellm/scheduler/__init__.py +65 -26
  53. guidellm/scheduler/constraints.py +1035 -0
  54. guidellm/scheduler/environments.py +252 -0
  55. guidellm/scheduler/scheduler.py +140 -368
  56. guidellm/scheduler/schemas.py +272 -0
  57. guidellm/scheduler/strategies.py +519 -0
  58. guidellm/scheduler/worker.py +391 -420
  59. guidellm/scheduler/worker_group.py +707 -0
  60. guidellm/schemas/__init__.py +31 -0
  61. guidellm/schemas/info.py +159 -0
  62. guidellm/schemas/request.py +216 -0
  63. guidellm/schemas/response.py +119 -0
  64. guidellm/schemas/stats.py +228 -0
  65. guidellm/{config.py → settings.py} +32 -21
  66. guidellm/utils/__init__.py +95 -8
  67. guidellm/utils/auto_importer.py +98 -0
  68. guidellm/utils/cli.py +46 -2
  69. guidellm/utils/console.py +183 -0
  70. guidellm/utils/encoding.py +778 -0
  71. guidellm/utils/functions.py +134 -0
  72. guidellm/utils/hf_datasets.py +1 -2
  73. guidellm/utils/hf_transformers.py +4 -4
  74. guidellm/utils/imports.py +9 -0
  75. guidellm/utils/messaging.py +1118 -0
  76. guidellm/utils/mixins.py +115 -0
  77. guidellm/utils/pydantic_utils.py +411 -0
  78. guidellm/utils/random.py +3 -4
  79. guidellm/utils/registry.py +220 -0
  80. guidellm/utils/singleton.py +133 -0
  81. guidellm/{objects → utils}/statistics.py +341 -247
  82. guidellm/utils/synchronous.py +159 -0
  83. guidellm/utils/text.py +163 -50
  84. guidellm/utils/typing.py +41 -0
  85. guidellm/version.py +1 -1
  86. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/METADATA +33 -10
  87. guidellm-0.4.0a155.dist-info/RECORD +96 -0
  88. guidellm/backend/__init__.py +0 -23
  89. guidellm/backend/backend.py +0 -259
  90. guidellm/backend/openai.py +0 -705
  91. guidellm/backend/response.py +0 -136
  92. guidellm/benchmark/aggregator.py +0 -760
  93. guidellm/benchmark/benchmark.py +0 -837
  94. guidellm/benchmark/scenario.py +0 -104
  95. guidellm/data/prideandprejudice.txt.gz +0 -0
  96. guidellm/dataset/__init__.py +0 -22
  97. guidellm/dataset/creator.py +0 -213
  98. guidellm/dataset/entrypoints.py +0 -42
  99. guidellm/dataset/file.py +0 -92
  100. guidellm/dataset/hf_datasets.py +0 -62
  101. guidellm/dataset/in_memory.py +0 -132
  102. guidellm/dataset/synthetic.py +0 -287
  103. guidellm/objects/__init__.py +0 -18
  104. guidellm/objects/pydantic.py +0 -89
  105. guidellm/request/__init__.py +0 -18
  106. guidellm/request/loader.py +0 -284
  107. guidellm/request/request.py +0 -79
  108. guidellm/request/types.py +0 -10
  109. guidellm/scheduler/queues.py +0 -25
  110. guidellm/scheduler/result.py +0 -155
  111. guidellm/scheduler/strategy.py +0 -495
  112. guidellm-0.4.0a21.dist-info/RECORD +0 -62
  113. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/WHEEL +0 -0
  114. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/entry_points.txt +0 -0
  115. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/licenses/LICENSE +0 -0
  116. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/top_level.txt +0 -0
@@ -1,25 +1,25 @@
1
1
  import random
2
2
  from collections import defaultdict
3
3
  from math import ceil
4
- from typing import TYPE_CHECKING, Optional, Union
4
+ from typing import TYPE_CHECKING
5
5
 
6
6
  from pydantic import BaseModel, computed_field
7
7
 
8
8
  if TYPE_CHECKING:
9
- from guidellm.benchmark.benchmark import GenerativeBenchmark
9
+ from guidellm.benchmark import GenerativeBenchmark
10
10
 
11
- from guidellm.objects.statistics import DistributionSummary
11
+ from guidellm.utils import DistributionSummary
12
12
 
13
13
 
14
14
  class Bucket(BaseModel):
15
- value: Union[float, int]
15
+ value: float | int
16
16
  count: int
17
17
 
18
18
  @staticmethod
19
19
  def from_data(
20
- data: Union[list[float], list[int]],
21
- bucket_width: Optional[float] = None,
22
- n_buckets: Optional[int] = None,
20
+ data: list[float] | list[int],
21
+ bucket_width: float | None = None,
22
+ n_buckets: int | None = None,
23
23
  ) -> tuple[list["Bucket"], float]:
24
24
  if not data:
25
25
  return [], 1.0
@@ -35,7 +35,7 @@ class Bucket(BaseModel):
35
35
  else:
36
36
  n_buckets = ceil(range_v / bucket_width)
37
37
 
38
- bucket_counts: defaultdict[Union[float, int], int] = defaultdict(int)
38
+ bucket_counts: defaultdict[float | int, int] = defaultdict(int)
39
39
  for val in data:
40
40
  idx = int((val - min_v) // bucket_width)
41
41
  if idx >= n_buckets:
@@ -67,12 +67,12 @@ class RunInfo(BaseModel):
67
67
 
68
68
  @classmethod
69
69
  def from_benchmarks(cls, benchmarks: list["GenerativeBenchmark"]):
70
- model = benchmarks[0].worker.backend_model or "N/A"
70
+ model = benchmarks[0].benchmarker.backend.get("model", "N/A")
71
71
  timestamp = max(
72
72
  bm.run_stats.start_time for bm in benchmarks if bm.start_time is not None
73
73
  )
74
74
  return cls(
75
- model=Model(name=model, size=0),
75
+ model=Model(name=model or "", size=0),
76
76
  task="N/A",
77
77
  timestamp=timestamp,
78
78
  dataset=Dataset(name="N/A"),
@@ -80,7 +80,7 @@ class RunInfo(BaseModel):
80
80
 
81
81
 
82
82
  class Distribution(BaseModel):
83
- statistics: Optional[DistributionSummary] = None
83
+ statistics: DistributionSummary | None = None
84
84
  buckets: list[Bucket]
85
85
  bucket_width: float
86
86
 
@@ -108,8 +108,8 @@ class WorkloadDetails(BaseModel):
108
108
 
109
109
  @classmethod
110
110
  def from_benchmarks(cls, benchmarks: list["GenerativeBenchmark"]):
111
- target = benchmarks[0].worker.backend_target
112
- rate_type = benchmarks[0].args.profile.type_
111
+ target = benchmarks[0].benchmarker.backend.get("target", "N/A")
112
+ rate_type = benchmarks[0].scheduler.strategy.type_
113
113
  successful_requests = [
114
114
  req for bm in benchmarks for req in bm.requests.successful
115
115
  ]
@@ -117,21 +117,25 @@ class WorkloadDetails(BaseModel):
117
117
  range(len(successful_requests)), min(5, len(successful_requests))
118
118
  )
119
119
  sample_prompts = [
120
- successful_requests[i].prompt.replace("\n", " ").replace('"', "'")
120
+ req.request_args.replace("\n", " ").replace('"', "'")
121
+ if (req := successful_requests[i]).request_args
122
+ else ""
121
123
  for i in sample_indices
122
124
  ]
123
125
  sample_outputs = [
124
- successful_requests[i].output.replace("\n", " ").replace('"', "'")
126
+ req.output.replace("\n", " ").replace('"', "'")
127
+ if (req := successful_requests[i]).output
128
+ else ""
125
129
  for i in sample_indices
126
130
  ]
127
131
 
128
132
  prompt_tokens = [
129
- float(req.prompt_tokens)
133
+ float(req.prompt_tokens) if req.prompt_tokens is not None else -1
130
134
  for bm in benchmarks
131
135
  for req in bm.requests.successful
132
136
  ]
133
137
  output_tokens = [
134
- float(req.output_tokens)
138
+ float(req.output_tokens) if req.output_tokens is not None else -1
135
139
  for bm in benchmarks
136
140
  for req in bm.requests.successful
137
141
  ]
@@ -152,13 +156,13 @@ class WorkloadDetails(BaseModel):
152
156
  statistics=output_token_stats, buckets=output_token_buckets, bucket_width=1
153
157
  )
154
158
 
155
- min_start_time = benchmarks[0].run_stats.start_time
159
+ min_start_time = benchmarks[0].start_time
156
160
 
157
161
  all_req_times = [
158
- req.start_time - min_start_time
162
+ req.info.timings.request_start - min_start_time
159
163
  for bm in benchmarks
160
164
  for req in bm.requests.successful
161
- if req.start_time is not None
165
+ if req.info.timings.request_start is not None
162
166
  ]
163
167
  number_of_buckets = len(benchmarks)
164
168
  request_over_time_buckets, bucket_width = Bucket.from_data(
@@ -190,7 +194,7 @@ class TabularDistributionSummary(DistributionSummary):
190
194
  """
191
195
 
192
196
  @computed_field
193
- def percentile_rows(self) -> list[dict[str, Union[str, float]]]:
197
+ def percentile_rows(self) -> list[dict[str, str | float]]:
194
198
  rows = [
195
199
  {"percentile": name, "value": value}
196
200
  for name, value in self.percentiles.model_dump().items()
@@ -1,14 +1,13 @@
1
1
  import re
2
2
  from pathlib import Path
3
- from typing import Union
4
3
 
5
4
  from loguru import logger
6
5
 
7
- from guidellm.config import settings
6
+ from guidellm.settings import settings
8
7
  from guidellm.utils.text import load_text
9
8
 
10
9
 
11
- def create_report(js_data: dict, output_path: Union[str, Path]) -> Path:
10
+ def create_report(js_data: dict, output_path: str | Path) -> Path:
12
11
  """
13
12
  Creates a report from the dictionary and saves it to the output path.
14
13
 
@@ -1,47 +1,86 @@
1
- from .result import (
2
- SchedulerRequestInfo,
3
- SchedulerRequestResult,
4
- SchedulerResult,
5
- SchedulerRunInfo,
1
+ """
2
+ Scheduler subsystem for orchestrating benchmark workloads and managing worker processes.
3
+
4
+ This module provides the core scheduling infrastructure for guidellm, including
5
+ strategies for controlling request timing patterns (synchronous, asynchronous,
6
+ constant rate, Poisson), constraints for limiting benchmark execution (duration,
7
+ error rates, request counts), and distributed execution through worker processes.
8
+ The scheduler coordinates between backend interfaces, manages benchmark state
9
+ transitions, and handles multi-turn request sequences with customizable timing
10
+ strategies and resource constraints.
11
+ """
12
+
13
+ from .constraints import (
14
+ Constraint,
15
+ ConstraintInitializer,
16
+ ConstraintsInitializerFactory,
17
+ MaxDurationConstraint,
18
+ MaxErrorRateConstraint,
19
+ MaxErrorsConstraint,
20
+ MaxGlobalErrorRateConstraint,
21
+ MaxNumberConstraint,
22
+ PydanticConstraintInitializer,
23
+ SerializableConstraintInitializer,
24
+ UnserializableConstraintInitializer,
6
25
  )
26
+ from .environments import Environment, NonDistributedEnvironment
7
27
  from .scheduler import Scheduler
8
- from .strategy import (
28
+ from .schemas import (
29
+ BackendInterface,
30
+ BackendT,
31
+ MultiTurnRequestT,
32
+ RequestT,
33
+ ResponseT,
34
+ SchedulerMessagingPydanticRegistry,
35
+ SchedulerState,
36
+ SchedulerUpdateAction,
37
+ SchedulerUpdateActionProgress,
38
+ )
39
+ from .strategies import (
9
40
  AsyncConstantStrategy,
10
41
  AsyncPoissonStrategy,
11
42
  ConcurrentStrategy,
12
43
  SchedulingStrategy,
44
+ StrategyT,
13
45
  StrategyType,
14
46
  SynchronousStrategy,
15
47
  ThroughputStrategy,
16
- strategy_display_str,
17
- )
18
- from .worker import (
19
- GenerativeRequestsWorker,
20
- GenerativeRequestsWorkerDescription,
21
- RequestsWorker,
22
- ResolveStatus,
23
- WorkerDescription,
24
- WorkerProcessResult,
25
48
  )
49
+ from .worker import WorkerProcess
50
+ from .worker_group import WorkerProcessGroup
26
51
 
27
52
  __all__ = [
28
53
  "AsyncConstantStrategy",
29
54
  "AsyncPoissonStrategy",
55
+ "BackendInterface",
56
+ "BackendT",
30
57
  "ConcurrentStrategy",
31
- "GenerativeRequestsWorker",
32
- "GenerativeRequestsWorkerDescription",
33
- "RequestsWorker",
34
- "ResolveStatus",
58
+ "Constraint",
59
+ "ConstraintInitializer",
60
+ "ConstraintsInitializerFactory",
61
+ "Environment",
62
+ "MaxDurationConstraint",
63
+ "MaxErrorRateConstraint",
64
+ "MaxErrorsConstraint",
65
+ "MaxGlobalErrorRateConstraint",
66
+ "MaxNumberConstraint",
67
+ "MultiTurnRequestT",
68
+ "NonDistributedEnvironment",
69
+ "PydanticConstraintInitializer",
70
+ "RequestT",
71
+ "ResponseT",
35
72
  "Scheduler",
36
- "SchedulerRequestInfo",
37
- "SchedulerRequestResult",
38
- "SchedulerResult",
39
- "SchedulerRunInfo",
73
+ "SchedulerMessagingPydanticRegistry",
74
+ "SchedulerState",
75
+ "SchedulerUpdateAction",
76
+ "SchedulerUpdateActionProgress",
40
77
  "SchedulingStrategy",
78
+ "SerializableConstraintInitializer",
79
+ "StrategyT",
41
80
  "StrategyType",
42
81
  "SynchronousStrategy",
43
82
  "ThroughputStrategy",
44
- "WorkerDescription",
45
- "WorkerProcessResult",
46
- "strategy_display_str",
83
+ "UnserializableConstraintInitializer",
84
+ "WorkerProcess",
85
+ "WorkerProcessGroup",
47
86
  ]