guidellm 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (69)
  1. guidellm/__init__.py +38 -6
  2. guidellm/__main__.py +294 -0
  3. guidellm/backend/__init__.py +19 -6
  4. guidellm/backend/backend.py +238 -0
  5. guidellm/backend/openai.py +532 -122
  6. guidellm/backend/response.py +132 -0
  7. guidellm/benchmark/__init__.py +73 -0
  8. guidellm/benchmark/aggregator.py +760 -0
  9. guidellm/benchmark/benchmark.py +838 -0
  10. guidellm/benchmark/benchmarker.py +334 -0
  11. guidellm/benchmark/entrypoints.py +141 -0
  12. guidellm/benchmark/output.py +946 -0
  13. guidellm/benchmark/profile.py +409 -0
  14. guidellm/benchmark/progress.py +720 -0
  15. guidellm/config.py +34 -56
  16. guidellm/data/__init__.py +4 -0
  17. guidellm/data/prideandprejudice.txt.gz +0 -0
  18. guidellm/dataset/__init__.py +22 -0
  19. guidellm/dataset/creator.py +213 -0
  20. guidellm/dataset/entrypoints.py +42 -0
  21. guidellm/dataset/file.py +90 -0
  22. guidellm/dataset/hf_datasets.py +62 -0
  23. guidellm/dataset/in_memory.py +132 -0
  24. guidellm/dataset/synthetic.py +262 -0
  25. guidellm/objects/__init__.py +18 -0
  26. guidellm/objects/pydantic.py +60 -0
  27. guidellm/objects/statistics.py +947 -0
  28. guidellm/request/__init__.py +12 -10
  29. guidellm/request/loader.py +281 -0
  30. guidellm/request/request.py +79 -0
  31. guidellm/scheduler/__init__.py +51 -3
  32. guidellm/scheduler/result.py +137 -0
  33. guidellm/scheduler/scheduler.py +382 -0
  34. guidellm/scheduler/strategy.py +493 -0
  35. guidellm/scheduler/types.py +7 -0
  36. guidellm/scheduler/worker.py +511 -0
  37. guidellm/utils/__init__.py +16 -29
  38. guidellm/utils/colors.py +8 -0
  39. guidellm/utils/hf_transformers.py +35 -0
  40. guidellm/utils/random.py +43 -0
  41. guidellm/utils/text.py +118 -357
  42. {guidellm-0.1.0.dist-info → guidellm-0.2.0.dist-info}/METADATA +96 -79
  43. guidellm-0.2.0.dist-info/RECORD +48 -0
  44. {guidellm-0.1.0.dist-info → guidellm-0.2.0.dist-info}/WHEEL +1 -1
  45. guidellm-0.2.0.dist-info/entry_points.txt +2 -0
  46. guidellm/backend/base.py +0 -320
  47. guidellm/core/__init__.py +0 -24
  48. guidellm/core/distribution.py +0 -190
  49. guidellm/core/report.py +0 -321
  50. guidellm/core/request.py +0 -44
  51. guidellm/core/result.py +0 -545
  52. guidellm/core/serializable.py +0 -169
  53. guidellm/executor/__init__.py +0 -10
  54. guidellm/executor/base.py +0 -213
  55. guidellm/executor/profile_generator.py +0 -343
  56. guidellm/main.py +0 -336
  57. guidellm/request/base.py +0 -194
  58. guidellm/request/emulated.py +0 -391
  59. guidellm/request/file.py +0 -76
  60. guidellm/request/transformers.py +0 -100
  61. guidellm/scheduler/base.py +0 -374
  62. guidellm/scheduler/load_generator.py +0 -196
  63. guidellm/utils/injector.py +0 -70
  64. guidellm/utils/progress.py +0 -196
  65. guidellm/utils/transformers.py +0 -151
  66. guidellm-0.1.0.dist-info/RECORD +0 -35
  67. guidellm-0.1.0.dist-info/entry_points.txt +0 -3
  68. {guidellm-0.1.0.dist-info → guidellm-0.2.0.dist-info/licenses}/LICENSE +0 -0
  69. {guidellm-0.1.0.dist-info → guidellm-0.2.0.dist-info}/top_level.txt +0 -0
guidellm/__init__.py CHANGED
@@ -6,14 +6,46 @@ evaluating and benchmarking large language models (LLMs).
 # flake8: noqa
 
 import os
-import transformers  # type: ignore
+import logging
+import contextlib
 
-os.environ["TOKENIZERS_PARALLELISM"] = "false"  # Silence warnings for tokenizers
-transformers.logging.set_verbosity_error()  # Silence warnings for transformers
 
+with (
+    open(os.devnull, "w") as devnull,
+    contextlib.redirect_stderr(devnull),
+    contextlib.redirect_stdout(devnull),
+):
+    from transformers.utils import logging as hf_logging  # type: ignore[import]
 
-from .config import settings
+# Set the log level for the transformers library to ERROR
+# to ignore None of PyTorch, TensorFlow found
+os.environ["TOKENIZERS_PARALLELISM"] = "false"  # Silence warnings for tokenizers
+hf_logging.set_verbosity_error()
+logging.getLogger("transformers").setLevel(logging.ERROR)
+
+from .config import (
+    settings,
+    DatasetSettings,
+    Environment,
+    LoggingSettings,
+    OpenAISettings,
+    print_config,
+    Settings,
+    reload_settings,
+)
 from .logger import configure_logger, logger
-from .main import generate_benchmark_report
 
-__all__ = ["configure_logger", "logger", "settings", "generate_benchmark_report"]
+__all__ = [
+    # Config
+    "DatasetSettings",
+    "Environment",
+    "LoggingSettings",
+    "OpenAISettings",
+    "print_config",
+    "Settings",
+    "reload_settings",
+    "settings",
+    # Logger
+    "logger",
+    "configure_logger",
+]
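
Note: in 0.2.0 the package root drops the `generate_benchmark_report` export and instead re-exports the config surface. A minimal sketch using only names from the `__all__` above, assuming `settings` is the shared `Settings` instance as the paired exports suggest:

from guidellm import Settings, configure_logger, print_config, settings

configure_logger()  # logger setup, carried over from 0.1.0
print_config()      # newly exported: prints the env-configurable settings
assert isinstance(settings, Settings)  # shared settings instance (assumption)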
guidellm/__main__.py ADDED
@@ -0,0 +1,294 @@
+import asyncio
+import json
+from pathlib import Path
+from typing import get_args
+
+import click
+
+from guidellm.backend import BackendType
+from guidellm.benchmark import ProfileType, benchmark_generative_text
+from guidellm.config import print_config
+from guidellm.scheduler import StrategyType
+
+STRATEGY_PROFILE_CHOICES = set(
+    list(get_args(ProfileType)) + list(get_args(StrategyType))
+)
+
+
+def parse_json(ctx, param, value):  # noqa: ARG001
+    if value is None:
+        return None
+    try:
+        return json.loads(value)
+    except json.JSONDecodeError as err:
+        raise click.BadParameter(f"{param.name} must be a valid JSON string.") from err
+
+
+def parse_number_str(ctx, param, value):  # noqa: ARG001
+    if value is None:
+        return None
+
+    values = value.split(",") if "," in value else [value]
+
+    try:
+        return [int(val) if val.isdigit() else float(val) for val in values]
+    except ValueError as err:
+        raise click.BadParameter(
+            f"{param.name} must be a number or comma-separated list of numbers."
+        ) from err
+
+
+@click.group()
+def cli():
+    pass
+
+
+@cli.command(
+    help="Run a benchmark against a generative model using the specified arguments."
+)
+@click.option(
+    "--target",
+    required=True,
+    type=str,
+    help="The target path for the backend to run benchmarks against. For example, http://localhost:8000",
+)
+@click.option(
+    "--backend-type",
+    type=click.Choice(list(get_args(BackendType))),
+    help=(
+        "The type of backend to use to run requests against. Defaults to 'openai_http'."
+        f" Supported types: {', '.join(get_args(BackendType))}"
+    ),
+    default="openai_http",
+)
+@click.option(
+    "--backend-args",
+    callback=parse_json,
+    default=None,
+    help=(
+        "A JSON string containing any arguments to pass to the backend as a "
+        "dict with **kwargs."
+    ),
+)
+@click.option(
+    "--model",
+    default=None,
+    type=str,
+    help=(
+        "The ID of the model to benchmark within the backend. "
+        "If None provided (default), then it will use the first model available."
+    ),
+)
+@click.option(
+    "--processor",
+    default=None,
+    type=str,
+    help=(
+        "The processor or tokenizer to use to calculate token counts for statistics "
+        "and synthetic data generation. If None provided (default), will load "
+        "using the model arg, if needed."
+    ),
+)
+@click.option(
+    "--processor-args",
+    default=None,
+    callback=parse_json,
+    help=(
+        "A JSON string containing any arguments to pass to the processor constructor "
+        "as a dict with **kwargs."
+    ),
+)
+@click.option(
+    "--data",
+    required=True,
+    type=str,
+    help=(
+        "The HuggingFace dataset ID, a path to a HuggingFace dataset, "
+        "a path to a data file csv, json, jsonl, or txt, "
+        "or a synthetic data config as a json or key=value string."
+    ),
+)
+@click.option(
+    "--data-args",
+    callback=parse_json,
+    help=(
+        "A JSON string containing any arguments to pass to the dataset creation "
+        "as a dict with **kwargs."
+    ),
+)
+@click.option(
+    "--data-sampler",
+    default=None,
+    type=click.Choice(["random"]),
+    help=(
+        "The data sampler type to use. 'random' will add a random shuffle on the data. "
+        "Defaults to None"
+    ),
+)
+@click.option(
+    "--rate-type",
+    required=True,
+    type=click.Choice(STRATEGY_PROFILE_CHOICES),
+    help=(
+        "The type of benchmark to run. "
+        f"Supported types {', '.join(STRATEGY_PROFILE_CHOICES)}. "
+    ),
+)
+@click.option(
+    "--rate",
+    default=None,
+    callback=parse_number_str,
+    help=(
+        "The rates to run the benchmark at. "
+        "Can be a single number or a comma-separated list of numbers. "
+        "For rate-type=sweep, this is the number of benchmarks it runs in the sweep. "
+        "For rate-type=concurrent, this is the number of concurrent requests. "
+        "For rate-type=async,constant,poisson, this is the rate requests per second. "
+        "For rate-type=synchronous,throughput, this must not be set."
+    ),
+)
+@click.option(
+    "--max-seconds",
+    type=float,
+    help=(
+        "The maximum number of seconds each benchmark can run for. "
+        "If None, will run until max_requests or the data is exhausted."
+    ),
+)
+@click.option(
+    "--max-requests",
+    type=int,
+    help=(
+        "The maximum number of requests each benchmark can run for. "
+        "If None, will run until max_seconds or the data is exhausted."
+    ),
+)
+@click.option(
+    "--warmup-percent",
+    type=float,
+    default=None,
+    help=(
+        "The percent of the benchmark (based on max-seconds, max-requests, "
+        "or length of dataset) to run as a warmup and not include in the final results. "
+        "Defaults to None."
+    ),
+)
+@click.option(
+    "--cooldown-percent",
+    type=float,
+    help=(
+        "The percent of the benchmark (based on max-seconds, max-requests, or length "
+        "of dataset) to run as a cooldown and not include in the final results. "
+        "Defaults to None."
+    ),
+)
+@click.option(
+    "--disable-progress",
+    is_flag=True,
+    help="Set this flag to disable progress updates to the console",
+)
+@click.option(
+    "--display-scheduler-stats",
+    is_flag=True,
+    help="Set this flag to display stats for the processes running the benchmarks",
+)
+@click.option(
+    "--disable-console-outputs",
+    is_flag=True,
+    help="Set this flag to disable console output",
+)
+@click.option(
+    "--output-path",
+    type=click.Path(),
+    default=Path.cwd() / "benchmarks.json",
+    help=(
+        "The path to save the output to. If it is a directory, "
+        "it will save benchmarks.json under it. "
+        "Otherwise, json, yaml, or csv files are supported for output types "
+        "which will be read from the extension for the file path."
+    ),
+)
+@click.option(
+    "--output-extras",
+    callback=parse_json,
+    help="A JSON string of extra data to save with the output benchmarks",
+)
+@click.option(
+    "--output-sampling",
+    type=int,
+    help=(
+        "The number of samples to save in the output file. "
+        "If None (default), will save all samples."
+    ),
+    default=None,
+)
+@click.option(
+    "--random-seed",
+    default=42,
+    type=int,
+    help="The random seed to use for benchmarking to ensure reproducibility.",
+)
+def benchmark(
+    target,
+    backend_type,
+    backend_args,
+    model,
+    processor,
+    processor_args,
+    data,
+    data_args,
+    data_sampler,
+    rate_type,
+    rate,
+    max_seconds,
+    max_requests,
+    warmup_percent,
+    cooldown_percent,
+    disable_progress,
+    display_scheduler_stats,
+    disable_console_outputs,
+    output_path,
+    output_extras,
+    output_sampling,
+    random_seed,
+):
+    asyncio.run(
+        benchmark_generative_text(
+            target=target,
+            backend_type=backend_type,
+            backend_args=backend_args,
+            model=model,
+            processor=processor,
+            processor_args=processor_args,
+            data=data,
+            data_args=data_args,
+            data_sampler=data_sampler,
+            rate_type=rate_type,
+            rate=rate,
+            max_seconds=max_seconds,
+            max_requests=max_requests,
+            warmup_percent=warmup_percent,
+            cooldown_percent=cooldown_percent,
+            show_progress=not disable_progress,
+            show_progress_scheduler_stats=display_scheduler_stats,
+            output_console=not disable_console_outputs,
+            output_path=output_path,
+            output_extras=output_extras,
+            output_sampling=output_sampling,
+            random_seed=random_seed,
+        )
+    )
+
+
+@cli.command(
+    help=(
+        "Print out the available configuration settings that can be set "
+        "through environment variables."
+    )
+)
+def config():
+    print_config()
+
+
+if __name__ == "__main__":
+    cli()
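
Note: assuming the new entry_points.txt (its two added lines are not shown in this diff) registers the `cli` group as a `guidellm` console script, these options surface as `guidellm benchmark --help`. The same run can be driven programmatically through the coroutine the command wraps; a sketch passing exactly the keyword arguments the `benchmark` wrapper forwards above, with CLI defaults and a hypothetical synthetic-data key=value string for `--data`:

import asyncio

from guidellm.benchmark import benchmark_generative_text

# Mirrors `guidellm benchmark --target ... --data ... --rate-type sweep --max-seconds 60`
asyncio.run(
    benchmark_generative_text(
        target="http://localhost:8000",
        backend_type="openai_http",
        backend_args=None,
        model=None,
        processor=None,
        processor_args=None,
        data="prompt_tokens=256,output_tokens=128",  # hypothetical synthetic config keys
        data_args=None,
        data_sampler=None,
        rate_type="sweep",
        rate=None,
        max_seconds=60,
        max_requests=None,
        warmup_percent=None,
        cooldown_percent=None,
        show_progress=True,
        show_progress_scheduler_stats=False,
        output_console=True,
        output_path="benchmarks.json",
        output_extras=None,
        output_sampling=None,
        random_seed=42,
    )
)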
guidellm/backend/__init__.py CHANGED
@@ -1,10 +1,23 @@
-from .base import Backend, BackendEngine, BackendEnginePublic, GenerativeResponse
-from .openai import OpenAIBackend
+from .backend import (
+    Backend,
+    BackendType,
+)
+from .openai import CHAT_COMPLETIONS_PATH, TEXT_COMPLETIONS_PATH, OpenAIHTTPBackend
+from .response import (
+    RequestArgs,
+    ResponseSummary,
+    StreamingResponseType,
+    StreamingTextResponse,
+)
 
 __all__ = [
+    "StreamingResponseType",
+    "StreamingTextResponse",
+    "RequestArgs",
+    "ResponseSummary",
     "Backend",
-    "BackendEngine",
-    "BackendEnginePublic",
-    "GenerativeResponse",
-    "OpenAIBackend",
+    "BackendType",
+    "OpenAIHTTPBackend",
+    "TEXT_COMPLETIONS_PATH",
+    "CHAT_COMPLETIONS_PATH",
 ]
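
Note: for callers of the 0.1.0 API, the changes above amount to a rename of the backend class plus new response types; a before/after import sketch using only names from this diff:

# 0.1.0 (removed in 0.2.0):
# from guidellm.backend import Backend, BackendEngine, GenerativeResponse, OpenAIBackend

# 0.2.0:
from guidellm.backend import (
    Backend,
    BackendType,
    OpenAIHTTPBackend,
    RequestArgs,
    ResponseSummary,
    StreamingTextResponse,
)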
guidellm/backend/backend.py ADDED
@@ -0,0 +1,238 @@
+from abc import ABC, abstractmethod
+from collections.abc import AsyncGenerator
+from pathlib import Path
+from typing import Any, Literal, Optional, Union
+
+from loguru import logger
+from PIL import Image
+
+from guidellm.backend.response import ResponseSummary, StreamingTextResponse
+
+__all__ = [
+    "Backend",
+    "BackendType",
+]
+
+
+BackendType = Literal["openai_http"]
+
+
+class Backend(ABC):
+    """
+    Abstract base class for generative AI backends.
+
+    This class provides a common interface for creating and interacting with different
+    generative AI backends. Subclasses should implement the abstract methods to
+    define specific backend behavior.
+
+    :cvar _registry: A registration dictionary that maps BackendType to backend classes.
+    :param type_: The type of the backend.
+    """
+
+    _registry: dict[BackendType, "type[Backend]"] = {}
+
+    @classmethod
+    def register(cls, backend_type: BackendType):
+        """
+        A decorator to register a backend class in the backend registry.
+
+        :param backend_type: The type of backend to register.
+        :type backend_type: BackendType
+        :return: The decorated backend class.
+        :rtype: Type[Backend]
+        """
+        if backend_type in cls._registry:
+            raise ValueError(f"Backend type already registered: {backend_type}")
+
+        if not issubclass(cls, Backend):
+            raise TypeError("Only subclasses of Backend can be registered")
+
+        def inner_wrapper(wrapped_class: type["Backend"]):
+            cls._registry[backend_type] = wrapped_class
+            logger.info("Registered backend type: {}", backend_type)
+            return wrapped_class
+
+        return inner_wrapper
+
+    @classmethod
+    def create(cls, type_: BackendType, **kwargs) -> "Backend":
+        """
+        Factory method to create a backend instance based on the backend type.
+
+        :param type_: The type of backend to create.
+        :type type_: BackendType
+        :param kwargs: Additional arguments for backend initialization.
+        :return: An instance of a subclass of Backend.
+        :rtype: Backend
+        :raises ValueError: If the backend type is not registered.
+        """
+
+        logger.info("Creating backend of type {}", type_)
+
+        if type_ not in cls._registry:
+            err = ValueError(f"Unsupported backend type: {type_}")
+            logger.error("{}", err)
+            raise err
+
+        return Backend._registry[type_](**kwargs)
+
+    def __init__(self, type_: BackendType):
+        self._type = type_
+
+    @property
+    def type_(self) -> BackendType:
+        """
+        :return: The type of the backend.
+        """
+        return self._type
+
+    @property
+    @abstractmethod
+    def target(self) -> str:
+        """
+        :return: The target location for the backend.
+        """
+        ...
+
+    @property
+    @abstractmethod
+    def model(self) -> Optional[str]:
+        """
+        :return: The model used for the backend requests.
+        """
+        ...
+
+    @property
+    @abstractmethod
+    def info(self) -> dict[str, Any]:
+        """
+        :return: The information about the backend.
+        """
+        ...
+
+    async def validate(self):
+        """
+        Handle final setup and validate the backend is ready for use.
+        If not successful, raises the appropriate exception.
+        """
+        logger.info("{} validating backend {}", self.__class__.__name__, self.type_)
+        await self.check_setup()
+        models = await self.available_models()
+        if not models:
+            raise ValueError("No models available for the backend")
+
+        async for _ in self.text_completions(
+            prompt="Test connection", output_token_count=1
+        ):  # type: ignore[attr-defined]
+            pass
+
+    @abstractmethod
+    async def check_setup(self):
+        """
+        Check the setup for the backend.
+        If unsuccessful, raises the appropriate exception.
+
+        :raises ValueError: If the setup check fails.
+        """
+        ...
+
+    @abstractmethod
+    async def prepare_multiprocessing(self):
+        """
+        Prepare the backend for use in a multiprocessing environment.
+        This is useful for backends that have instance state that can not
+        be shared across processes and should be cleared out and re-initialized
+        for each new process.
+        """
+        ...
+
+    @abstractmethod
+    async def available_models(self) -> list[str]:
+        """
+        Get the list of available models for the backend.
+
+        :return: The list of available models.
+        :rtype: List[str]
+        """
+        ...
+
+    @abstractmethod
+    async def text_completions(
+        self,
+        prompt: Union[str, list[str]],
+        request_id: Optional[str] = None,
+        prompt_token_count: Optional[int] = None,
+        output_token_count: Optional[int] = None,
+        **kwargs,
+    ) -> AsyncGenerator[Union[StreamingTextResponse, ResponseSummary], None]:
+        """
+        Generate text only completions for the given prompt.
+        Does not support multiple modalities, complicated chat interfaces,
+        or chat templates. Specifically, it requests with only the prompt.
+
+        :param prompt: The prompt (or list of prompts) to generate a completion for.
+            If a list is supplied, these are concatenated and run through the model
+            for a single prompt.
+        :param request_id: The unique identifier for the request, if any.
+            Added to logging statements and the response for tracking purposes.
+        :param prompt_token_count: The number of tokens measured in the prompt, if any.
+            Returned in the response stats for later analysis, if applicable.
+        :param output_token_count: If supplied, the number of tokens to enforce
+            generation of for the output for this request.
+        :param kwargs: Additional keyword arguments to pass with the request.
+        :return: An async generator that yields a StreamingTextResponse for start,
+            a StreamingTextResponse for each received iteration,
+            and a ResponseSummary for the final response.
+        """
+        ...
+
+    @abstractmethod
+    async def chat_completions(
+        self,
+        content: Union[
+            str,
+            list[Union[str, dict[str, Union[str, dict[str, str]]], Path, Image.Image]],
+            Any,
+        ],
+        request_id: Optional[str] = None,
+        prompt_token_count: Optional[int] = None,
+        output_token_count: Optional[int] = None,
+        raw_content: bool = False,
+        **kwargs,
+    ) -> AsyncGenerator[Union[StreamingTextResponse, ResponseSummary], None]:
+        """
+        Generate chat completions for the given content.
+        Supports multiple modalities, complicated chat interfaces, and chat templates.
+        Specifically, it requests with the content, which can be any combination of
+        text, images, and audio provided the target model supports it,
+        and returns the output text. Additionally, any chat templates
+        for the model are applied within the backend.
+
+        :param content: The content (or list of content) to generate a completion for.
+            This supports any combination of text, images, and audio (model dependent).
+            Supported text only request examples:
+            content="Sample prompt", content=["Sample prompt", "Second prompt"],
+            content=[{"type": "text", "value": "Sample prompt"}].
+            Supported text and image request examples:
+            content=["Describe the image", PIL.Image.open("image.jpg")],
+            content=["Describe the image", Path("image.jpg")],
+            content=["Describe the image", {"type": "image_url",
+            "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}].
+            Supported text and audio request examples:
+            content=["Transcribe the audio", Path("audio.wav")],
+            content=["Transcribe the audio", {"type": "input_audio",
+            "input_audio": {"data": f"{base64_bytes}", "format": "wav"}}].
+            Additionally, if raw_content=True then the content is passed directly to the
+            backend without any processing.
+        :param request_id: The unique identifier for the request, if any.
+            Added to logging statements and the response for tracking purposes.
+        :param prompt_token_count: The number of tokens measured in the prompt, if any.
+            Returned in the response stats for later analysis, if applicable.
+        :param output_token_count: If supplied, the number of tokens to enforce
+            generation of for the output for this request.
+        :param kwargs: Additional keyword arguments to pass with the request.
+        :return: An async generator that yields a StreamingTextResponse for start,
+            a StreamingTextResponse for each received iteration,
+            and a ResponseSummary for the final response.
+        """
+        ...
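
Note: a short sketch of how this registry is meant to be consumed, following the create() and validate() implementations above. The `target` kwarg is an assumption about OpenAIHTTPBackend's constructor, whose actual signature lives in guidellm/backend/openai.py and is not shown in this hunk:

import asyncio

from guidellm.backend import Backend

async def main():
    # "openai_http" is the only BackendType in 0.2.0; create() forwards **kwargs
    # to the registered class, so `target` is assumed to be a constructor argument.
    backend = Backend.create("openai_http", target="http://localhost:8000")
    await backend.validate()  # check_setup + available_models + one-token test request

    # text_completions yields StreamingTextResponse objects while streaming
    # and a ResponseSummary as the final item, per the docstring above.
    async for response in backend.text_completions(
        prompt="Hello, world!", output_token_count=16
    ):
        print(type(response).__name__)

asyncio.run(main())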