hammad-python 0.0.14-py3-none-any.whl → 0.0.16-py3-none-any.whl
This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in their public registry.
- hammad/__init__.py +177 -0
- hammad/{performance/imports.py → _internal.py} +7 -1
- hammad/cache/__init__.py +1 -1
- hammad/cli/__init__.py +3 -1
- hammad/cli/_runner.py +265 -0
- hammad/cli/animations.py +1 -1
- hammad/cli/plugins.py +133 -78
- hammad/cli/styles/__init__.py +1 -1
- hammad/cli/styles/utils.py +149 -3
- hammad/data/__init__.py +56 -29
- hammad/data/collections/__init__.py +27 -17
- hammad/data/collections/collection.py +205 -383
- hammad/data/collections/indexes/__init__.py +37 -0
- hammad/data/collections/indexes/qdrant/__init__.py +1 -0
- hammad/data/collections/indexes/qdrant/index.py +735 -0
- hammad/data/collections/indexes/qdrant/settings.py +94 -0
- hammad/data/collections/indexes/qdrant/utils.py +220 -0
- hammad/data/collections/indexes/tantivy/__init__.py +1 -0
- hammad/data/collections/indexes/tantivy/index.py +428 -0
- hammad/data/collections/indexes/tantivy/settings.py +51 -0
- hammad/data/collections/indexes/tantivy/utils.py +200 -0
- hammad/data/configurations/__init__.py +2 -2
- hammad/data/configurations/configuration.py +2 -2
- hammad/data/models/__init__.py +20 -9
- hammad/data/models/extensions/__init__.py +4 -0
- hammad/data/models/{pydantic → extensions/pydantic}/__init__.py +6 -19
- hammad/data/models/{pydantic → extensions/pydantic}/converters.py +143 -16
- hammad/data/models/{base/fields.py → fields.py} +1 -1
- hammad/data/models/{base/model.py → model.py} +1 -1
- hammad/data/models/{base/utils.py → utils.py} +1 -1
- hammad/data/sql/__init__.py +23 -0
- hammad/data/sql/database.py +578 -0
- hammad/data/sql/types.py +141 -0
- hammad/data/types/__init__.py +1 -3
- hammad/data/types/file.py +3 -3
- hammad/data/types/multimodal/__init__.py +2 -2
- hammad/data/types/multimodal/audio.py +2 -2
- hammad/data/types/multimodal/image.py +2 -2
- hammad/formatting/__init__.py +9 -27
- hammad/formatting/json/__init__.py +8 -2
- hammad/formatting/json/converters.py +7 -1
- hammad/formatting/text/__init__.py +1 -1
- hammad/formatting/yaml/__init__.py +1 -1
- hammad/genai/__init__.py +78 -0
- hammad/genai/agents/__init__.py +1 -0
- hammad/genai/agents/types/__init__.py +35 -0
- hammad/genai/agents/types/history.py +277 -0
- hammad/genai/agents/types/tool.py +490 -0
- hammad/genai/embedding_models/__init__.py +41 -0
- hammad/{ai/embeddings/client/litellm_embeddings_client.py → genai/embedding_models/embedding_model.py} +47 -142
- hammad/genai/embedding_models/embedding_model_name.py +77 -0
- hammad/genai/embedding_models/embedding_model_request.py +65 -0
- hammad/{ai/embeddings/types.py → genai/embedding_models/embedding_model_response.py} +3 -3
- hammad/genai/embedding_models/run.py +161 -0
- hammad/genai/language_models/__init__.py +35 -0
- hammad/genai/language_models/_streaming.py +622 -0
- hammad/genai/language_models/_types.py +276 -0
- hammad/genai/language_models/_utils/__init__.py +31 -0
- hammad/genai/language_models/_utils/_completions.py +131 -0
- hammad/genai/language_models/_utils/_messages.py +89 -0
- hammad/genai/language_models/_utils/_requests.py +202 -0
- hammad/genai/language_models/_utils/_structured_outputs.py +124 -0
- hammad/genai/language_models/language_model.py +734 -0
- hammad/genai/language_models/language_model_request.py +135 -0
- hammad/genai/language_models/language_model_response.py +219 -0
- hammad/genai/language_models/language_model_response_chunk.py +53 -0
- hammad/genai/language_models/run.py +530 -0
- hammad/genai/multimodal_models.py +48 -0
- hammad/genai/rerank_models.py +26 -0
- hammad/logging/__init__.py +1 -1
- hammad/logging/decorators.py +1 -1
- hammad/logging/logger.py +2 -2
- hammad/mcp/__init__.py +1 -1
- hammad/mcp/client/__init__.py +35 -0
- hammad/mcp/client/client.py +105 -4
- hammad/mcp/client/client_service.py +10 -3
- hammad/mcp/servers/__init__.py +24 -0
- hammad/{performance/runtime → runtime}/__init__.py +2 -2
- hammad/{performance/runtime → runtime}/decorators.py +1 -1
- hammad/{performance/runtime → runtime}/run.py +1 -1
- hammad/service/__init__.py +1 -1
- hammad/service/create.py +3 -8
- hammad/service/decorators.py +8 -8
- hammad/typing/__init__.py +28 -0
- hammad/web/__init__.py +3 -3
- hammad/web/http/client.py +1 -1
- hammad/web/models.py +53 -21
- hammad/web/search/client.py +99 -52
- hammad/web/utils.py +13 -13
- hammad_python-0.0.16.dist-info/METADATA +191 -0
- hammad_python-0.0.16.dist-info/RECORD +110 -0
- hammad/ai/__init__.py +0 -1
- hammad/ai/_utils.py +0 -142
- hammad/ai/completions/__init__.py +0 -45
- hammad/ai/completions/client.py +0 -684
- hammad/ai/completions/create.py +0 -710
- hammad/ai/completions/settings.py +0 -100
- hammad/ai/completions/types.py +0 -792
- hammad/ai/completions/utils.py +0 -486
- hammad/ai/embeddings/__init__.py +0 -35
- hammad/ai/embeddings/client/__init__.py +0 -1
- hammad/ai/embeddings/client/base_embeddings_client.py +0 -26
- hammad/ai/embeddings/client/fastembed_text_embeddings_client.py +0 -200
- hammad/ai/embeddings/create.py +0 -159
- hammad/data/collections/base_collection.py +0 -58
- hammad/data/collections/searchable_collection.py +0 -556
- hammad/data/collections/vector_collection.py +0 -596
- hammad/data/databases/__init__.py +0 -21
- hammad/data/databases/database.py +0 -902
- hammad/data/models/base/__init__.py +0 -35
- hammad/data/models/pydantic/models/__init__.py +0 -28
- hammad/data/models/pydantic/models/arbitrary_model.py +0 -46
- hammad/data/models/pydantic/models/cacheable_model.py +0 -79
- hammad/data/models/pydantic/models/fast_model.py +0 -318
- hammad/data/models/pydantic/models/function_model.py +0 -176
- hammad/data/models/pydantic/models/subscriptable_model.py +0 -63
- hammad/performance/__init__.py +0 -36
- hammad/py.typed +0 -0
- hammad_python-0.0.14.dist-info/METADATA +0 -70
- hammad_python-0.0.14.dist-info/RECORD +0 -99
- {hammad_python-0.0.14.dist-info → hammad_python-0.0.16.dist-info}/WHEEL +0 -0
- {hammad_python-0.0.14.dist-info → hammad_python-0.0.16.dist-info}/licenses/LICENSE +0 -0
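The largest single addition in this release is the new streaming layer, hammad/genai/language_models/_streaming.py (+622 lines), reproduced in full below. As rough orientation before reading it, here is a minimal sketch of how the synchronous Stream wrapper is consumed, based only on the code shown below. The SimpleNamespace chunks and the "gpt-4o-mini" model name are hypothetical stand-ins for LiteLLM ChatCompletionChunk objects rather than real API output, the import path assumes the module layout added in this version, and the sketch assumes LanguageModelResponseChunk and LanguageModelResponse expose the fields they are constructed with:

from types import SimpleNamespace

from hammad.genai.language_models._streaming import Stream

# Hypothetical stand-in for a LiteLLM streaming response; a real stream would
# come from a streaming chat-completion call, not hand-built namespaces.
def fake_litellm_chunks():
    for piece, finish in [("Hel", None), ("lo!", "stop")]:
        yield SimpleNamespace(
            model="gpt-4o-mini",
            choices=[
                SimpleNamespace(
                    delta=SimpleNamespace(content=piece),
                    finish_reason=finish,
                )
            ],
        )

stream = Stream(fake_litellm_chunks(), output_type=str, model="gpt-4o-mini")
for chunk in stream:              # yields LanguageModelResponseChunk objects
    print(chunk.content, end="")  # prints each new delta ("Hel", then "lo!")
response = stream.collect()       # joins the deltas into a LanguageModelResponse
print(response.output)            # expected: "Hello!"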
hammad/genai/language_models/_streaming.py (new file)
@@ -0,0 +1,622 @@
"""hammad.genai.language_models._streaming"""

from typing import (
    List,
    Type,
    TypeVar,
    Generic,
    Iterator,
    AsyncIterator,
    Optional,
    Any,
    Callable,
    Dict,
)

from ...typing import get_origin, get_args

from .language_model_response import LanguageModelResponse
from .language_model_response_chunk import LanguageModelResponseChunk

__all__ = [
    "Stream",
    "AsyncStream",
    "InstructorStreamCollector",
    "InstructorStreamWrapper",
    "AsyncInstructorStreamWrapper",
    "StreamingChunkProcessor",
]

T = TypeVar("T")


class InstructorStreamCollector:
    """Collector for instructor streaming responses using hooks."""

    def __init__(self):
        self.raw_chunks = []
        self.completion_responses = []
        self.last_response = None
        self.error = None

    def on_completion_response(self, response):
        """Hook handler for completion responses."""
        self.completion_responses.append(response)

    def on_completion_error(self, error):
        """Hook handler for completion errors."""
        self.error = error

    def add_chunk(self, chunk):
        """Add a raw chunk to the collector."""
        self.raw_chunks.append(chunk)

    def get_raw_content(self):
        """Get raw content from completion responses."""
        if self.completion_responses:
            last_response = self.completion_responses[-1]
            if hasattr(last_response, 'choices') and last_response.choices:
                choice = last_response.choices[0]
                if hasattr(choice, 'message'):
                    return getattr(choice.message, 'content', None)
        return None

    def get_raw_completion(self):
        """Get the raw completion object."""
        return self.completion_responses[-1] if self.completion_responses else None

    def get_tool_calls(self):
        """Get tool calls from completion responses."""
        if self.completion_responses:
            last_response = self.completion_responses[-1]
            if hasattr(last_response, 'choices') and last_response.choices:
                choice = last_response.choices[0]
                if hasattr(choice, 'message'):
                    return getattr(choice.message, 'tool_calls', None)
        return None


class StreamingChunkProcessor:
    """Process streaming chunks to extract only new content."""

    def __init__(self, output_type: Type[T], response_field_name: Optional[str] = None):
        self.output_type = output_type
        self.response_field_name = response_field_name
        self.previous_chunk = None
        self.previous_content = ""

    def process_chunk(self, chunk: Any) -> Optional[str]:
        """Process a chunk and return only the new content."""
        # Handle list types (e.g., list[str])
        from ...typing import get_origin
        origin = get_origin(self.output_type)

        if origin is list:
            return self._process_list_chunk(chunk)
        elif self.response_field_name and hasattr(chunk, self.response_field_name):
            return self._process_field_chunk(chunk)
        else:
            return self._process_simple_chunk(chunk)

    def _process_list_chunk(self, chunk: Any) -> Optional[str]:
        """Process chunks for list types."""
        current_list = []

        if isinstance(chunk, list):
            current_list = chunk
        elif hasattr(chunk, 'value') and isinstance(chunk.value, list):
            current_list = chunk.value
        elif hasattr(chunk, self.response_field_name) and isinstance(getattr(chunk, self.response_field_name), list):
            current_list = getattr(chunk, self.response_field_name)

        if not current_list:
            return None

        # For list types, return only new items
        if self.previous_chunk is None:
            # First chunk - return the last item
            if current_list:
                self.previous_chunk = current_list
                return str(current_list[-1])
        else:
            # Subsequent chunks - return only new items
            prev_list = self.previous_chunk if isinstance(self.previous_chunk, list) else []
            prev_len = len(prev_list)

            if len(current_list) > prev_len:
                new_items = current_list[prev_len:]
                self.previous_chunk = current_list
                if new_items:
                    return str(new_items[-1])

        return None

    def _process_field_chunk(self, chunk: Any) -> Optional[str]:
        """Process chunks with a specific response field."""
        if not hasattr(chunk, self.response_field_name):
            return None

        field_value = getattr(chunk, self.response_field_name)

        if isinstance(field_value, str):
            # For string fields, return only new content
            if self.previous_chunk is None:
                self.previous_chunk = chunk
                self.previous_content = field_value
                return field_value
            else:
                prev_value = self.previous_content
                current_value = field_value

                if current_value.startswith(prev_value):
                    new_content = current_value[len(prev_value):]
                    self.previous_chunk = chunk
                    self.previous_content = current_value
                    return new_content if new_content else None
                else:
                    self.previous_chunk = chunk
                    self.previous_content = current_value
                    return current_value
        elif isinstance(field_value, list):
            # For list fields in response field
            if self.previous_chunk is None:
                self.previous_chunk = chunk
                if field_value:
                    return str(field_value[-1])
            else:
                prev_field = getattr(self.previous_chunk, self.response_field_name, [])
                prev_len = len(prev_field) if isinstance(prev_field, list) else 0

                if len(field_value) > prev_len:
                    new_items = field_value[prev_len:]
                    self.previous_chunk = chunk
                    if new_items:
                        return str(new_items[-1])

        return None

    def _process_simple_chunk(self, chunk: Any) -> Optional[str]:
        """Process simple chunks without response fields."""
        if hasattr(chunk, 'value'):
            value = chunk.value
            if isinstance(value, str):
                if self.previous_content:
                    if value.startswith(self.previous_content):
                        new_content = value[len(self.previous_content):]
                        self.previous_content = value
                        return new_content if new_content else None
                    else:
                        self.previous_content = value
                        return value
                else:
                    self.previous_content = value
                    return value
        elif isinstance(chunk, str):
            if self.previous_content:
                if chunk.startswith(self.previous_content):
                    new_content = chunk[len(self.previous_content):]
                    self.previous_content = chunk
                    return new_content if new_content else None
                else:
                    self.previous_content = chunk
                    return chunk
            else:
                self.previous_content = chunk
                return chunk
        elif self.output_type in (int, float, bool):
            # For primitive types, return string representation
            return str(chunk)

        return None


class InstructorStreamWrapper:
    """Wrapper for instructor streams that collects raw responses via hooks."""

    def __init__(self, stream: Iterator[Any], collector: InstructorStreamCollector):
        self._stream = stream
        self.collector = collector

    def __iter__(self):
        return self

    def __next__(self):
        chunk = next(self._stream)
        self.collector.add_chunk(chunk)
        return chunk

    def get_raw_content(self):
        return self.collector.get_raw_content()

    def get_raw_completion(self):
        return self.collector.get_raw_completion()

    def get_tool_calls(self):
        return self.collector.get_tool_calls()


class AsyncInstructorStreamWrapper:
    """Async wrapper for instructor streams that collects raw responses via hooks."""

    def __init__(self, stream: AsyncIterator[Any], collector: InstructorStreamCollector):
        self._stream = stream
        self.collector = collector

    def __aiter__(self):
        return self

    async def __anext__(self):
        chunk = await self._stream.__anext__()
        self.collector.add_chunk(chunk)
        return chunk

    def get_raw_content(self):
        return self.collector.get_raw_content()

    def get_raw_completion(self):
        return self.collector.get_raw_completion()

    def get_tool_calls(self):
        return self.collector.get_tool_calls()


class Stream(Generic[T]):
    """Synchronous stream wrapper for language model streaming.

    This class provides a unified interface for streaming responses
    from both LiteLLM and Instructor, handling the different chunk
    formats and providing consistent access patterns.
    """

    def __init__(
        self,
        stream: Iterator[Any],
        output_type: Type[T] = str,
        model: Optional[str] = None,
        response_field_name: Optional[str] = None,
    ):
        """Initialize the stream.

        Args:
            stream: The underlying stream iterator
            output_type: The expected output type
            model: The model name
            response_field_name: The field name for structured outputs
        """
        self._stream = stream
        self._output_type = output_type
        self._model = model
        self._response_field_name = response_field_name
        self._chunks: List[LanguageModelResponseChunk[T]] = []
        self._final_output: Optional[T] = None
        self._is_instructor = output_type != str
        self._is_consumed = False
        self._previous_chunk_output = None

    def __iter__(self) -> Iterator[LanguageModelResponseChunk[T]]:
        """Iterate over response chunks."""
        for chunk in self._stream:
            response_chunk = self._process_chunk(chunk)
            if response_chunk:
                self._chunks.append(response_chunk)
                yield response_chunk
        self._is_consumed = True

    def _process_chunk(self, chunk: Any) -> Optional[LanguageModelResponseChunk[T]]:
        """Process a raw chunk into a LanguageModelResponseChunk."""
        if self._is_instructor:
            # Handle instructor streaming (Partial/Iterable)
            output = chunk

            # Use the chunk processor to get only new content
            if not hasattr(self, '_chunk_processor'):
                self._chunk_processor = StreamingChunkProcessor(self._output_type, self._response_field_name)

            content = self._chunk_processor.process_chunk(chunk)

            # Extract the proper output value
            if self._response_field_name and hasattr(chunk, self._response_field_name):
                output_value = getattr(chunk, self._response_field_name)
            else:
                output_value = chunk

            return LanguageModelResponseChunk(
                output=output_value,
                content=content,
                model=self._model,
                chunk=chunk,
                is_final=hasattr(chunk, "_is_final") and chunk._is_final,
            )
        else:
            # Handle LiteLLM streaming (ChatCompletionChunk)
            if hasattr(chunk, "choices") and chunk.choices:
                choice = chunk.choices[0]
                content = None
                if hasattr(choice, "delta") and choice.delta:
                    content = getattr(choice.delta, "content", None)

                return LanguageModelResponseChunk(
                    content=content,
                    output=content,
                    model=getattr(chunk, "model", self._model),
                    finish_reason=getattr(choice, "finish_reason", None),
                    chunk=chunk,
                    is_final=getattr(choice, "finish_reason", None) is not None,
                )
            return None

    def collect(self) -> LanguageModelResponse[T]:
        """Collect all chunks and return a complete LanguageModelResponse object."""
        if not self._chunks:
            # Consume the stream if not already consumed
            list(self)

        if self._is_instructor and self._chunks:
            # For instructor, the final chunk contains the complete object
            final_chunk = self._chunks[-1]

            # Check if stream collector is available for raw content
            raw_content = None
            raw_completion = None
            tool_calls = None

            if hasattr(self._stream, "collector"):
                collector = self._stream.collector
                raw_content = collector.get_raw_content()
                raw_completion = collector.get_raw_completion()
                tool_calls = collector.get_tool_calls()
            elif hasattr(self._stream, "get_raw_content"):
                raw_content = self._stream.get_raw_content()
                raw_completion = self._stream.get_raw_completion() if hasattr(self._stream, "get_raw_completion") else None
                tool_calls = self._stream.get_tool_calls() if hasattr(self._stream, "get_tool_calls") else None

            return LanguageModelResponse(
                output=final_chunk.output,
                model=final_chunk.model or self._model or "unknown",
                completion=raw_completion,
                content=raw_content,
                tool_calls=tool_calls,
            )
        else:
            # For LiteLLM, combine content from all chunks
            content_parts = [chunk.content for chunk in self._chunks if chunk.content]
            combined_content = "".join(content_parts)

            # Create a mock completion for consistency
            mock_completion = None
            if self._chunks:
                mock_completion = self._chunks[-1].chunk

            return LanguageModelResponse(
                output=combined_content,
                model=self._model or "unknown",
                completion=mock_completion,
                content=combined_content,
            )

    def to_response(self) -> LanguageModelResponse[T]:
        """Convert the stream to a LanguageModelResponse object.

        This method can only be called after the stream has been fully consumed.
        It's an alias for collect() with a check for consumption state.

        Returns:
            LanguageModelResponse[T]: The complete response object

        Raises:
            RuntimeError: If the stream has not been fully consumed
        """
        if not self._is_consumed and not self._chunks:
            raise RuntimeError(
                "Stream must be fully consumed before converting to response. "
                "Use collect() or iterate through the stream first."
            )

        return self.collect()

    def to_message(self) -> Any:
        """Convert the stream to a ChatCompletionMessageParam.

        This method can only be called after the stream has been fully consumed.
        It converts the final response to a message format.

        Returns:
            ChatCompletionMessageParam: The response as a chat message

        Raises:
            RuntimeError: If the stream has not been fully consumed
        """
        if not self._is_consumed and not self._chunks:
            raise RuntimeError(
                "Stream must be fully consumed before converting to message. "
                "Use collect() or iterate through the stream first."
            )

        response = self.collect()
        return response.to_message()


class AsyncStream(Generic[T]):
    """Asynchronous stream wrapper for language model streaming.

    This class provides a unified interface for async streaming responses
    from both LiteLLM and Instructor, handling the different chunk
    formats and providing consistent access patterns.
    """

    def __init__(
        self,
        stream: AsyncIterator[Any],
        output_type: Type[T] = str,
        model: Optional[str] = None,
        response_field_name: Optional[str] = None,
    ):
        """Initialize the async stream.

        Args:
            stream: The underlying async stream iterator
            output_type: The expected output type
            model: The model name
            response_field_name: The field name for structured outputs
        """
        self._stream = stream
        self._output_type = output_type
        self._model = model
        self._response_field_name = response_field_name
        self._chunks: List[LanguageModelResponseChunk[T]] = []
        self._final_output: Optional[T] = None
        self._is_instructor = output_type != str
        self._is_consumed = False
        self._previous_chunk_output = None

    def __aiter__(self) -> AsyncIterator[LanguageModelResponseChunk[T]]:
        """Async iterate over response chunks."""
        return self

    async def __anext__(self) -> LanguageModelResponseChunk[T]:
        """Get the next response chunk."""
        try:
            chunk = await self._stream.__anext__()
            response_chunk = self._process_chunk(chunk)
            if response_chunk:
                self._chunks.append(response_chunk)
                return response_chunk
            else:
                return await self.__anext__()  # Skip empty chunks
        except StopAsyncIteration:
            self._is_consumed = True
            raise StopAsyncIteration

    def _process_chunk(self, chunk: Any) -> Optional[LanguageModelResponseChunk[T]]:
        """Process a raw chunk into a LanguageModelResponseChunk."""
        if self._is_instructor:
            # Handle instructor streaming (Partial/Iterable)
            output = chunk

            # Use the chunk processor to get only new content
            if not hasattr(self, '_chunk_processor'):
                self._chunk_processor = StreamingChunkProcessor(self._output_type, self._response_field_name)

            content = self._chunk_processor.process_chunk(chunk)

            # Extract the proper output value
            if self._response_field_name and hasattr(chunk, self._response_field_name):
                output_value = getattr(chunk, self._response_field_name)
            else:
                output_value = chunk

            return LanguageModelResponseChunk(
                output=output_value,
                content=content,
                model=self._model,
                chunk=chunk,
                is_final=hasattr(chunk, "_is_final") and chunk._is_final,
            )
        else:
            # Handle LiteLLM streaming (ChatCompletionChunk)
            if hasattr(chunk, "choices") and chunk.choices:
                choice = chunk.choices[0]
                content = None
                if hasattr(choice, "delta") and choice.delta:
                    content = getattr(choice.delta, "content", None)

                return LanguageModelResponseChunk(
                    content=content,
                    output=content,
                    model=getattr(chunk, "model", self._model),
                    finish_reason=getattr(choice, "finish_reason", None),
                    chunk=chunk,
                    is_final=getattr(choice, "finish_reason", None) is not None,
                )
            return None

    async def collect(self) -> LanguageModelResponse[T]:
        """Collect all chunks and return a complete LanguageModelResponse object."""
        if not self._chunks:
            # Consume the stream if not already consumed
            async for _ in self:
                pass

        if self._is_instructor and self._chunks:
            # For instructor, the final chunk contains the complete object
            final_chunk = self._chunks[-1]

            # Check if stream collector is available for raw content
            raw_content = None
            raw_completion = None
            tool_calls = None

            if hasattr(self._stream, "collector"):
                collector = self._stream.collector
                raw_content = collector.get_raw_content()
                raw_completion = collector.get_raw_completion()
                tool_calls = collector.get_tool_calls()
            elif hasattr(self._stream, "get_raw_content"):
                raw_content = self._stream.get_raw_content()
                raw_completion = self._stream.get_raw_completion() if hasattr(self._stream, "get_raw_completion") else None
                tool_calls = self._stream.get_tool_calls() if hasattr(self._stream, "get_tool_calls") else None

            return LanguageModelResponse(
                output=final_chunk.output,
                model=final_chunk.model or self._model or "unknown",
                completion=raw_completion,
                content=raw_content,
                tool_calls=tool_calls,
            )
        else:
            # For LiteLLM, combine content from all chunks
            content_parts = [chunk.content for chunk in self._chunks if chunk.content]
            combined_content = "".join(content_parts)

            # Create a mock completion for consistency
            mock_completion = None
            if self._chunks:
                mock_completion = self._chunks[-1].chunk

            return LanguageModelResponse(
                output=combined_content,
                model=self._model or "unknown",
                completion=mock_completion,
                content=combined_content,
            )

    async def to_response(self) -> LanguageModelResponse[T]:
        """Convert the stream to a LanguageModelResponse object.

        This method can only be called after the stream has been fully consumed.
        It's an alias for collect() with a check for consumption state.

        Returns:
            LanguageModelResponse[T]: The complete response object

        Raises:
            RuntimeError: If the stream has not been fully consumed
        """
        if not self._is_consumed and not self._chunks:
            raise RuntimeError(
                "Stream must be fully consumed before converting to response. "
                "Use collect() or iterate through the stream first."
            )

        return await self.collect()

    async def to_message(self) -> Any:
        """Convert the stream to a ChatCompletionMessageParam.

        This method can only be called after the stream has been fully consumed.
        It converts the final response to a message format.

        Returns:
            ChatCompletionMessageParam: The response as a chat message

        Raises:
            RuntimeError: If the stream has not been fully consumed
        """
        if not self._is_consumed and not self._chunks:
            raise RuntimeError(
                "Stream must be fully consumed before converting to message. "
                "Use collect() or iterate through the stream first."
            )

        response = await self.collect()
        return response.to_message()
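AsyncStream mirrors the synchronous interface for async iterators: it is consumed with async for and collected with await stream.collect(). A minimal sketch under the same assumptions as the synchronous example above (hypothetical SimpleNamespace stand-ins for LiteLLM chunks, import path from this release):

import asyncio
from types import SimpleNamespace

from hammad.genai.language_models._streaming import AsyncStream

# Hypothetical async generator standing in for an async LiteLLM stream.
async def fake_async_chunks():
    for piece, finish in [("Hi", None), (" there", "stop")]:
        yield SimpleNamespace(
            model="gpt-4o-mini",
            choices=[
                SimpleNamespace(
                    delta=SimpleNamespace(content=piece),
                    finish_reason=finish,
                )
            ],
        )

async def main():
    stream = AsyncStream(fake_async_chunks(), output_type=str, model="gpt-4o-mini")
    async for chunk in stream:          # one LanguageModelResponseChunk per delta
        print(chunk.content, end="")
    response = await stream.collect()   # deltas joined into one response
    print()
    print(response.output)              # expected: "Hi there"

asyncio.run(main())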