hammad-python 0.0.19__py3-none-any.whl → 0.0.20__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
- hammad/__init__.py +7 -137
- hammad/_internal.py +1 -0
- hammad/cli/_runner.py +8 -8
- hammad/cli/plugins.py +55 -26
- hammad/cli/styles/utils.py +16 -8
- hammad/data/__init__.py +1 -5
- hammad/data/collections/__init__.py +2 -3
- hammad/data/collections/collection.py +41 -22
- hammad/data/collections/indexes/__init__.py +1 -1
- hammad/data/collections/indexes/qdrant/__init__.py +1 -1
- hammad/data/collections/indexes/qdrant/index.py +106 -118
- hammad/data/collections/indexes/qdrant/settings.py +14 -14
- hammad/data/collections/indexes/qdrant/utils.py +28 -38
- hammad/data/collections/indexes/tantivy/__init__.py +1 -1
- hammad/data/collections/indexes/tantivy/index.py +57 -59
- hammad/data/collections/indexes/tantivy/settings.py +8 -19
- hammad/data/collections/indexes/tantivy/utils.py +28 -52
- hammad/data/models/__init__.py +2 -7
- hammad/data/sql/__init__.py +1 -1
- hammad/data/sql/database.py +71 -73
- hammad/data/sql/types.py +37 -51
- hammad/formatting/__init__.py +2 -1
- hammad/formatting/json/converters.py +2 -2
- hammad/genai/__init__.py +96 -36
- hammad/genai/agents/__init__.py +47 -1
- hammad/genai/agents/agent.py +1022 -0
- hammad/genai/agents/run.py +615 -0
- hammad/genai/agents/types/__init__.py +29 -22
- hammad/genai/agents/types/agent_context.py +13 -0
- hammad/genai/agents/types/agent_event.py +128 -0
- hammad/genai/agents/types/agent_hooks.py +220 -0
- hammad/genai/agents/types/agent_messages.py +31 -0
- hammad/genai/agents/types/agent_response.py +90 -0
- hammad/genai/agents/types/agent_stream.py +242 -0
- hammad/genai/models/__init__.py +1 -0
- hammad/genai/models/embeddings/__init__.py +39 -0
- hammad/genai/{embedding_models/embedding_model.py → models/embeddings/model.py} +45 -41
- hammad/genai/{embedding_models → models/embeddings}/run.py +10 -8
- hammad/genai/models/embeddings/types/__init__.py +37 -0
- hammad/genai/{embedding_models → models/embeddings/types}/embedding_model_name.py +2 -4
- hammad/genai/{embedding_models → models/embeddings/types}/embedding_model_response.py +11 -4
- hammad/genai/{embedding_models/embedding_model_request.py → models/embeddings/types/embedding_model_run_params.py} +4 -3
- hammad/genai/models/embeddings/types/embedding_model_settings.py +47 -0
- hammad/genai/models/language/__init__.py +48 -0
- hammad/genai/{language_models/language_model.py → models/language/model.py} +481 -204
- hammad/genai/{language_models → models/language}/run.py +80 -57
- hammad/genai/models/language/types/__init__.py +40 -0
- hammad/genai/models/language/types/language_model_instructor_mode.py +47 -0
- hammad/genai/models/language/types/language_model_messages.py +28 -0
- hammad/genai/{language_models/_types.py → models/language/types/language_model_name.py} +3 -40
- hammad/genai/{language_models → models/language/types}/language_model_request.py +17 -25
- hammad/genai/{language_models → models/language/types}/language_model_response.py +61 -68
- hammad/genai/{language_models → models/language/types}/language_model_response_chunk.py +8 -5
- hammad/genai/models/language/types/language_model_settings.py +89 -0
- hammad/genai/{language_models/_streaming.py → models/language/types/language_model_stream.py} +221 -243
- hammad/genai/{language_models/_utils → models/language/utils}/__init__.py +8 -11
- hammad/genai/models/language/utils/requests.py +421 -0
- hammad/genai/{language_models/_utils/_structured_outputs.py → models/language/utils/structured_outputs.py} +31 -20
- hammad/genai/models/model_provider.py +4 -0
- hammad/genai/{multimodal_models.py → models/multimodal.py} +4 -5
- hammad/genai/models/reranking.py +26 -0
- hammad/genai/types/__init__.py +1 -0
- hammad/genai/types/base.py +215 -0
- hammad/genai/{agents/types → types}/history.py +101 -88
- hammad/genai/{agents/types/tool.py → types/tools.py} +156 -141
- hammad/logging/logger.py +1 -1
- hammad/mcp/client/__init__.py +2 -3
- hammad/mcp/client/client.py +10 -10
- hammad/mcp/servers/__init__.py +2 -1
- hammad/service/decorators.py +1 -3
- hammad/web/models.py +1 -3
- hammad/web/search/client.py +10 -22
- {hammad_python-0.0.19.dist-info → hammad_python-0.0.20.dist-info}/METADATA +10 -2
- hammad_python-0.0.20.dist-info/RECORD +127 -0
- hammad/genai/embedding_models/__init__.py +0 -41
- hammad/genai/language_models/__init__.py +0 -35
- hammad/genai/language_models/_utils/_completions.py +0 -131
- hammad/genai/language_models/_utils/_messages.py +0 -89
- hammad/genai/language_models/_utils/_requests.py +0 -202
- hammad/genai/rerank_models.py +0 -26
- hammad_python-0.0.19.dist-info/RECORD +0 -111
- {hammad_python-0.0.19.dist-info → hammad_python-0.0.20.dist-info}/WHEEL +0 -0
- {hammad_python-0.0.19.dist-info → hammad_python-0.0.20.dist-info}/licenses/LICENSE +0 -0
hammad/genai/{language_models/_streaming.py → models/language/types/language_model_stream.py}
RENAMED
@@ -1,5 +1,6 @@
-"""hammad.genai.
+"""hammad.genai.models.language.types.language_model_stream"""
 
+import asyncio
 from typing import (
     List,
     Type,
@@ -11,16 +12,17 @@ from typing import (
     Any,
     Callable,
     Dict,
+    Union,
 )
 
-from
+from .....typing import get_origin, get_args
+from ....types.base import BaseGenAIModelStream
 
 from .language_model_response import LanguageModelResponse
 from .language_model_response_chunk import LanguageModelResponseChunk
 
 __all__ = [
-    "
-    "AsyncStream",
+    "LanguageModelStream",
     "InstructorStreamCollector",
     "InstructorStreamWrapper",
     "AsyncInstructorStreamWrapper",
@@ -32,86 +34,89 @@ T = TypeVar("T")
 
 class InstructorStreamCollector:
     """Collector for instructor streaming responses using hooks."""
-
+
     def __init__(self):
         self.raw_chunks = []
         self.completion_responses = []
         self.last_response = None
         self.error = None
-
+
     def on_completion_response(self, response):
         """Hook handler for completion responses."""
         self.completion_responses.append(response)
-
+
     def on_completion_error(self, error):
         """Hook handler for completion errors."""
         self.error = error
-
+
     def add_chunk(self, chunk):
         """Add a raw chunk to the collector."""
         self.raw_chunks.append(chunk)
-
+
     def get_raw_content(self):
         """Get raw content from completion responses."""
         if self.completion_responses:
             last_response = self.completion_responses[-1]
-            if hasattr(last_response,
+            if hasattr(last_response, "choices") and last_response.choices:
                 choice = last_response.choices[0]
-                if hasattr(choice,
-                    return getattr(choice.message,
+                if hasattr(choice, "message"):
+                    return getattr(choice.message, "content", None)
         return None
-
+
     def get_raw_completion(self):
         """Get the raw completion object."""
         return self.completion_responses[-1] if self.completion_responses else None
-
+
     def get_tool_calls(self):
         """Get tool calls from completion responses."""
         if self.completion_responses:
             last_response = self.completion_responses[-1]
-            if hasattr(last_response,
+            if hasattr(last_response, "choices") and last_response.choices:
                 choice = last_response.choices[0]
-                if hasattr(choice,
-                    return getattr(choice.message,
+                if hasattr(choice, "message"):
+                    return getattr(choice.message, "tool_calls", None)
         return None
 
 
 class StreamingChunkProcessor:
     """Process streaming chunks to extract only new content."""
-
+
     def __init__(self, output_type: Type[T], response_field_name: Optional[str] = None):
         self.output_type = output_type
         self.response_field_name = response_field_name
         self.previous_chunk = None
         self.previous_content = ""
-
+
     def process_chunk(self, chunk: Any) -> Optional[str]:
         """Process a chunk and return only the new content."""
         # Handle list types (e.g., list[str])
-        from
+        from .....typing import get_origin
+
         origin = get_origin(self.output_type)
-
+
         if origin is list:
             return self._process_list_chunk(chunk)
         elif self.response_field_name and hasattr(chunk, self.response_field_name):
             return self._process_field_chunk(chunk)
         else:
             return self._process_simple_chunk(chunk)
-
+
     def _process_list_chunk(self, chunk: Any) -> Optional[str]:
         """Process chunks for list types."""
         current_list = []
-
+
         if isinstance(chunk, list):
             current_list = chunk
-        elif hasattr(chunk,
+        elif hasattr(chunk, "value") and isinstance(chunk.value, list):
             current_list = chunk.value
-        elif hasattr(chunk, self.response_field_name) and isinstance(
+        elif hasattr(chunk, self.response_field_name) and isinstance(
+            getattr(chunk, self.response_field_name), list
+        ):
             current_list = getattr(chunk, self.response_field_name)
-
+
         if not current_list:
             return None
-
+
         # For list types, return only new items
         if self.previous_chunk is None:
             # First chunk - return the last item
@@ -120,24 +125,26 @@ class StreamingChunkProcessor:
             return str(current_list[-1])
         else:
             # Subsequent chunks - return only new items
-            prev_list =
+            prev_list = (
+                self.previous_chunk if isinstance(self.previous_chunk, list) else []
+            )
             prev_len = len(prev_list)
-
+
             if len(current_list) > prev_len:
                 new_items = current_list[prev_len:]
                 self.previous_chunk = current_list
                 if new_items:
                     return str(new_items[-1])
-
+
         return None
-
+
     def _process_field_chunk(self, chunk: Any) -> Optional[str]:
         """Process chunks with a specific response field."""
         if not hasattr(chunk, self.response_field_name):
             return None
-
+
         field_value = getattr(chunk, self.response_field_name)
-
+
         if isinstance(field_value, str):
             # For string fields, return only new content
             if self.previous_chunk is None:
@@ -147,9 +154,9 @@ class StreamingChunkProcessor:
             else:
                 prev_value = self.previous_content
                 current_value = field_value
-
+
                 if current_value.startswith(prev_value):
-                    new_content = current_value[len(prev_value):]
+                    new_content = current_value[len(prev_value) :]
                     self.previous_chunk = chunk
                     self.previous_content = current_value
                     return new_content if new_content else None
@@ -166,23 +173,23 @@ class StreamingChunkProcessor:
             else:
                 prev_field = getattr(self.previous_chunk, self.response_field_name, [])
                 prev_len = len(prev_field) if isinstance(prev_field, list) else 0
-
+
             if len(field_value) > prev_len:
                 new_items = field_value[prev_len:]
                 self.previous_chunk = chunk
                 if new_items:
                     return str(new_items[-1])
-
+
         return None
-
+
     def _process_simple_chunk(self, chunk: Any) -> Optional[str]:
         """Process simple chunks without response fields."""
-        if hasattr(chunk,
+        if hasattr(chunk, "value"):
             value = chunk.value
             if isinstance(value, str):
                 if self.previous_content:
                     if value.startswith(self.previous_content):
-                        new_content = value[len(self.previous_content):]
+                        new_content = value[len(self.previous_content) :]
                         self.previous_content = value
                         return new_content if new_content else None
                     else:
@@ -194,7 +201,7 @@ class StreamingChunkProcessor:
         elif isinstance(chunk, str):
             if self.previous_content:
                 if chunk.startswith(self.previous_content):
-                    new_content = chunk[len(self.previous_content):]
+                    new_content = chunk[len(self.previous_content) :]
                     self.previous_content = chunk
                     return new_content if new_content else None
                 else:
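The `StreamingChunkProcessor` hunks above are mostly formatting-only, but they show the technique the class relies on: instructor emits cumulative partial values, and the processor recovers the per-chunk delta by comparing each value against the previously seen prefix. A minimal, self-contained sketch of that idea (the function name and structure below are illustrative, not part of the hammad API):

```python
# Sketch: derive per-chunk deltas from cumulative partial strings,
# mirroring the prefix comparison used by StreamingChunkProcessor.
from typing import Iterator, Optional


def extract_deltas(partials: Iterator[str]) -> Iterator[str]:
    previous = ""
    for current in partials:
        if current.startswith(previous):
            # Normal case: the new value extends the old one.
            delta: Optional[str] = current[len(previous):] or None
        else:
            # Value changed shape; emit it whole.
            delta = current
        previous = current
        if delta:
            yield delta


# Cumulative partials like an instructor stream would produce:
print(list(extract_deltas(["He", "Hell", "Hello!"])))  # ['He', 'll', 'o!']
```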
@@ -206,83 +213,95 @@ class StreamingChunkProcessor:
         elif self.output_type in (int, float, bool):
             # For primitive types, return string representation
             return str(chunk)
-
+
         return None
 
 
 class InstructorStreamWrapper:
     """Wrapper for instructor streams that collects raw responses via hooks."""
-
+
     def __init__(self, stream: Iterator[Any], collector: InstructorStreamCollector):
         self._stream = stream
         self.collector = collector
-
+
     def __iter__(self):
         return self
-
+
     def __next__(self):
         chunk = next(self._stream)
         self.collector.add_chunk(chunk)
         return chunk
-
+
     def get_raw_content(self):
         return self.collector.get_raw_content()
-
+
     def get_raw_completion(self):
         return self.collector.get_raw_completion()
-
+
     def get_tool_calls(self):
         return self.collector.get_tool_calls()
 
 
 class AsyncInstructorStreamWrapper:
     """Async wrapper for instructor streams that collects raw responses via hooks."""
-
-    def __init__(
+
+    def __init__(
+        self, stream: AsyncIterator[Any], collector: InstructorStreamCollector
+    ):
         self._stream = stream
         self.collector = collector
-
+
     def __aiter__(self):
         return self
-
+
     async def __anext__(self):
         chunk = await self._stream.__anext__()
         self.collector.add_chunk(chunk)
         return chunk
-
+
     def get_raw_content(self):
         return self.collector.get_raw_content()
-
+
     def get_raw_completion(self):
         return self.collector.get_raw_completion()
-
+
     def get_tool_calls(self):
         return self.collector.get_tool_calls()
 
 
-class
-
+class LanguageModelStream(
+    BaseGenAIModelStream[LanguageModelResponseChunk[T]], Generic[T]
+):
+    """Unified stream wrapper for language model streaming.
 
-    This class provides a unified interface for streaming responses
-    from both LiteLLM and Instructor, handling the different chunk
-
+    This class provides a unified interface for both sync and async streaming responses
+    from both LiteLLM and Instructor, handling the different chunk formats and providing
+    consistent access patterns. It inherits from BaseGenAIModelStream and manages
+    both sync and async streaming in a single class.
     """
 
     def __init__(
         self,
-        stream: Iterator[Any],
+        stream: Union[Iterator[Any], AsyncIterator[Any]],
         output_type: Type[T] = str,
         model: Optional[str] = None,
         response_field_name: Optional[str] = None,
     ):
         """Initialize the stream.
-
+
         Args:
-            stream: The underlying stream iterator
+            stream: The underlying stream iterator (sync or async)
             output_type: The expected output type
             model: The model name
             response_field_name: The field name for structured outputs
         """
+        # Initialize base class
+        super().__init__(
+            type="language_model",
+            model=model or "unknown",
+            stream=stream,
+        )
+
         self._stream = stream
         self._output_type = output_type
         self._model = model
@@ -292,9 +311,31 @@ class Stream(Generic[T]):
         self._is_instructor = output_type != str
         self._is_consumed = False
         self._previous_chunk_output = None
+        self._is_async = hasattr(stream, "__anext__")
+        self._full_content = ""
 
     def __iter__(self) -> Iterator[LanguageModelResponseChunk[T]]:
-        """Iterate over response chunks."""
+        """Iterate over response chunks (sync mode)."""
+        if self._is_async:
+            # This is a workaround to allow sync iteration over an async stream
+            # It's not ideal, but it works for simple cases.
+            # A better solution would be to use a dedicated sync entrypoint
+            # if this is a common use case.
+            try:
+                loop = asyncio.get_running_loop()
+            except RuntimeError:
+                loop = asyncio.new_event_loop()
+                asyncio.set_event_loop(loop)
+
+            async_iter = self.__aiter__()
+            while True:
+                try:
+                    # We are calling the async __anext__ which returns a processed chunk
+                    yield loop.run_until_complete(async_iter.__anext__())
+                except StopAsyncIteration:
+                    break
+            return
+
         for chunk in self._stream:
             response_chunk = self._process_chunk(chunk)
             if response_chunk:
@@ -302,18 +343,46 @@ class Stream(Generic[T]):
                 yield response_chunk
         self._is_consumed = True
 
+    def __aiter__(self) -> AsyncIterator[LanguageModelResponseChunk[T]]:
+        """Async iterate over response chunks (async mode)."""
+        if not self._is_async:
+            raise RuntimeError(
+                "Cannot use async iteration on sync stream. Use regular for loop instead."
+            )
+        return self
+
+    async def __anext__(self) -> LanguageModelResponseChunk[T]:
+        """Get the next response chunk (async mode)."""
+        if not self._is_async:
+            raise RuntimeError(
+                "Cannot use async iteration on sync stream. Use regular for loop instead."
+            )
+
+        try:
+            chunk = await self._stream.__anext__()
+            response_chunk = self._process_chunk(chunk)
+            if response_chunk:
+                self._chunks.append(response_chunk)
+                return response_chunk
+            else:
+                return await self.__anext__()  # Skip empty chunks
+        except StopAsyncIteration:
+            self._is_consumed = True
+            raise StopAsyncIteration
+
     def _process_chunk(self, chunk: Any) -> Optional[LanguageModelResponseChunk[T]]:
         """Process a raw chunk into a LanguageModelResponseChunk."""
         if self._is_instructor:
             # Handle instructor streaming (Partial/Iterable)
-
-
+
             # Use the chunk processor to get only new content
-            if not hasattr(self,
-                self._chunk_processor = StreamingChunkProcessor(
-
+            if not hasattr(self, "_chunk_processor"):
+                self._chunk_processor = StreamingChunkProcessor(
+                    self._output_type, self._response_field_name
+                )
+
             content = self._chunk_processor.process_chunk(chunk)
-
+
             # Extract the proper output value
             if self._response_field_name and hasattr(chunk, self._response_field_name):
                 output_value = getattr(chunk, self._response_field_name)
@@ -335,9 +404,12 @@ class Stream(Generic[T]):
             if hasattr(choice, "delta") and choice.delta:
                 content = getattr(choice.delta, "content", None)
 
+            if content is not None:
+                self._full_content += content
+
             return LanguageModelResponseChunk(
                 content=content,
-                output=
+                output=self._full_content,
                 model=getattr(chunk, "model", self._model),
                 finish_reason=getattr(choice, "finish_reason", None),
                 chunk=chunk,
@@ -346,11 +418,34 @@ class Stream(Generic[T]):
         return None
 
     def collect(self) -> LanguageModelResponse[T]:
-        """Collect all chunks and return a complete LanguageModelResponse object."""
+        """Collect all chunks and return a complete LanguageModelResponse object (sync mode)."""
+        if self._is_async:
+            raise RuntimeError(
+                "Cannot use sync collect() on async stream. Use async collect() instead."
+            )
+
         if not self._chunks:
             # Consume the stream if not already consumed
             list(self)
 
+        return self._build_response()
+
+    async def async_collect(self) -> LanguageModelResponse[T]:
+        """Collect all chunks and return a complete LanguageModelResponse object (async mode)."""
+        if not self._is_async:
+            raise RuntimeError(
+                "Cannot use async collect() on sync stream. Use sync collect() instead."
+            )
+
+        if not self._chunks:
+            # Consume the stream if not already consumed
+            async for _ in self:
+                pass
+
+        return self._build_response()
+
+    def _build_response(self) -> LanguageModelResponse[T]:
+        """Build the final LanguageModelResponse from collected chunks."""
         if self._is_instructor and self._chunks:
             # For instructor, the final chunk contains the complete object
             final_chunk = self._chunks[-1]
@@ -359,7 +454,7 @@ class Stream(Generic[T]):
             raw_content = None
             raw_completion = None
             tool_calls = None
-
+
             if hasattr(self._stream, "collector"):
                 collector = self._stream.collector
                 raw_content = collector.get_raw_content()
@@ -367,8 +462,16 @@ class Stream(Generic[T]):
                 tool_calls = collector.get_tool_calls()
             elif hasattr(self._stream, "get_raw_content"):
                 raw_content = self._stream.get_raw_content()
-                raw_completion =
-
+                raw_completion = (
+                    self._stream.get_raw_completion()
+                    if hasattr(self._stream, "get_raw_completion")
+                    else None
+                )
+                tool_calls = (
+                    self._stream.get_tool_calls()
+                    if hasattr(self._stream, "get_tool_calls")
+                    else None
+                )
 
             return LanguageModelResponse(
                 output=final_chunk.output,
@@ -395,7 +498,7 @@ class Stream(Generic[T]):
             )
 
     def to_response(self) -> LanguageModelResponse[T]:
-        """Convert the stream to a LanguageModelResponse object.
+        """Convert the stream to a LanguageModelResponse object (sync mode).
 
         This method can only be called after the stream has been fully consumed.
         It's an alias for collect() with a check for consumption state.
@@ -404,8 +507,13 @@ class Stream(Generic[T]):
             LanguageModelResponse[T]: The complete response object
 
         Raises:
-            RuntimeError: If the stream has not been fully consumed
+            RuntimeError: If the stream has not been fully consumed or is async
         """
+        if self._is_async:
+            raise RuntimeError(
+                "Cannot use sync to_response() on async stream. Use async to_response() instead."
+            )
+
         if not self._is_consumed and not self._chunks:
             raise RuntimeError(
                 "Stream must be fully consumed before converting to response. "
@@ -414,194 +522,59 @@ class Stream(Generic[T]):
 
         return self.collect()
 
-    def
-        """Convert the stream to a
+    async def async_to_response(self) -> LanguageModelResponse[T]:
+        """Convert the stream to a LanguageModelResponse object (async mode).
 
         This method can only be called after the stream has been fully consumed.
-        It
+        It's an alias for async_collect() with a check for consumption state.
 
         Returns:
-
+            LanguageModelResponse[T]: The complete response object
 
         Raises:
-            RuntimeError: If the stream has not been fully consumed
+            RuntimeError: If the stream has not been fully consumed or is sync
         """
-        if not self.
+        if not self._is_async:
             raise RuntimeError(
-                "
-                "Use collect() or iterate through the stream first."
+                "Cannot use async to_response() on sync stream. Use sync to_response() instead."
             )
 
-
-
-
-
-class AsyncStream(Generic[T]):
-    """Asynchronous stream wrapper for language model streaming.
-
-    This class provides a unified interface for async streaming responses
-    from both LiteLLM and Instructor, handling the different chunk
-    formats and providing consistent access patterns.
-    """
-
-    def __init__(
-        self,
-        stream: AsyncIterator[Any],
-        output_type: Type[T] = str,
-        model: Optional[str] = None,
-        response_field_name: Optional[str] = None,
-    ):
-        """Initialize the async stream.
-
-        Args:
-            stream: The underlying async stream iterator
-            output_type: The expected output type
-            model: The model name
-            response_field_name: The field name for structured outputs
-        """
-        self._stream = stream
-        self._output_type = output_type
-        self._model = model
-        self._response_field_name = response_field_name
-        self._chunks: List[LanguageModelResponseChunk[T]] = []
-        self._final_output: Optional[T] = None
-        self._is_instructor = output_type != str
-        self._is_consumed = False
-        self._previous_chunk_output = None
-
-    def __aiter__(self) -> AsyncIterator[LanguageModelResponseChunk[T]]:
-        """Async iterate over response chunks."""
-        return self
-
-    async def __anext__(self) -> LanguageModelResponseChunk[T]:
-        """Get the next response chunk."""
-        try:
-            chunk = await self._stream.__anext__()
-            response_chunk = self._process_chunk(chunk)
-            if response_chunk:
-                self._chunks.append(response_chunk)
-                return response_chunk
-            else:
-                return await self.__anext__()  # Skip empty chunks
-        except StopAsyncIteration:
-            self._is_consumed = True
-            raise StopAsyncIteration
-
-    def _process_chunk(self, chunk: Any) -> Optional[LanguageModelResponseChunk[T]]:
-        """Process a raw chunk into a LanguageModelResponseChunk."""
-        if self._is_instructor:
-            # Handle instructor streaming (Partial/Iterable)
-            output = chunk
-
-            # Use the chunk processor to get only new content
-            if not hasattr(self, '_chunk_processor'):
-                self._chunk_processor = StreamingChunkProcessor(self._output_type, self._response_field_name)
-
-            content = self._chunk_processor.process_chunk(chunk)
-
-            # Extract the proper output value
-            if self._response_field_name and hasattr(chunk, self._response_field_name):
-                output_value = getattr(chunk, self._response_field_name)
-            else:
-                output_value = chunk
-
-            return LanguageModelResponseChunk(
-                output=output_value,
-                content=content,
-                model=self._model,
-                chunk=chunk,
-                is_final=hasattr(chunk, "_is_final") and chunk._is_final,
-            )
-        else:
-            # Handle LiteLLM streaming (ChatCompletionChunk)
-            if hasattr(chunk, "choices") and chunk.choices:
-                choice = chunk.choices[0]
-                content = None
-                if hasattr(choice, "delta") and choice.delta:
-                    content = getattr(choice.delta, "content", None)
-
-                return LanguageModelResponseChunk(
-                    content=content,
-                    output=content,
-                    model=getattr(chunk, "model", self._model),
-                    finish_reason=getattr(choice, "finish_reason", None),
-                    chunk=chunk,
-                    is_final=getattr(choice, "finish_reason", None) is not None,
-                )
-            return None
-
-    async def collect(self) -> LanguageModelResponse[T]:
-        """Collect all chunks and return a complete LanguageModelResponse object."""
-        if not self._chunks:
-            # Consume the stream if not already consumed
-            async for _ in self:
-                pass
-
-        if self._is_instructor and self._chunks:
-            # For instructor, the final chunk contains the complete object
-            final_chunk = self._chunks[-1]
-
-            # Check if stream collector is available for raw content
-            raw_content = None
-            raw_completion = None
-            tool_calls = None
-
-            if hasattr(self._stream, "collector"):
-                collector = self._stream.collector
-                raw_content = collector.get_raw_content()
-                raw_completion = collector.get_raw_completion()
-                tool_calls = collector.get_tool_calls()
-            elif hasattr(self._stream, "get_raw_content"):
-                raw_content = self._stream.get_raw_content()
-                raw_completion = self._stream.get_raw_completion() if hasattr(self._stream, "get_raw_completion") else None
-                tool_calls = self._stream.get_tool_calls() if hasattr(self._stream, "get_tool_calls") else None
-
-            return LanguageModelResponse(
-                output=final_chunk.output,
-                model=final_chunk.model or self._model or "unknown",
-                completion=raw_completion,
-                content=raw_content,
-                tool_calls=tool_calls,
+        if not self._is_consumed and not self._chunks:
+            raise RuntimeError(
+                "Stream must be fully consumed before converting to response. "
+                "Use async_collect() or iterate through the stream first."
             )
-        else:
-            # For LiteLLM, combine content from all chunks
-            content_parts = [chunk.content for chunk in self._chunks if chunk.content]
-            combined_content = "".join(content_parts)
 
-
-            mock_completion = None
-            if self._chunks:
-                mock_completion = self._chunks[-1].chunk
+        return await self.async_collect()
 
-
-
-                model=self._model or "unknown",
-                completion=mock_completion,
-                content=combined_content,
-            )
-
-    async def to_response(self) -> LanguageModelResponse[T]:
-        """Convert the stream to a LanguageModelResponse object.
+    def to_message(self) -> Any:
+        """Convert the stream to a ChatCompletionMessageParam (sync mode).
 
         This method can only be called after the stream has been fully consumed.
-        It
+        It converts the final response to a message format.
 
         Returns:
-
+            ChatCompletionMessageParam: The response as a chat message
 
         Raises:
-            RuntimeError: If the stream has not been fully consumed
+            RuntimeError: If the stream has not been fully consumed or is async
         """
+        if self._is_async:
+            raise RuntimeError(
+                "Cannot use sync to_message() on async stream. Use async to_message() instead."
+            )
+
         if not self._is_consumed and not self._chunks:
             raise RuntimeError(
-                "Stream must be fully consumed before converting to
+                "Stream must be fully consumed before converting to message. "
                 "Use collect() or iterate through the stream first."
             )
 
-
+        response = self.collect()
+        return response.to_message()
 
-    async def
-        """Convert the stream to a ChatCompletionMessageParam.
+    async def async_to_message(self) -> Any:
+        """Convert the stream to a ChatCompletionMessageParam (async mode).
 
         This method can only be called after the stream has been fully consumed.
         It converts the final response to a message format.
@@ -610,13 +583,18 @@ class AsyncStream(Generic[T]):
             ChatCompletionMessageParam: The response as a chat message
 
         Raises:
-            RuntimeError: If the stream has not been fully consumed
+            RuntimeError: If the stream has not been fully consumed or is sync
         """
+        if not self._is_async:
+            raise RuntimeError(
+                "Cannot use async to_message() on sync stream. Use sync to_message() instead."
+            )
+
         if not self._is_consumed and not self._chunks:
             raise RuntimeError(
                 "Stream must be fully consumed before converting to message. "
-                "Use
+                "Use async_collect() or iterate through the stream first."
             )
 
-        response = await self.
-        return response.to_message()
+        response = await self.async_collect()
+        return response.to_message()