hammad-python 0.0.19__py3-none-any.whl → 0.0.20__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
Files changed (83)
  1. hammad/__init__.py +7 -137
  2. hammad/_internal.py +1 -0
  3. hammad/cli/_runner.py +8 -8
  4. hammad/cli/plugins.py +55 -26
  5. hammad/cli/styles/utils.py +16 -8
  6. hammad/data/__init__.py +1 -5
  7. hammad/data/collections/__init__.py +2 -3
  8. hammad/data/collections/collection.py +41 -22
  9. hammad/data/collections/indexes/__init__.py +1 -1
  10. hammad/data/collections/indexes/qdrant/__init__.py +1 -1
  11. hammad/data/collections/indexes/qdrant/index.py +106 -118
  12. hammad/data/collections/indexes/qdrant/settings.py +14 -14
  13. hammad/data/collections/indexes/qdrant/utils.py +28 -38
  14. hammad/data/collections/indexes/tantivy/__init__.py +1 -1
  15. hammad/data/collections/indexes/tantivy/index.py +57 -59
  16. hammad/data/collections/indexes/tantivy/settings.py +8 -19
  17. hammad/data/collections/indexes/tantivy/utils.py +28 -52
  18. hammad/data/models/__init__.py +2 -7
  19. hammad/data/sql/__init__.py +1 -1
  20. hammad/data/sql/database.py +71 -73
  21. hammad/data/sql/types.py +37 -51
  22. hammad/formatting/__init__.py +2 -1
  23. hammad/formatting/json/converters.py +2 -2
  24. hammad/genai/__init__.py +96 -36
  25. hammad/genai/agents/__init__.py +47 -1
  26. hammad/genai/agents/agent.py +1022 -0
  27. hammad/genai/agents/run.py +615 -0
  28. hammad/genai/agents/types/__init__.py +29 -22
  29. hammad/genai/agents/types/agent_context.py +13 -0
  30. hammad/genai/agents/types/agent_event.py +128 -0
  31. hammad/genai/agents/types/agent_hooks.py +220 -0
  32. hammad/genai/agents/types/agent_messages.py +31 -0
  33. hammad/genai/agents/types/agent_response.py +90 -0
  34. hammad/genai/agents/types/agent_stream.py +242 -0
  35. hammad/genai/models/__init__.py +1 -0
  36. hammad/genai/models/embeddings/__init__.py +39 -0
  37. hammad/genai/{embedding_models/embedding_model.py → models/embeddings/model.py} +45 -41
  38. hammad/genai/{embedding_models → models/embeddings}/run.py +10 -8
  39. hammad/genai/models/embeddings/types/__init__.py +37 -0
  40. hammad/genai/{embedding_models → models/embeddings/types}/embedding_model_name.py +2 -4
  41. hammad/genai/{embedding_models → models/embeddings/types}/embedding_model_response.py +11 -4
  42. hammad/genai/{embedding_models/embedding_model_request.py → models/embeddings/types/embedding_model_run_params.py} +4 -3
  43. hammad/genai/models/embeddings/types/embedding_model_settings.py +47 -0
  44. hammad/genai/models/language/__init__.py +48 -0
  45. hammad/genai/{language_models/language_model.py → models/language/model.py} +481 -204
  46. hammad/genai/{language_models → models/language}/run.py +80 -57
  47. hammad/genai/models/language/types/__init__.py +40 -0
  48. hammad/genai/models/language/types/language_model_instructor_mode.py +47 -0
  49. hammad/genai/models/language/types/language_model_messages.py +28 -0
  50. hammad/genai/{language_models/_types.py → models/language/types/language_model_name.py} +3 -40
  51. hammad/genai/{language_models → models/language/types}/language_model_request.py +17 -25
  52. hammad/genai/{language_models → models/language/types}/language_model_response.py +61 -68
  53. hammad/genai/{language_models → models/language/types}/language_model_response_chunk.py +8 -5
  54. hammad/genai/models/language/types/language_model_settings.py +89 -0
  55. hammad/genai/{language_models/_streaming.py → models/language/types/language_model_stream.py} +221 -243
  56. hammad/genai/{language_models/_utils → models/language/utils}/__init__.py +8 -11
  57. hammad/genai/models/language/utils/requests.py +421 -0
  58. hammad/genai/{language_models/_utils/_structured_outputs.py → models/language/utils/structured_outputs.py} +31 -20
  59. hammad/genai/models/model_provider.py +4 -0
  60. hammad/genai/{multimodal_models.py → models/multimodal.py} +4 -5
  61. hammad/genai/models/reranking.py +26 -0
  62. hammad/genai/types/__init__.py +1 -0
  63. hammad/genai/types/base.py +215 -0
  64. hammad/genai/{agents/types → types}/history.py +101 -88
  65. hammad/genai/{agents/types/tool.py → types/tools.py} +156 -141
  66. hammad/logging/logger.py +1 -1
  67. hammad/mcp/client/__init__.py +2 -3
  68. hammad/mcp/client/client.py +10 -10
  69. hammad/mcp/servers/__init__.py +2 -1
  70. hammad/service/decorators.py +1 -3
  71. hammad/web/models.py +1 -3
  72. hammad/web/search/client.py +10 -22
  73. {hammad_python-0.0.19.dist-info → hammad_python-0.0.20.dist-info}/METADATA +10 -2
  74. hammad_python-0.0.20.dist-info/RECORD +127 -0
  75. hammad/genai/embedding_models/__init__.py +0 -41
  76. hammad/genai/language_models/__init__.py +0 -35
  77. hammad/genai/language_models/_utils/_completions.py +0 -131
  78. hammad/genai/language_models/_utils/_messages.py +0 -89
  79. hammad/genai/language_models/_utils/_requests.py +0 -202
  80. hammad/genai/rerank_models.py +0 -26
  81. hammad_python-0.0.19.dist-info/RECORD +0 -111
  82. {hammad_python-0.0.19.dist-info → hammad_python-0.0.20.dist-info}/WHEEL +0 -0
  83. {hammad_python-0.0.19.dist-info → hammad_python-0.0.20.dist-info}/licenses/LICENSE +0 -0
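
The bulk of this release is a restructuring: hammad/genai/language_models and hammad/genai/embedding_models are consolidated under a new hammad/genai/models subpackage, private modules (_streaming.py, _types.py, _utils/) become public ones, and a new agents runtime (agent.py, run.py, and the agent_* types) is added. For downstream code the practical impact is new import paths. A minimal migration sketch in Python, with class names inferred from the renamed files above rather than confirmed by this diff:

    # 0.0.19 (these modules are deleted in 0.0.20)
    # from hammad.genai.language_models.language_model import LanguageModel
    # from hammad.genai.language_models._streaming import Stream, AsyncStream

    # 0.0.20 (paths follow the renames listed above; LanguageModel is inferred
    # from model.py, LanguageModelStream replaces both Stream and AsyncStream)
    from hammad.genai.models.language.model import LanguageModel
    from hammad.genai.models.language.types.language_model_stream import (
        LanguageModelStream,
    )

The detailed diff below covers item 55, the rename of hammad/genai/language_models/_streaming.py to hammad/genai/models/language/types/language_model_stream.py.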
@@ -1,5 +1,6 @@
- """hammad.genai.language_models._streaming"""
+ """hammad.genai.models.language.types.language_model_stream"""
 
+ import asyncio
  from typing import (
  List,
  Type,
@@ -11,16 +12,17 @@ from typing import (
  Any,
  Callable,
  Dict,
+ Union,
  )
 
- from ...typing import get_origin, get_args
+ from .....typing import get_origin, get_args
+ from ....types.base import BaseGenAIModelStream
 
  from .language_model_response import LanguageModelResponse
  from .language_model_response_chunk import LanguageModelResponseChunk
 
  __all__ = [
- "Stream",
- "AsyncStream",
+ "LanguageModelStream",
  "InstructorStreamCollector",
  "InstructorStreamWrapper",
  "AsyncInstructorStreamWrapper",
@@ -32,86 +34,89 @@ T = TypeVar("T")
 
  class InstructorStreamCollector:
  """Collector for instructor streaming responses using hooks."""
-
+
  def __init__(self):
  self.raw_chunks = []
  self.completion_responses = []
  self.last_response = None
  self.error = None
-
+
  def on_completion_response(self, response):
  """Hook handler for completion responses."""
  self.completion_responses.append(response)
-
+
  def on_completion_error(self, error):
  """Hook handler for completion errors."""
  self.error = error
-
+
  def add_chunk(self, chunk):
  """Add a raw chunk to the collector."""
  self.raw_chunks.append(chunk)
-
+
  def get_raw_content(self):
  """Get raw content from completion responses."""
  if self.completion_responses:
  last_response = self.completion_responses[-1]
- if hasattr(last_response, 'choices') and last_response.choices:
+ if hasattr(last_response, "choices") and last_response.choices:
  choice = last_response.choices[0]
- if hasattr(choice, 'message'):
- return getattr(choice.message, 'content', None)
+ if hasattr(choice, "message"):
+ return getattr(choice.message, "content", None)
  return None
-
+
  def get_raw_completion(self):
  """Get the raw completion object."""
  return self.completion_responses[-1] if self.completion_responses else None
-
+
  def get_tool_calls(self):
  """Get tool calls from completion responses."""
  if self.completion_responses:
  last_response = self.completion_responses[-1]
- if hasattr(last_response, 'choices') and last_response.choices:
+ if hasattr(last_response, "choices") and last_response.choices:
  choice = last_response.choices[0]
- if hasattr(choice, 'message'):
- return getattr(choice.message, 'tool_calls', None)
+ if hasattr(choice, "message"):
+ return getattr(choice.message, "tool_calls", None)
  return None
 
 
  class StreamingChunkProcessor:
  """Process streaming chunks to extract only new content."""
-
+
  def __init__(self, output_type: Type[T], response_field_name: Optional[str] = None):
  self.output_type = output_type
  self.response_field_name = response_field_name
  self.previous_chunk = None
  self.previous_content = ""
-
+
  def process_chunk(self, chunk: Any) -> Optional[str]:
  """Process a chunk and return only the new content."""
  # Handle list types (e.g., list[str])
- from ...typing import get_origin
+ from .....typing import get_origin
+
  origin = get_origin(self.output_type)
-
+
  if origin is list:
  return self._process_list_chunk(chunk)
  elif self.response_field_name and hasattr(chunk, self.response_field_name):
  return self._process_field_chunk(chunk)
  else:
  return self._process_simple_chunk(chunk)
-
+
  def _process_list_chunk(self, chunk: Any) -> Optional[str]:
  """Process chunks for list types."""
  current_list = []
-
+
  if isinstance(chunk, list):
  current_list = chunk
- elif hasattr(chunk, 'value') and isinstance(chunk.value, list):
+ elif hasattr(chunk, "value") and isinstance(chunk.value, list):
  current_list = chunk.value
- elif hasattr(chunk, self.response_field_name) and isinstance(getattr(chunk, self.response_field_name), list):
+ elif hasattr(chunk, self.response_field_name) and isinstance(
+ getattr(chunk, self.response_field_name), list
+ ):
  current_list = getattr(chunk, self.response_field_name)
-
+
  if not current_list:
  return None
-
+
  # For list types, return only new items
  if self.previous_chunk is None:
  # First chunk - return the last item
@@ -120,24 +125,26 @@ class StreamingChunkProcessor:
  return str(current_list[-1])
  else:
  # Subsequent chunks - return only new items
- prev_list = self.previous_chunk if isinstance(self.previous_chunk, list) else []
+ prev_list = (
+ self.previous_chunk if isinstance(self.previous_chunk, list) else []
+ )
  prev_len = len(prev_list)
-
+
  if len(current_list) > prev_len:
  new_items = current_list[prev_len:]
  self.previous_chunk = current_list
  if new_items:
  return str(new_items[-1])
-
+
  return None
-
+
  def _process_field_chunk(self, chunk: Any) -> Optional[str]:
  """Process chunks with a specific response field."""
  if not hasattr(chunk, self.response_field_name):
  return None
-
+
  field_value = getattr(chunk, self.response_field_name)
-
+
  if isinstance(field_value, str):
  # For string fields, return only new content
  if self.previous_chunk is None:
@@ -147,9 +154,9 @@
  else:
  prev_value = self.previous_content
  current_value = field_value
-
+
  if current_value.startswith(prev_value):
- new_content = current_value[len(prev_value):]
+ new_content = current_value[len(prev_value) :]
  self.previous_chunk = chunk
  self.previous_content = current_value
  return new_content if new_content else None
@@ -166,23 +173,23 @@
  else:
  prev_field = getattr(self.previous_chunk, self.response_field_name, [])
  prev_len = len(prev_field) if isinstance(prev_field, list) else 0
-
+
  if len(field_value) > prev_len:
  new_items = field_value[prev_len:]
  self.previous_chunk = chunk
  if new_items:
  return str(new_items[-1])
-
+
  return None
-
+
  def _process_simple_chunk(self, chunk: Any) -> Optional[str]:
  """Process simple chunks without response fields."""
- if hasattr(chunk, 'value'):
+ if hasattr(chunk, "value"):
  value = chunk.value
  if isinstance(value, str):
  if self.previous_content:
  if value.startswith(self.previous_content):
- new_content = value[len(self.previous_content):]
+ new_content = value[len(self.previous_content) :]
  self.previous_content = value
  return new_content if new_content else None
  else:
@@ -194,7 +201,7 @@
  elif isinstance(chunk, str):
  if self.previous_content:
  if chunk.startswith(self.previous_content):
- new_content = chunk[len(self.previous_content):]
+ new_content = chunk[len(self.previous_content) :]
  self.previous_content = chunk
  return new_content if new_content else None
  else:
@@ -206,83 +213,95 @@
  elif self.output_type in (int, float, bool):
  # For primitive types, return string representation
  return str(chunk)
-
+
  return None
 
 
  class InstructorStreamWrapper:
  """Wrapper for instructor streams that collects raw responses via hooks."""
-
+
  def __init__(self, stream: Iterator[Any], collector: InstructorStreamCollector):
  self._stream = stream
  self.collector = collector
-
+
  def __iter__(self):
  return self
-
+
  def __next__(self):
  chunk = next(self._stream)
  self.collector.add_chunk(chunk)
  return chunk
-
+
  def get_raw_content(self):
  return self.collector.get_raw_content()
-
+
  def get_raw_completion(self):
  return self.collector.get_raw_completion()
-
+
  def get_tool_calls(self):
  return self.collector.get_tool_calls()
 
 
  class AsyncInstructorStreamWrapper:
  """Async wrapper for instructor streams that collects raw responses via hooks."""
-
- def __init__(self, stream: AsyncIterator[Any], collector: InstructorStreamCollector):
+
+ def __init__(
+ self, stream: AsyncIterator[Any], collector: InstructorStreamCollector
+ ):
  self._stream = stream
  self.collector = collector
-
+
  def __aiter__(self):
  return self
-
+
  async def __anext__(self):
  chunk = await self._stream.__anext__()
  self.collector.add_chunk(chunk)
  return chunk
-
+
  def get_raw_content(self):
  return self.collector.get_raw_content()
-
+
  def get_raw_completion(self):
  return self.collector.get_raw_completion()
-
+
  def get_tool_calls(self):
  return self.collector.get_tool_calls()
 
 
- class Stream(Generic[T]):
- """Synchronous stream wrapper for language model streaming.
+ class LanguageModelStream(
+ BaseGenAIModelStream[LanguageModelResponseChunk[T]], Generic[T]
+ ):
+ """Unified stream wrapper for language model streaming.
 
- This class provides a unified interface for streaming responses
- from both LiteLLM and Instructor, handling the different chunk
- formats and providing consistent access patterns.
+ This class provides a unified interface for both sync and async streaming responses
+ from both LiteLLM and Instructor, handling the different chunk formats and providing
+ consistent access patterns. It inherits from BaseGenAIModelStream and manages
+ both sync and async streaming in a single class.
  """
 
  def __init__(
  self,
- stream: Iterator[Any],
+ stream: Union[Iterator[Any], AsyncIterator[Any]],
  output_type: Type[T] = str,
  model: Optional[str] = None,
  response_field_name: Optional[str] = None,
  ):
  """Initialize the stream.
-
+
  Args:
- stream: The underlying stream iterator
+ stream: The underlying stream iterator (sync or async)
  output_type: The expected output type
  model: The model name
  response_field_name: The field name for structured outputs
  """
+ # Initialize base class
+ super().__init__(
+ type="language_model",
+ model=model or "unknown",
+ stream=stream,
+ )
+
  self._stream = stream
  self._output_type = output_type
  self._model = model
@@ -292,9 +311,31 @@ class Stream(Generic[T]):
  self._is_instructor = output_type != str
  self._is_consumed = False
  self._previous_chunk_output = None
+ self._is_async = hasattr(stream, "__anext__")
+ self._full_content = ""
 
  def __iter__(self) -> Iterator[LanguageModelResponseChunk[T]]:
- """Iterate over response chunks."""
+ """Iterate over response chunks (sync mode)."""
+ if self._is_async:
+ # This is a workaround to allow sync iteration over an async stream
+ # It's not ideal, but it works for simple cases.
+ # A better solution would be to use a dedicated sync entrypoint
+ # if this is a common use case.
+ try:
+ loop = asyncio.get_running_loop()
+ except RuntimeError:
+ loop = asyncio.new_event_loop()
+ asyncio.set_event_loop(loop)
+
+ async_iter = self.__aiter__()
+ while True:
+ try:
+ # We are calling the async __anext__ which returns a processed chunk
+ yield loop.run_until_complete(async_iter.__anext__())
+ except StopAsyncIteration:
+ break
+ return
+
  for chunk in self._stream:
  response_chunk = self._process_chunk(chunk)
  if response_chunk:
@@ -302,18 +343,46 @@
  yield response_chunk
  self._is_consumed = True
 
+ def __aiter__(self) -> AsyncIterator[LanguageModelResponseChunk[T]]:
+ """Async iterate over response chunks (async mode)."""
+ if not self._is_async:
+ raise RuntimeError(
+ "Cannot use async iteration on sync stream. Use regular for loop instead."
+ )
+ return self
+
+ async def __anext__(self) -> LanguageModelResponseChunk[T]:
+ """Get the next response chunk (async mode)."""
+ if not self._is_async:
+ raise RuntimeError(
+ "Cannot use async iteration on sync stream. Use regular for loop instead."
+ )
+
+ try:
+ chunk = await self._stream.__anext__()
+ response_chunk = self._process_chunk(chunk)
+ if response_chunk:
+ self._chunks.append(response_chunk)
+ return response_chunk
+ else:
+ return await self.__anext__() # Skip empty chunks
+ except StopAsyncIteration:
+ self._is_consumed = True
+ raise StopAsyncIteration
+
  def _process_chunk(self, chunk: Any) -> Optional[LanguageModelResponseChunk[T]]:
  """Process a raw chunk into a LanguageModelResponseChunk."""
  if self._is_instructor:
  # Handle instructor streaming (Partial/Iterable)
- output = chunk
-
+
  # Use the chunk processor to get only new content
- if not hasattr(self, '_chunk_processor'):
- self._chunk_processor = StreamingChunkProcessor(self._output_type, self._response_field_name)
-
+ if not hasattr(self, "_chunk_processor"):
+ self._chunk_processor = StreamingChunkProcessor(
+ self._output_type, self._response_field_name
+ )
+
  content = self._chunk_processor.process_chunk(chunk)
-
+
  # Extract the proper output value
  if self._response_field_name and hasattr(chunk, self._response_field_name):
  output_value = getattr(chunk, self._response_field_name)
335
404
  if hasattr(choice, "delta") and choice.delta:
336
405
  content = getattr(choice.delta, "content", None)
337
406
 
407
+ if content is not None:
408
+ self._full_content += content
409
+
338
410
  return LanguageModelResponseChunk(
339
411
  content=content,
340
- output=content,
412
+ output=self._full_content,
341
413
  model=getattr(chunk, "model", self._model),
342
414
  finish_reason=getattr(choice, "finish_reason", None),
343
415
  chunk=chunk,
@@ -346,11 +418,34 @@ class Stream(Generic[T]):
346
418
  return None
347
419
 
348
420
  def collect(self) -> LanguageModelResponse[T]:
349
- """Collect all chunks and return a complete LanguageModelResponse object."""
421
+ """Collect all chunks and return a complete LanguageModelResponse object (sync mode)."""
422
+ if self._is_async:
423
+ raise RuntimeError(
424
+ "Cannot use sync collect() on async stream. Use async collect() instead."
425
+ )
426
+
350
427
  if not self._chunks:
351
428
  # Consume the stream if not already consumed
352
429
  list(self)
353
430
 
431
+ return self._build_response()
432
+
433
+ async def async_collect(self) -> LanguageModelResponse[T]:
434
+ """Collect all chunks and return a complete LanguageModelResponse object (async mode)."""
435
+ if not self._is_async:
436
+ raise RuntimeError(
437
+ "Cannot use async collect() on sync stream. Use sync collect() instead."
438
+ )
439
+
440
+ if not self._chunks:
441
+ # Consume the stream if not already consumed
442
+ async for _ in self:
443
+ pass
444
+
445
+ return self._build_response()
446
+
447
+ def _build_response(self) -> LanguageModelResponse[T]:
448
+ """Build the final LanguageModelResponse from collected chunks."""
354
449
  if self._is_instructor and self._chunks:
355
450
  # For instructor, the final chunk contains the complete object
356
451
  final_chunk = self._chunks[-1]
@@ -359,7 +454,7 @@ class Stream(Generic[T]):
359
454
  raw_content = None
360
455
  raw_completion = None
361
456
  tool_calls = None
362
-
457
+
363
458
  if hasattr(self._stream, "collector"):
364
459
  collector = self._stream.collector
365
460
  raw_content = collector.get_raw_content()
@@ -367,8 +462,16 @@ class Stream(Generic[T]):
367
462
  tool_calls = collector.get_tool_calls()
368
463
  elif hasattr(self._stream, "get_raw_content"):
369
464
  raw_content = self._stream.get_raw_content()
370
- raw_completion = self._stream.get_raw_completion() if hasattr(self._stream, "get_raw_completion") else None
371
- tool_calls = self._stream.get_tool_calls() if hasattr(self._stream, "get_tool_calls") else None
465
+ raw_completion = (
466
+ self._stream.get_raw_completion()
467
+ if hasattr(self._stream, "get_raw_completion")
468
+ else None
469
+ )
470
+ tool_calls = (
471
+ self._stream.get_tool_calls()
472
+ if hasattr(self._stream, "get_tool_calls")
473
+ else None
474
+ )
372
475
 
373
476
  return LanguageModelResponse(
374
477
  output=final_chunk.output,
@@ -395,7 +498,7 @@ class Stream(Generic[T]):
395
498
  )
396
499
 
397
500
  def to_response(self) -> LanguageModelResponse[T]:
398
- """Convert the stream to a LanguageModelResponse object.
501
+ """Convert the stream to a LanguageModelResponse object (sync mode).
399
502
 
400
503
  This method can only be called after the stream has been fully consumed.
401
504
  It's an alias for collect() with a check for consumption state.
@@ -404,8 +507,13 @@ class Stream(Generic[T]):
404
507
  LanguageModelResponse[T]: The complete response object
405
508
 
406
509
  Raises:
407
- RuntimeError: If the stream has not been fully consumed
510
+ RuntimeError: If the stream has not been fully consumed or is async
408
511
  """
512
+ if self._is_async:
513
+ raise RuntimeError(
514
+ "Cannot use sync to_response() on async stream. Use async to_response() instead."
515
+ )
516
+
409
517
  if not self._is_consumed and not self._chunks:
410
518
  raise RuntimeError(
411
519
  "Stream must be fully consumed before converting to response. "
@@ -414,194 +522,59 @@ class Stream(Generic[T]):
414
522
 
415
523
  return self.collect()
416
524
 
417
- def to_message(self) -> Any:
418
- """Convert the stream to a ChatCompletionMessageParam.
525
+ async def async_to_response(self) -> LanguageModelResponse[T]:
526
+ """Convert the stream to a LanguageModelResponse object (async mode).
419
527
 
420
528
  This method can only be called after the stream has been fully consumed.
421
- It converts the final response to a message format.
529
+ It's an alias for async_collect() with a check for consumption state.
422
530
 
423
531
  Returns:
424
- ChatCompletionMessageParam: The response as a chat message
532
+ LanguageModelResponse[T]: The complete response object
425
533
 
426
534
  Raises:
427
- RuntimeError: If the stream has not been fully consumed
535
+ RuntimeError: If the stream has not been fully consumed or is sync
428
536
  """
429
- if not self._is_consumed and not self._chunks:
537
+ if not self._is_async:
430
538
  raise RuntimeError(
431
- "Stream must be fully consumed before converting to message. "
432
- "Use collect() or iterate through the stream first."
539
+ "Cannot use async to_response() on sync stream. Use sync to_response() instead."
433
540
  )
434
541
 
435
- response = self.collect()
436
- return response.to_message()
437
-
438
-
439
- class AsyncStream(Generic[T]):
440
- """Asynchronous stream wrapper for language model streaming.
441
-
442
- This class provides a unified interface for async streaming responses
443
- from both LiteLLM and Instructor, handling the different chunk
444
- formats and providing consistent access patterns.
445
- """
446
-
447
- def __init__(
448
- self,
449
- stream: AsyncIterator[Any],
450
- output_type: Type[T] = str,
451
- model: Optional[str] = None,
452
- response_field_name: Optional[str] = None,
453
- ):
454
- """Initialize the async stream.
455
-
456
- Args:
457
- stream: The underlying async stream iterator
458
- output_type: The expected output type
459
- model: The model name
460
- response_field_name: The field name for structured outputs
461
- """
462
- self._stream = stream
463
- self._output_type = output_type
464
- self._model = model
465
- self._response_field_name = response_field_name
466
- self._chunks: List[LanguageModelResponseChunk[T]] = []
467
- self._final_output: Optional[T] = None
468
- self._is_instructor = output_type != str
469
- self._is_consumed = False
470
- self._previous_chunk_output = None
471
-
472
- def __aiter__(self) -> AsyncIterator[LanguageModelResponseChunk[T]]:
473
- """Async iterate over response chunks."""
474
- return self
475
-
476
- async def __anext__(self) -> LanguageModelResponseChunk[T]:
477
- """Get the next response chunk."""
478
- try:
479
- chunk = await self._stream.__anext__()
480
- response_chunk = self._process_chunk(chunk)
481
- if response_chunk:
482
- self._chunks.append(response_chunk)
483
- return response_chunk
484
- else:
485
- return await self.__anext__() # Skip empty chunks
486
- except StopAsyncIteration:
487
- self._is_consumed = True
488
- raise StopAsyncIteration
489
-
490
- def _process_chunk(self, chunk: Any) -> Optional[LanguageModelResponseChunk[T]]:
491
- """Process a raw chunk into a LanguageModelResponseChunk."""
492
- if self._is_instructor:
493
- # Handle instructor streaming (Partial/Iterable)
494
- output = chunk
495
-
496
- # Use the chunk processor to get only new content
497
- if not hasattr(self, '_chunk_processor'):
498
- self._chunk_processor = StreamingChunkProcessor(self._output_type, self._response_field_name)
499
-
500
- content = self._chunk_processor.process_chunk(chunk)
501
-
502
- # Extract the proper output value
503
- if self._response_field_name and hasattr(chunk, self._response_field_name):
504
- output_value = getattr(chunk, self._response_field_name)
505
- else:
506
- output_value = chunk
507
-
508
- return LanguageModelResponseChunk(
509
- output=output_value,
510
- content=content,
511
- model=self._model,
512
- chunk=chunk,
513
- is_final=hasattr(chunk, "_is_final") and chunk._is_final,
514
- )
515
- else:
516
- # Handle LiteLLM streaming (ChatCompletionChunk)
517
- if hasattr(chunk, "choices") and chunk.choices:
518
- choice = chunk.choices[0]
519
- content = None
520
- if hasattr(choice, "delta") and choice.delta:
521
- content = getattr(choice.delta, "content", None)
522
-
523
- return LanguageModelResponseChunk(
524
- content=content,
525
- output=content,
526
- model=getattr(chunk, "model", self._model),
527
- finish_reason=getattr(choice, "finish_reason", None),
528
- chunk=chunk,
529
- is_final=getattr(choice, "finish_reason", None) is not None,
530
- )
531
- return None
532
-
533
- async def collect(self) -> LanguageModelResponse[T]:
534
- """Collect all chunks and return a complete LanguageModelResponse object."""
535
- if not self._chunks:
536
- # Consume the stream if not already consumed
537
- async for _ in self:
538
- pass
539
-
540
- if self._is_instructor and self._chunks:
541
- # For instructor, the final chunk contains the complete object
542
- final_chunk = self._chunks[-1]
543
-
544
- # Check if stream collector is available for raw content
545
- raw_content = None
546
- raw_completion = None
547
- tool_calls = None
548
-
549
- if hasattr(self._stream, "collector"):
550
- collector = self._stream.collector
551
- raw_content = collector.get_raw_content()
552
- raw_completion = collector.get_raw_completion()
553
- tool_calls = collector.get_tool_calls()
554
- elif hasattr(self._stream, "get_raw_content"):
555
- raw_content = self._stream.get_raw_content()
556
- raw_completion = self._stream.get_raw_completion() if hasattr(self._stream, "get_raw_completion") else None
557
- tool_calls = self._stream.get_tool_calls() if hasattr(self._stream, "get_tool_calls") else None
558
-
559
- return LanguageModelResponse(
560
- output=final_chunk.output,
561
- model=final_chunk.model or self._model or "unknown",
562
- completion=raw_completion,
563
- content=raw_content,
564
- tool_calls=tool_calls,
542
+ if not self._is_consumed and not self._chunks:
543
+ raise RuntimeError(
544
+ "Stream must be fully consumed before converting to response. "
545
+ "Use async_collect() or iterate through the stream first."
565
546
  )
566
- else:
567
- # For LiteLLM, combine content from all chunks
568
- content_parts = [chunk.content for chunk in self._chunks if chunk.content]
569
- combined_content = "".join(content_parts)
570
547
 
571
- # Create a mock completion for consistency
572
- mock_completion = None
573
- if self._chunks:
574
- mock_completion = self._chunks[-1].chunk
548
+ return await self.async_collect()
575
549
 
576
- return LanguageModelResponse(
577
- output=combined_content,
578
- model=self._model or "unknown",
579
- completion=mock_completion,
580
- content=combined_content,
581
- )
582
-
583
- async def to_response(self) -> LanguageModelResponse[T]:
584
- """Convert the stream to a LanguageModelResponse object.
550
+ def to_message(self) -> Any:
551
+ """Convert the stream to a ChatCompletionMessageParam (sync mode).
585
552
 
586
553
  This method can only be called after the stream has been fully consumed.
587
- It's an alias for collect() with a check for consumption state.
554
+ It converts the final response to a message format.
588
555
 
589
556
  Returns:
590
- LanguageModelResponse[T]: The complete response object
557
+ ChatCompletionMessageParam: The response as a chat message
591
558
 
592
559
  Raises:
593
- RuntimeError: If the stream has not been fully consumed
560
+ RuntimeError: If the stream has not been fully consumed or is async
594
561
  """
562
+ if self._is_async:
563
+ raise RuntimeError(
564
+ "Cannot use sync to_message() on async stream. Use async to_message() instead."
565
+ )
566
+
595
567
  if not self._is_consumed and not self._chunks:
596
568
  raise RuntimeError(
597
- "Stream must be fully consumed before converting to response. "
569
+ "Stream must be fully consumed before converting to message. "
598
570
  "Use collect() or iterate through the stream first."
599
571
  )
600
572
 
601
- return await self.collect()
573
+ response = self.collect()
574
+ return response.to_message()
602
575
 
603
- async def to_message(self) -> Any:
604
- """Convert the stream to a ChatCompletionMessageParam.
576
+ async def async_to_message(self) -> Any:
577
+ """Convert the stream to a ChatCompletionMessageParam (async mode).
605
578
 
606
579
  This method can only be called after the stream has been fully consumed.
607
580
  It converts the final response to a message format.
@@ -610,13 +583,18 @@ class AsyncStream(Generic[T]):
610
583
  ChatCompletionMessageParam: The response as a chat message
611
584
 
612
585
  Raises:
613
- RuntimeError: If the stream has not been fully consumed
586
+ RuntimeError: If the stream has not been fully consumed or is sync
614
587
  """
588
+ if not self._is_async:
589
+ raise RuntimeError(
590
+ "Cannot use async to_message() on sync stream. Use sync to_message() instead."
591
+ )
592
+
615
593
  if not self._is_consumed and not self._chunks:
616
594
  raise RuntimeError(
617
595
  "Stream must be fully consumed before converting to message. "
618
- "Use collect() or iterate through the stream first."
596
+ "Use async_collect() or iterate through the stream first."
619
597
  )
620
598
 
621
- response = await self.collect()
622
- return response.to_message()
599
+ response = await self.async_collect()
600
+ return response.to_message()