abstractcore-2.5.2-py3-none-any.whl → abstractcore-2.6.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. abstractcore/__init__.py +19 -1
  2. abstractcore/architectures/detection.py +252 -6
  3. abstractcore/assets/architecture_formats.json +14 -1
  4. abstractcore/assets/model_capabilities.json +533 -10
  5. abstractcore/compression/__init__.py +29 -0
  6. abstractcore/compression/analytics.py +420 -0
  7. abstractcore/compression/cache.py +250 -0
  8. abstractcore/compression/config.py +279 -0
  9. abstractcore/compression/exceptions.py +30 -0
  10. abstractcore/compression/glyph_processor.py +381 -0
  11. abstractcore/compression/optimizer.py +388 -0
  12. abstractcore/compression/orchestrator.py +380 -0
  13. abstractcore/compression/pil_text_renderer.py +818 -0
  14. abstractcore/compression/quality.py +226 -0
  15. abstractcore/compression/text_formatter.py +666 -0
  16. abstractcore/compression/vision_compressor.py +371 -0
  17. abstractcore/config/main.py +64 -0
  18. abstractcore/config/manager.py +100 -5
  19. abstractcore/core/retry.py +2 -2
  20. abstractcore/core/session.py +193 -7
  21. abstractcore/download.py +253 -0
  22. abstractcore/embeddings/manager.py +2 -2
  23. abstractcore/events/__init__.py +113 -2
  24. abstractcore/exceptions/__init__.py +49 -2
  25. abstractcore/media/auto_handler.py +312 -18
  26. abstractcore/media/handlers/local_handler.py +14 -2
  27. abstractcore/media/handlers/openai_handler.py +62 -3
  28. abstractcore/media/processors/__init__.py +11 -1
  29. abstractcore/media/processors/direct_pdf_processor.py +210 -0
  30. abstractcore/media/processors/glyph_pdf_processor.py +227 -0
  31. abstractcore/media/processors/image_processor.py +7 -1
  32. abstractcore/media/processors/office_processor.py +2 -2
  33. abstractcore/media/processors/text_processor.py +18 -3
  34. abstractcore/media/types.py +164 -7
  35. abstractcore/media/utils/image_scaler.py +2 -2
  36. abstractcore/media/vision_fallback.py +2 -2
  37. abstractcore/providers/__init__.py +18 -0
  38. abstractcore/providers/anthropic_provider.py +228 -8
  39. abstractcore/providers/base.py +378 -11
  40. abstractcore/providers/huggingface_provider.py +563 -23
  41. abstractcore/providers/lmstudio_provider.py +284 -4
  42. abstractcore/providers/mlx_provider.py +27 -2
  43. abstractcore/providers/model_capabilities.py +352 -0
  44. abstractcore/providers/ollama_provider.py +282 -6
  45. abstractcore/providers/openai_provider.py +286 -8
  46. abstractcore/providers/registry.py +85 -13
  47. abstractcore/providers/streaming.py +2 -2
  48. abstractcore/server/app.py +91 -81
  49. abstractcore/tools/common_tools.py +2 -2
  50. abstractcore/tools/handler.py +2 -2
  51. abstractcore/tools/parser.py +2 -2
  52. abstractcore/tools/registry.py +2 -2
  53. abstractcore/tools/syntax_rewriter.py +2 -2
  54. abstractcore/tools/tag_rewriter.py +3 -3
  55. abstractcore/utils/__init__.py +4 -1
  56. abstractcore/utils/self_fixes.py +2 -2
  57. abstractcore/utils/trace_export.py +287 -0
  58. abstractcore/utils/version.py +1 -1
  59. abstractcore/utils/vlm_token_calculator.py +655 -0
  60. {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/METADATA +207 -8
  61. abstractcore-2.6.0.dist-info/RECORD +108 -0
  62. abstractcore-2.5.2.dist-info/RECORD +0 -90
  63. {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/WHEEL +0 -0
  64. {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/entry_points.txt +0 -0
  65. {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/licenses/LICENSE +0 -0
  66. {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/top_level.txt +0 -0
abstractcore/providers/base.py

@@ -3,7 +3,10 @@ Base provider with integrated telemetry, events, and exception handling.
  """
 
  import time
- from typing import List, Dict, Any, Optional, Union, Iterator, Type
+ import uuid
+ import asyncio
+ from collections import deque
+ from typing import List, Dict, Any, Optional, Union, Iterator, AsyncIterator, Type
  from abc import ABC, abstractmethod
 
  try:
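The new `collections.deque` import underpins the trace ring buffer set up in the constructor below: a deque created with `maxlen` silently evicts its oldest entry on append once full, which is what keeps trace memory bounded. A quick standalone illustration:

    from collections import deque

    buf = deque(maxlen=3)    # same mechanism as self._traces below
    for i in range(5):
        buf.append(i)
    print(list(buf))         # [2, 3, 4]: the two oldest entries were evicted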
@@ -67,6 +70,13 @@ class BaseProvider(AbstractCoreInterface, ABC):
 
          # Create provider key for circuit breaker tracking
          self.provider_key = f"{self.__class__.__name__}:{self.model}"
+
+         # Setup Glyph compression configuration
+         self.glyph_config = kwargs.get('glyph_config', None)
+
+         # Setup interaction tracing
+         self.enable_tracing = kwargs.get('enable_tracing', False)
+         self._traces = deque(maxlen=kwargs.get('max_traces', 100))  # Ring buffer for memory efficiency
 
          # Provider created successfully - no event emission needed
          # (The simplified event system focuses on generation and tool events only)
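Both new constructor options are plain `kwargs.get()` lookups, so they can be passed straight through any concrete provider. A hypothetical construction sketch (the import path mirrors the docstring examples later in this diff; only `enable_tracing`, `max_traces`, and `glyph_config` are taken from the code above, and the model name is a placeholder):

    # Hypothetical sketch: enable the trace ring buffer at construction time.
    from abstractcore.providers import OpenAIProvider

    llm = OpenAIProvider(
        model="gpt-4o-mini",   # placeholder model name
        enable_tracing=True,   # default: False
        max_traces=50,         # trace ring-buffer capacity; default: 100
    )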
@@ -173,6 +183,97 @@ class BaseProvider(AbstractCoreInterface, ABC):
          result_info = f" (result length: {len(str(result))})" if result else ""
          self.logger.info(f"Tool call completed: {tool_name}{result_info}")
 
+     def _capture_trace(self, prompt: str, messages: Optional[List[Dict[str, str]]],
+                        system_prompt: Optional[str], tools: Optional[List[Dict[str, Any]]],
+                        response: GenerateResponse, kwargs: Dict[str, Any]) -> str:
+         """
+         Capture interaction trace for observability.
+
+         Args:
+             prompt: Input prompt
+             messages: Conversation history
+             system_prompt: System prompt
+             tools: Available tools
+             response: Generated response
+             kwargs: Additional generation parameters
+
+         Returns:
+             Trace ID (UUID string)
+         """
+         trace_id = str(uuid.uuid4())
+
+         # Extract generation parameters
+         temperature = kwargs.get('temperature', self.temperature)
+         max_tokens = kwargs.get('max_tokens', self.max_tokens)
+         max_output_tokens = kwargs.get('max_output_tokens', self.max_output_tokens)
+         seed = kwargs.get('seed', self.seed)
+         top_p = kwargs.get('top_p', getattr(self, 'top_p', None))
+         top_k = kwargs.get('top_k', getattr(self, 'top_k', None))
+
+         # Build parameters dict
+         parameters = {
+             'temperature': temperature,
+             'max_tokens': max_tokens,
+             'max_output_tokens': max_output_tokens,
+         }
+         if seed is not None:
+             parameters['seed'] = seed
+         if top_p is not None:
+             parameters['top_p'] = top_p
+         if top_k is not None:
+             parameters['top_k'] = top_k
+
+         # Create trace record
+         trace = {
+             'trace_id': trace_id,
+             'timestamp': datetime.now().isoformat(),
+             'provider': self.__class__.__name__,
+             'model': self.model,
+             'system_prompt': system_prompt,
+             'prompt': prompt,
+             'messages': messages,
+             'tools': tools,
+             'parameters': parameters,
+             'response': {
+                 'content': response.content,
+                 'raw_response': None,  # Omit raw_response to save memory and avoid logging sensitive data
+                 'tool_calls': response.tool_calls,
+                 'finish_reason': response.finish_reason,
+                 'usage': response.usage,
+                 'generation_time_ms': response.gen_time,
+             },
+             'metadata': kwargs.get('trace_metadata', {})
+         }
+
+         # Store trace in ring buffer
+         self._traces.append(trace)
+
+         return trace_id
+
+     def get_traces(self, trace_id: Optional[str] = None, last_n: Optional[int] = None) -> Union[Dict[str, Any], List[Dict[str, Any]]]:
+         """
+         Retrieve interaction traces.
+
+         Args:
+             trace_id: Optional specific trace ID to retrieve
+             last_n: Optional number of most recent traces to retrieve
+
+         Returns:
+             Single trace dict if trace_id provided, list of traces otherwise
+         """
+         if trace_id:
+             # Find specific trace by ID
+             for trace in self._traces:
+                 if trace['trace_id'] == trace_id:
+                     return trace
+             return None
+
+         if last_n:
+             # Return last N traces
+             return list(self._traces)[-last_n:] if len(self._traces) >= last_n else list(self._traces)
+
+         # Return all traces
+         return list(self._traces)
 
      def _handle_api_error(self, error: Exception) -> Exception:
          """
@@ -211,6 +312,7 @@ class BaseProvider(AbstractCoreInterface, ABC):
                                  retry_strategy=None,  # Custom retry strategy for structured output
                                  tool_call_tags: Optional[str] = None,  # Tool call tag rewriting
                                  execute_tools: Optional[bool] = None,  # Tool execution control
+                                 glyph_compression: Optional[str] = None,  # Glyph compression preference
                                  **kwargs) -> Union[GenerateResponse, Iterator[GenerateResponse], BaseModel]:
          """
          Generate with integrated telemetry and error handling.
@@ -227,6 +329,7 @@ class BaseProvider(AbstractCoreInterface, ABC):
              retry_strategy: Optional retry strategy for structured output validation
              tool_call_tags: Optional tool call tag format for rewriting
              execute_tools: Whether to execute tools automatically (True) or let agent handle execution (False)
+             glyph_compression: Glyph compression preference ("auto", "always", "never")
          """
          # Handle structured output request
          if response_model is not None:
@@ -269,8 +372,17 @@ class BaseProvider(AbstractCoreInterface, ABC):
 
          # Process media content if provided
          processed_media = None
+         media_metadata = None
          if media:
-             processed_media = self._process_media_content(media)
+             compression_pref = glyph_compression or kwargs.get('glyph_compression', 'auto')
+             processed_media = self._process_media_content(media, compression_pref)
+
+             # Extract metadata from processed media for response
+             if processed_media:
+                 media_metadata = []
+                 for media_content in processed_media:
+                     if hasattr(media_content, 'metadata') and media_content.metadata:
+                         media_metadata.append(media_content.metadata)
 
          # Convert tools to ToolDefinition objects first (outside retry loop)
          converted_tools = None
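The explicit `glyph_compression` argument takes precedence over the kwarg of the same name and defaults to "auto". A hypothetical call sketch, reusing `llm` from above (the accepted values come from the updated docstring; the file path is a placeholder):

    response = llm.generate(
        "Summarize the attached report.",
        media=["./report.pdf"],        # placeholder path
        glyph_compression="always",    # or "auto" (default) / "never"
    )
    print(response.content)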
@@ -327,6 +439,7 @@ class BaseProvider(AbstractCoreInterface, ABC):
                  stream=stream,
                  execute_tools=should_execute_tools,
                  tool_call_tags=tool_call_tags,
+                 media_metadata=media_metadata,
                  **kwargs
              )
 
@@ -380,6 +493,26 @@ class BaseProvider(AbstractCoreInterface, ABC):
             # Apply default qwen3 rewriting for non-streaming responses
             response = self._apply_non_streaming_tag_rewriting(response, tool_call_tags)
 
+            # Add visual token calculation if media metadata is available
+            if media_metadata and response:
+                self.logger.debug(f"Enhancing response with visual tokens from {len(media_metadata)} media items")
+                response = self._enhance_response_with_visual_tokens(response, media_metadata)
+
+            # Capture interaction trace if enabled
+            if self.enable_tracing and response:
+                trace_id = self._capture_trace(
+                    prompt=prompt,
+                    messages=messages,
+                    system_prompt=system_prompt,
+                    tools=converted_tools,
+                    response=response,
+                    kwargs=kwargs
+                )
+                # Attach trace_id to response metadata
+                if not response.metadata:
+                    response.metadata = {}
+                response.metadata['trace_id'] = trace_id
+
             self._track_generation(prompt, response, start_time, success=True, stream=False)
             return response
 
@@ -411,6 +544,7 @@ class BaseProvider(AbstractCoreInterface, ABC):
                            stream: bool = False,
                            response_model: Optional[Type[BaseModel]] = None,
                            execute_tools: Optional[bool] = None,
+                           media_metadata: Optional[List[Dict[str, Any]]] = None,
                            **kwargs) -> Union[GenerateResponse, Iterator[GenerateResponse]]:
          """
          Internal generation method to be implemented by subclasses.
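Subclasses implementing `_generate_internal` need to accept the new keyword. A minimal override sketch (the leading parameters are inferred from the call sites in this diff and from `agenerate` below; treat the exact positional list as an assumption):

    class MyProvider(BaseProvider):
        def _generate_internal(self, prompt, messages=None, system_prompt=None,
                               tools=None, media=None, stream=False,
                               response_model=None, execute_tools=None,
                               media_metadata=None, **kwargs):
            # media_metadata is the list of per-item metadata dicts extracted in
            # generate_with_telemetry (or None); providers that do no
            # visual-token accounting can simply ignore it.
            ...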
@@ -429,6 +563,102 @@ class BaseProvider(AbstractCoreInterface, ABC):
          """
          raise NotImplementedError("Subclasses must implement _generate_internal")
 
+     def _enhance_response_with_visual_tokens(self, response: GenerateResponse, media_metadata: List[Dict[str, Any]]) -> GenerateResponse:
+         """
+         Enhance the response with visual token calculations for Glyph compression.
+         This method is called automatically by BaseProvider for all providers.
+         """
+         try:
+             # Calculate visual tokens using VLM token calculator
+             provider_name = self.provider or self.__class__.__name__.lower().replace('provider', '')
+             self.logger.debug(f"Calculating visual tokens for provider={provider_name}, model={self.model}")
+
+             visual_tokens = self._calculate_visual_tokens(media_metadata, provider_name, self.model)
+             self.logger.debug(f"Calculated visual tokens: {visual_tokens}")
+
+             if visual_tokens > 0:
+                 # Ensure response has metadata
+                 if not response.metadata:
+                     response.metadata = {}
+
+                 # Add visual token information to metadata
+                 response.metadata['visual_tokens'] = visual_tokens
+
+                 # Ensure response has usage dict
+                 if not response.usage:
+                     response.usage = {}
+
+                 # Add visual tokens to usage
+                 response.usage['visual_tokens'] = visual_tokens
+
+                 # Update total tokens to include visual tokens
+                 original_total = response.usage.get('total_tokens', 0)
+                 response.usage['total_tokens'] = original_total + visual_tokens
+
+                 self.logger.info(f"Enhanced response with {visual_tokens} visual tokens (new total: {response.usage['total_tokens']})")
+             else:
+                 self.logger.debug("No visual tokens calculated - skipping enhancement")
+
+         except Exception as e:
+             self.logger.warning(f"Failed to enhance response with visual tokens: {e}")
+
+         return response
+
+     def _calculate_visual_tokens(self, media_metadata: List[Dict[str, Any]], provider: str, model: str) -> int:
+         """Calculate visual tokens from media metadata using VLM token calculator."""
+         try:
+             from ..utils.vlm_token_calculator import VLMTokenCalculator
+             from pathlib import Path
+
+             calculator = VLMTokenCalculator()
+             total_visual_tokens = 0
+
+             self.logger.debug(f"Processing {len(media_metadata)} media metadata items")
+
+             for i, metadata in enumerate(media_metadata):
+                 self.logger.debug(f"Metadata {i}: processing_method={metadata.get('processing_method')}")
+
+                 # Check if this is Glyph compression
+                 if metadata.get('processing_method') == 'direct_pdf_conversion':
+                     glyph_cache_dir = metadata.get('glyph_cache_dir')
+                     total_images = metadata.get('total_images', 0)
+
+                     self.logger.debug(f"Glyph metadata found: cache_dir={glyph_cache_dir}, total_images={total_images}")
+
+                     if glyph_cache_dir and Path(glyph_cache_dir).exists():
+                         # Get actual image paths
+                         cache_dir = Path(glyph_cache_dir)
+                         image_paths = list(cache_dir.glob("image_*.png"))
+
+                         self.logger.debug(f"Found {len(image_paths)} images in cache directory")
+
+                         if image_paths:
+                             # Calculate tokens for all images
+                             token_analysis = calculator.calculate_tokens_for_images(
+                                 image_paths=image_paths,
+                                 provider=provider,
+                                 model=model
+                             )
+                             total_visual_tokens += token_analysis['total_tokens']
+
+                             self.logger.debug(f"Calculated {token_analysis['total_tokens']} visual tokens for {len(image_paths)} Glyph images")
+                         else:
+                             # Fallback: estimate based on total_images
+                             base_tokens = calculator.PROVIDER_CONFIGS.get(provider, {}).get('base_tokens', 512)
+                             estimated_tokens = total_images * base_tokens
+                             total_visual_tokens += estimated_tokens
+
+                             self.logger.debug(f"Estimated {estimated_tokens} visual tokens for {total_images} Glyph images (fallback)")
+                     else:
+                         self.logger.debug(f"Cache directory not found or doesn't exist: {glyph_cache_dir}")
+
+             self.logger.debug(f"Total visual tokens calculated: {total_visual_tokens}")
+             return total_visual_tokens
+
+         except Exception as e:
+             self.logger.warning(f"Failed to calculate visual tokens: {e}")
+             return 0
+
      def _initialize_token_limits(self):
          """Initialize default token limits based on model capabilities"""
          # Set default max_tokens if not provided
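The net effect for callers: when Glyph-compressed media is attached, `response.usage` gains a `visual_tokens` entry and `total_tokens` is bumped to include it. A reading sketch (the field names come from the code above; the call itself is illustrative and reuses `llm`):

    response = llm.generate("Describe the document.", media=["./scan.pdf"])
    usage = response.usage or {}
    print("visual:", usage.get('visual_tokens', 0),
          "| total incl. visual:", usage.get('total_tokens', 0))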
@@ -805,12 +1035,14 @@ class BaseProvider(AbstractCoreInterface, ABC):
          """Rough estimation of token count for given text"""
          return super().estimate_tokens(text)
 
-     def _process_media_content(self, media: List[Union[str, Dict[str, Any], 'MediaContent']]) -> List['MediaContent']:
+     def _process_media_content(self, media: List[Union[str, Dict[str, Any], 'MediaContent']],
+                                glyph_compression: str = "auto") -> List['MediaContent']:
          """
          Process media content from various input formats into standardized MediaContent objects.
 
          Args:
              media: List of media inputs (file paths, MediaContent objects, or dicts)
+             glyph_compression: Glyph compression preference (auto, always, never)
 
          Returns:
              List of processed MediaContent objects
@@ -838,8 +1070,16 @@ class BaseProvider(AbstractCoreInterface, ABC):
             try:
                 if isinstance(media_item, str):
                     # File path - process with auto media handler
-                     handler = AutoMediaHandler()
-                     result = handler.process_file(media_item)
+                     handler = AutoMediaHandler(
+                         enable_glyph_compression=True,
+                         glyph_config=getattr(self, 'glyph_config', None)
+                     )
+                     result = handler.process_file(
+                         media_item,
+                         provider=self.provider,
+                         model=self.model,
+                         glyph_compression=glyph_compression
+                     )
                     if result.success:
                         processed_media.append(result.media_content)
                     else:
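This is the same call a caller could make directly against the media layer. A hypothetical standalone sketch (the constructor and `process_file` keywords are as used above, and `result.success` / `result.media_content` appear in this diff; the import path, provider string, and file path are assumptions):

    from abstractcore.media.auto_handler import AutoMediaHandler

    handler = AutoMediaHandler(enable_glyph_compression=True, glyph_config=None)
    result = handler.process_file("./report.pdf", provider="openai",
                                  model="gpt-4o-mini", glyph_compression="auto")
    if result.success:
        # e.g. processing_method, glyph_cache_dir, total_images
        print(result.media_content.metadata)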
@@ -881,14 +1121,47 @@ class BaseProvider(AbstractCoreInterface, ABC):
          The server will use this method to aggregate models across all providers.
 
          Args:
-             **kwargs: Provider-specific parameters (e.g., api_key, base_url)
+             **kwargs: Provider-specific parameters including:
+                 - api_key: API key for authentication (if required)
+                 - base_url: Base URL for API endpoint (if applicable)
+                 - input_capabilities: Optional list of ModelInputCapability enums to filter by input capability
+                   (e.g., [ModelInputCapability.IMAGE] for vision models)
+                 - output_capabilities: Optional list of ModelOutputCapability enums to filter by output capability
+                   (e.g., [ModelOutputCapability.EMBEDDINGS] for embedding models)
 
          Returns:
-             List of model names available for this provider
+             List of model names available for this provider, optionally filtered by capabilities
+
+         Examples:
+             >>> from abstractcore.providers import OpenAIProvider
+             >>> from abstractcore.providers.model_capabilities import ModelInputCapability, ModelOutputCapability
+             >>>
+             >>> # Get all models
+             >>> all_models = OpenAIProvider.list_available_models(api_key="...")
+             >>>
+             >>> # Get models that can analyze images
+             >>> vision_models = OpenAIProvider.list_available_models(
+             ...     api_key="...",
+             ...     input_capabilities=[ModelInputCapability.IMAGE]
+             ... )
+             >>>
+             >>> # Get embedding models
+             >>> embedding_models = OpenAIProvider.list_available_models(
+             ...     api_key="...",
+             ...     output_capabilities=[ModelOutputCapability.EMBEDDINGS]
+             ... )
+             >>>
+             >>> # Get vision models that generate text (most common case)
+             >>> vision_text_models = OpenAIProvider.list_available_models(
+             ...     api_key="...",
+             ...     input_capabilities=[ModelInputCapability.TEXT, ModelInputCapability.IMAGE],
+             ...     output_capabilities=[ModelOutputCapability.TEXT]
+             ... )
 
          Note:
              This is an abstract method that MUST be implemented by all provider subclasses.
              Each provider should implement its own discovery logic (API calls, local scanning, etc.).
+             Providers should apply the capability filters if provided in kwargs.
          """
          pass
 
@@ -1168,9 +1441,9 @@ Please provide a structured response."""
                   **kwargs) -> Union[GenerateResponse, Iterator[GenerateResponse], BaseModel]:
          """
          Generate response from the LLM.
-
+
          This method implements the AbstractCoreInterface and delegates to generate_with_telemetry.
-
+
          Args:
              prompt: The input prompt
              messages: Optional conversation history
@@ -1178,7 +1451,7 @@
              tools: Optional list of available tools
              stream: Whether to stream the response
              **kwargs: Additional provider-specific parameters (including response_model)
-
+
          Returns:
              GenerateResponse, iterator of GenerateResponse for streaming, or BaseModel for structured output
          """
@@ -1189,4 +1462,98 @@
              tools=tools,
              stream=stream,
              **kwargs
-         )
+         )
+
+     async def agenerate(self,
+                         prompt: str = "",
+                         messages: Optional[List[Dict]] = None,
+                         system_prompt: Optional[str] = None,
+                         tools: Optional[List] = None,
+                         media: Optional[List] = None,
+                         stream: bool = False,
+                         **kwargs) -> Union[GenerateResponse, AsyncIterator[GenerateResponse], BaseModel]:
+         """
+         Async generation - works with all providers.
+
+         Calls _agenerate_internal() which can be overridden for native async.
+         Default implementation uses asyncio.to_thread() fallback.
+
+         Args:
+             prompt: Text prompt
+             messages: Conversation history
+             system_prompt: System instructions
+             tools: Available tools
+             media: Media attachments
+             stream: Enable streaming
+             **kwargs: Additional generation parameters (including response_model)
+
+         Returns:
+             GenerateResponse, AsyncIterator[GenerateResponse] for streaming, or BaseModel for structured output
+         """
+         return await self._agenerate_internal(
+             prompt, messages, system_prompt, tools, media, stream, **kwargs
+         )
+
+     async def _agenerate_internal(self,
+                                   prompt: str,
+                                   messages: Optional[List[Dict]],
+                                   system_prompt: Optional[str],
+                                   tools: Optional[List],
+                                   media: Optional[List],
+                                   stream: bool,
+                                   **kwargs) -> Union[GenerateResponse, AsyncIterator[GenerateResponse], BaseModel]:
+         """
+         Internal async generation method.
+
+         Default implementation: Uses asyncio.to_thread() to run sync generate().
+         Providers override this for native async (3-10x faster for batch operations).
+
+         Args:
+             prompt: Text prompt
+             messages: Conversation history
+             system_prompt: System instructions
+             tools: Available tools
+             media: Media attachments
+             stream: Enable streaming
+             **kwargs: Additional generation parameters
+
+         Returns:
+             GenerateResponse, AsyncIterator[GenerateResponse] for streaming, or BaseModel for structured output
+         """
+         if stream:
+             # Return async iterator for streaming
+             return self._async_stream_generate(
+                 prompt, messages, system_prompt, tools, media, **kwargs
+             )
+         else:
+             # Run sync generate in thread pool (fallback)
+             return await asyncio.to_thread(
+                 self.generate,
+                 prompt, messages, system_prompt, tools, stream, **kwargs
+             )
+
+     async def _async_stream_generate(self,
+                                      prompt: str,
+                                      messages: Optional[List[Dict]],
+                                      system_prompt: Optional[str],
+                                      tools: Optional[List],
+                                      media: Optional[List],
+                                      **kwargs) -> AsyncIterator[GenerateResponse]:
+         """
+         Async streaming generator.
+
+         Wraps sync streaming in async iterator, yielding control to event loop.
+         """
+         # Get sync generator in thread pool
+         def get_sync_stream():
+             return self.generate(
+                 prompt, messages, system_prompt, tools,
+                 stream=True, **kwargs
+             )
+
+         sync_gen = await asyncio.to_thread(get_sync_stream)
+
+         # Yield chunks asynchronously
+         for chunk in sync_gen:
+             yield chunk
+             await asyncio.sleep(0)  # Yield control to event loop
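Because the default `_agenerate_internal` falls back to `asyncio.to_thread()`, `agenerate()` is immediately usable with every provider; overriding it for native async only changes what happens underneath. A hypothetical batch-and-streaming sketch, reusing `llm` from the construction sketch above (only `agenerate` and its signature are from this diff):

    import asyncio

    async def main():
        # Fan out several prompts concurrently; the thread-pool fallback keeps
        # the event loop responsive even without a native async provider.
        prompts = ["Define latency.", "Define throughput.", "Define jitter."]
        responses = await asyncio.gather(*(llm.agenerate(p) for p in prompts))
        for r in responses:
            print(r.content)

        # stream=True returns an async iterator of GenerateResponse chunks.
        stream = await llm.agenerate("Count to five.", stream=True)
        async for chunk in stream:
            print(chunk.content, end="", flush=True)

    asyncio.run(main())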