abstractcore-2.5.3-py3-none-any.whl → abstractcore-2.6.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. abstractcore/__init__.py +7 -1
  2. abstractcore/architectures/detection.py +2 -2
  3. abstractcore/config/__init__.py +24 -1
  4. abstractcore/config/manager.py +47 -0
  5. abstractcore/core/retry.py +2 -2
  6. abstractcore/core/session.py +132 -1
  7. abstractcore/download.py +253 -0
  8. abstractcore/embeddings/manager.py +2 -2
  9. abstractcore/events/__init__.py +112 -1
  10. abstractcore/exceptions/__init__.py +49 -2
  11. abstractcore/media/processors/office_processor.py +2 -2
  12. abstractcore/media/utils/image_scaler.py +2 -2
  13. abstractcore/media/vision_fallback.py +2 -2
  14. abstractcore/providers/anthropic_provider.py +200 -6
  15. abstractcore/providers/base.py +100 -5
  16. abstractcore/providers/lmstudio_provider.py +254 -4
  17. abstractcore/providers/ollama_provider.py +253 -4
  18. abstractcore/providers/openai_provider.py +258 -6
  19. abstractcore/providers/registry.py +9 -1
  20. abstractcore/providers/streaming.py +2 -2
  21. abstractcore/tools/common_tools.py +2 -2
  22. abstractcore/tools/handler.py +2 -2
  23. abstractcore/tools/parser.py +2 -2
  24. abstractcore/tools/registry.py +2 -2
  25. abstractcore/tools/syntax_rewriter.py +2 -2
  26. abstractcore/tools/tag_rewriter.py +3 -3
  27. abstractcore/utils/self_fixes.py +2 -2
  28. abstractcore/utils/version.py +1 -1
  29. {abstractcore-2.5.3.dist-info → abstractcore-2.6.2.dist-info}/METADATA +162 -4
  30. {abstractcore-2.5.3.dist-info → abstractcore-2.6.2.dist-info}/RECORD +34 -33
  31. {abstractcore-2.5.3.dist-info → abstractcore-2.6.2.dist-info}/WHEEL +0 -0
  32. {abstractcore-2.5.3.dist-info → abstractcore-2.6.2.dist-info}/entry_points.txt +0 -0
  33. {abstractcore-2.5.3.dist-info → abstractcore-2.6.2.dist-info}/licenses/LICENSE +0 -0
  34. {abstractcore-2.5.3.dist-info → abstractcore-2.6.2.dist-info}/top_level.txt +0 -0
abstractcore/providers/lmstudio_provider.py

@@ -2,10 +2,11 @@
 LM Studio provider implementation (OpenAI-compatible API).
 """
 
+import os
 import httpx
 import json
 import time
-from typing import List, Dict, Any, Optional, Union, Iterator, Type
+from typing import List, Dict, Any, Optional, Union, Iterator, AsyncIterator, Type
 
 try:
     from pydantic import BaseModel
@@ -15,7 +16,7 @@ except ImportError:
     BaseModel = None
 from .base import BaseProvider
 from ..core.types import GenerateResponse
-from ..exceptions import ProviderAPIError, ModelNotFoundError, format_model_error
+from ..exceptions import ProviderAPIError, ModelNotFoundError, format_model_error, format_provider_error
 from ..tools import UniversalToolHandler, execute_tools
 from ..events import EventType
 
@@ -23,14 +24,19 @@ from ..events import EventType
 class LMStudioProvider(BaseProvider):
     """LM Studio provider using OpenAI-compatible API"""
 
-    def __init__(self, model: str = "local-model", base_url: str = "http://localhost:1234/v1", **kwargs):
+    def __init__(self, model: str = "local-model", base_url: Optional[str] = None, **kwargs):
         super().__init__(model, **kwargs)
         self.provider = "lmstudio"
 
         # Initialize tool handler
         self.tool_handler = UniversalToolHandler(model)
 
-        self.base_url = base_url.rstrip('/')
+        # Base URL priority: parameter > LMSTUDIO_BASE_URL > default
+        self.base_url = (
+            base_url or
+            os.getenv("LMSTUDIO_BASE_URL") or
+            "http://localhost:1234/v1"
+        ).rstrip('/')
 
         # Get timeout value - None means unlimited timeout
         timeout_value = getattr(self, '_timeout', None)
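The practical effect of this change is that the LM Studio endpoint can be configured per environment instead of per call site. The resolution order can be exercised in isolation; resolve_lmstudio_base_url below is a hypothetical stand-alone helper that mirrors the constructor logic shown above, not a function exported by the package.

    import os

    def resolve_lmstudio_base_url(base_url=None):
        # Mirrors the priority in the diff: explicit argument > LMSTUDIO_BASE_URL > default.
        return (base_url
                or os.getenv("LMSTUDIO_BASE_URL")
                or "http://localhost:1234/v1").rstrip('/')

    os.environ["LMSTUDIO_BASE_URL"] = "http://192.168.1.50:1234/v1"
    assert resolve_lmstudio_base_url() == "http://192.168.1.50:1234/v1"
    assert resolve_lmstudio_base_url("http://localhost:9999/v1/") == "http://localhost:9999/v1"
    del os.environ["LMSTUDIO_BASE_URL"]
    assert resolve_lmstudio_base_url() == "http://localhost:1234/v1"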
@@ -47,9 +53,21 @@ class LMStudioProvider(BaseProvider):
         except Exception:
             raise RuntimeError(f"Failed to create HTTP client for LMStudio: {e}")
 
+        self._async_client = None  # Lazy-loaded async client
+
         # Validate model exists in LMStudio
         self._validate_model()
 
+    @property
+    def async_client(self):
+        """Lazy-load async HTTP client for native async operations."""
+        if self._async_client is None:
+            timeout_value = getattr(self, '_timeout', None)
+            if timeout_value is not None and timeout_value <= 0:
+                timeout_value = None
+            self._async_client = httpx.AsyncClient(timeout=timeout_value)
+        return self._async_client
+
     def _validate_model(self):
         """Validate that the model exists in LMStudio"""
         try:
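The async client is created lazily, so purely synchronous use of the provider never constructs (or has to dispose of) an httpx.AsyncClient, and repeated accesses reuse the same connection pool. A minimal sketch of the same pattern outside the provider classes; the class name LazyAsyncHTTP is illustrative only.

    import httpx

    class LazyAsyncHTTP:
        def __init__(self, timeout=None):
            # A non-positive timeout is normalised to None, which httpx treats as "no timeout".
            if timeout is not None and timeout <= 0:
                timeout = None
            self._timeout = timeout
            self._async_client = None  # built on first access

        @property
        def async_client(self) -> httpx.AsyncClient:
            if self._async_client is None:
                self._async_client = httpx.AsyncClient(timeout=self._timeout)
            return self._async_client

    holder = LazyAsyncHTTP(timeout=-1)
    assert holder._async_client is None   # nothing created yet
    client = holder.async_client          # first access builds the client
    assert holder.async_client is client  # later accesses reuse it (close with "await client.aclose()" when done)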
@@ -87,6 +105,17 @@ class LMStudioProvider(BaseProvider):
             if hasattr(self, 'client') and self.client is not None:
                 self.client.close()
 
+            # Close async client if it was created
+            if self._async_client is not None:
+                import asyncio
+                try:
+                    loop = asyncio.get_running_loop()
+                    loop.create_task(self._async_client.aclose())
+                except RuntimeError:
+                    # No running loop
+                    import asyncio
+                    asyncio.run(self._async_client.aclose())
+
         except Exception as e:
             # Log but don't raise - unload should be best-effort
             if hasattr(self, 'logger'):
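Since unload() is synchronous, closing the lazily created async client has to work both with and without a running event loop: if a loop is active, aclose() is scheduled as a task on it, otherwise the coroutine is driven to completion with asyncio.run(). The same idea as a stand-alone helper; close_async_client is illustrative, not part of the package.

    import asyncio
    import httpx

    def close_async_client(client: httpx.AsyncClient) -> None:
        try:
            loop = asyncio.get_running_loop()
            # Inside async code: schedule the close without blocking the loop.
            loop.create_task(client.aclose())
        except RuntimeError:
            # No running loop in this thread: run the coroutine to completion.
            asyncio.run(client.aclose())

    close_async_client(httpx.AsyncClient())  # sync context, falls back to asyncio.run()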
@@ -359,6 +388,227 @@ class LMStudioProvider(BaseProvider):
                 finish_reason="error"
             )
 
+    async def _agenerate_internal(self,
+                                  prompt: str,
+                                  messages: Optional[List[Dict[str, str]]] = None,
+                                  system_prompt: Optional[str] = None,
+                                  tools: Optional[List[Dict[str, Any]]] = None,
+                                  media: Optional[List['MediaContent']] = None,
+                                  stream: bool = False,
+                                  response_model: Optional[Type[BaseModel]] = None,
+                                  execute_tools: Optional[bool] = None,
+                                  tool_call_tags: Optional[str] = None,
+                                  **kwargs) -> Union[GenerateResponse, AsyncIterator[GenerateResponse]]:
+        """Native async implementation using httpx.AsyncClient - 3-10x faster for batch operations."""
+
+        # Build messages for chat completions with tool support (same logic as sync)
+        chat_messages = []
+
+        # Add tools to system prompt if provided
+        enhanced_system_prompt = system_prompt
+        if tools and self.tool_handler.supports_prompted:
+            tool_prompt = self.tool_handler.format_tools_prompt(tools)
+            if enhanced_system_prompt:
+                enhanced_system_prompt += f"\n\n{tool_prompt}"
+            else:
+                enhanced_system_prompt = tool_prompt
+
+        # Add system message if provided
+        if enhanced_system_prompt:
+            chat_messages.append({
+                "role": "system",
+                "content": enhanced_system_prompt
+            })
+
+        # Add conversation history
+        if messages:
+            chat_messages.extend(messages)
+
+        # Handle media content
+        if media:
+            user_message_text = prompt.strip() if prompt else ""
+            if not user_message_text and chat_messages:
+                for msg in reversed(chat_messages):
+                    if msg.get("role") == "user" and msg.get("content"):
+                        user_message_text = msg["content"]
+                        break
+            try:
+                processed_media = self._process_media_content(media)
+                media_handler = self._get_media_handler_for_model(self.model)
+                multimodal_message = media_handler.create_multimodal_message(user_message_text, processed_media)
+
+                if isinstance(multimodal_message, str):
+                    if chat_messages and chat_messages[-1].get("role") == "user":
+                        chat_messages[-1]["content"] = multimodal_message
+                    else:
+                        chat_messages.append({"role": "user", "content": multimodal_message})
+                else:
+                    if chat_messages and chat_messages[-1].get("role") == "user":
+                        chat_messages[-1] = multimodal_message
+                    else:
+                        chat_messages.append(multimodal_message)
+            except ImportError:
+                self.logger.warning("Media processing not available. Install with: pip install abstractcore[media]")
+                if user_message_text:
+                    chat_messages.append({"role": "user", "content": user_message_text})
+            except Exception as e:
+                self.logger.warning(f"Failed to process media content: {e}")
+                if user_message_text:
+                    chat_messages.append({"role": "user", "content": user_message_text})
+
+        # Add prompt as separate message if provided
+        elif prompt and prompt.strip():
+            chat_messages.append({"role": "user", "content": prompt})
+
+        # Build request payload
+        generation_kwargs = self._prepare_generation_kwargs(**kwargs)
+        max_output_tokens = self._get_provider_max_tokens_param(generation_kwargs)
+
+        payload = {
+            "model": self.model,
+            "messages": chat_messages,
+            "stream": stream,
+            "temperature": kwargs.get("temperature", self.temperature),
+            "max_tokens": max_output_tokens,
+            "top_p": kwargs.get("top_p", 0.9),
+        }
+
+        # Add additional parameters
+        if "frequency_penalty" in kwargs:
+            payload["frequency_penalty"] = kwargs["frequency_penalty"]
+        if "presence_penalty" in kwargs:
+            payload["presence_penalty"] = kwargs["presence_penalty"]
+        if "repetition_penalty" in kwargs:
+            payload["repetition_penalty"] = kwargs["repetition_penalty"]
+
+        # Add seed if provided
+        seed_value = kwargs.get("seed", self.seed)
+        if seed_value is not None:
+            payload["seed"] = seed_value
+
+        # Add structured output support
+        if response_model and PYDANTIC_AVAILABLE:
+            json_schema = response_model.model_json_schema()
+            payload["response_format"] = {
+                "type": "json_schema",
+                "json_schema": {
+                    "name": response_model.__name__,
+                    "schema": json_schema
+                }
+            }
+
+        if stream:
+            return self._async_stream_generate(payload)
+        else:
+            response = await self._async_single_generate(payload)
+
+            # Execute tools if enabled
+            if self.execute_tools and tools and self.tool_handler.supports_prompted and response.content:
+                response = self._handle_prompted_tool_execution(response, tools, execute_tools)
+
+            return response
+
+    async def _async_single_generate(self, payload: Dict[str, Any]) -> GenerateResponse:
+        """Native async single response generation."""
+        try:
+            # Track generation time
+            start_time = time.time()
+            response = await self.async_client.post(
+                f"{self.base_url}/chat/completions",
+                json=payload,
+                headers={"Content-Type": "application/json"}
+            )
+            response.raise_for_status()
+            gen_time = round((time.time() - start_time) * 1000, 1)
+
+            result = response.json()
+
+            # Extract response from OpenAI format
+            if "choices" in result and len(result["choices"]) > 0:
+                choice = result["choices"][0]
+                content = choice.get("message", {}).get("content", "")
+                finish_reason = choice.get("finish_reason", "stop")
+            else:
+                content = "No response generated"
+                finish_reason = "error"
+
+            # Extract usage info
+            usage = result.get("usage", {})
+
+            return GenerateResponse(
+                content=content,
+                model=self.model,
+                finish_reason=finish_reason,
+                raw_response=result,
+                usage={
+                    "input_tokens": usage.get("prompt_tokens", 0),
+                    "output_tokens": usage.get("completion_tokens", 0),
+                    "total_tokens": usage.get("total_tokens", 0),
+                    "prompt_tokens": usage.get("prompt_tokens", 0),
+                    "completion_tokens": usage.get("completion_tokens", 0)
+                },
+                gen_time=gen_time
+            )
+
+        except Exception as e:
+            error_str = str(e).lower()
+            if ('404' in error_str or 'not found' in error_str or 'model' in error_str) and ('not found' in error_str):
+                try:
+                    available_models = self.list_available_models(base_url=self.base_url)
+                    error_message = format_model_error("LMStudio", self.model, available_models)
+                    raise ModelNotFoundError(error_message)
+                except Exception:
+                    raise ModelNotFoundError(f"Model '{self.model}' not found in LMStudio")
+            else:
+                raise ProviderAPIError(f"LMStudio API error: {str(e)}")
+
+    async def _async_stream_generate(self, payload: Dict[str, Any]) -> AsyncIterator[GenerateResponse]:
+        """Native async streaming response generation."""
+        try:
+            async with self.async_client.stream(
+                "POST",
+                f"{self.base_url}/chat/completions",
+                json=payload,
+                headers={"Content-Type": "application/json"}
+            ) as response:
+                response.raise_for_status()
+
+                async for line in response.aiter_lines():
+                    if line:
+                        line = line.strip()
+
+                        if line.startswith("data: "):
+                            data = line[6:]  # Remove "data: " prefix
+
+                            if data == "[DONE]":
+                                break
+
+                            try:
+                                chunk = json.loads(data)
+
+                                if "choices" in chunk and len(chunk["choices"]) > 0:
+                                    choice = chunk["choices"][0]
+                                    delta = choice.get("delta", {})
+                                    content = delta.get("content", "")
+                                    finish_reason = choice.get("finish_reason")
+
+                                    yield GenerateResponse(
+                                        content=content,
+                                        model=self.model,
+                                        finish_reason=finish_reason,
+                                        raw_response=chunk
+                                    )
+
+                            except json.JSONDecodeError:
+                                continue
+
+        except Exception as e:
+            yield GenerateResponse(
+                content=f"Error: {str(e)}",
+                model=self.model,
+                finish_reason="error"
+            )
+
     def get_capabilities(self) -> List[str]:
         """Get LM Studio capabilities"""
         return ["streaming", "chat", "tools"]
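Taken together, these additions give the LM Studio provider a native async path, which is where the claimed 3-10x speed-up for batch work comes from: independent requests overlap on one event loop instead of waiting on each other. A usage sketch, assuming an LM Studio server is reachable (locally or via LMSTUDIO_BASE_URL) with a loaded model; it calls the internal hook directly for illustration, whereas application code would normally go through whatever public async wrapper BaseProvider exposes.

    import asyncio
    from abstractcore.providers.lmstudio_provider import LMStudioProvider

    async def main():
        provider = LMStudioProvider(model="local-model")
        prompts = [
            "Summarize HTTP/1.1 in one sentence.",
            "Name three uses of text embeddings.",
            "What is a context window?",
        ]
        # Fire the requests concurrently; each one awaits its own HTTP round-trip.
        responses = await asyncio.gather(
            *(provider._agenerate_internal(p) for p in prompts)
        )
        for r in responses:
            print(r.content[:80])

    asyncio.run(main())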
abstractcore/providers/ollama_provider.py

@@ -3,9 +3,10 @@ Ollama provider implementation.
 """
 
 import json
+import os
 import httpx
 import time
-from typing import List, Dict, Any, Optional, Union, Iterator, Type
+from typing import List, Dict, Any, Optional, Union, Iterator, AsyncIterator, Type
 
 try:
     from pydantic import BaseModel
@@ -15,7 +16,7 @@ except ImportError:
     BaseModel = None
 from .base import BaseProvider
 from ..core.types import GenerateResponse
-from ..exceptions import ProviderAPIError, ModelNotFoundError, format_model_error
+from ..exceptions import ProviderAPIError, ModelNotFoundError, format_model_error, format_provider_error
 from ..tools import UniversalToolHandler, ToolDefinition, execute_tools
 from ..events import EventType
 
@@ -23,16 +24,33 @@ from ..events import EventType
 class OllamaProvider(BaseProvider):
     """Ollama provider for local models with full integration"""
 
-    def __init__(self, model: str = "qwen3:4b-instruct-2507-q4_K_M", base_url: str = "http://localhost:11434", **kwargs):
+    def __init__(self, model: str = "qwen3:4b-instruct-2507-q4_K_M", base_url: Optional[str] = None, **kwargs):
         super().__init__(model, **kwargs)
         self.provider = "ollama"
 
-        self.base_url = base_url.rstrip('/')
+        # Base URL priority: parameter > OLLAMA_BASE_URL > OLLAMA_HOST > default
+        self.base_url = (
+            base_url or
+            os.getenv("OLLAMA_BASE_URL") or
+            os.getenv("OLLAMA_HOST") or
+            "http://localhost:11434"
+        ).rstrip('/')
         self.client = httpx.Client(timeout=self._timeout)
+        self._async_client = None  # Lazy-loaded async client
 
         # Initialize tool handler
         self.tool_handler = UniversalToolHandler(model)
 
+    @property
+    def async_client(self):
+        """Lazy-load async HTTP client for native async operations."""
+        if self._async_client is None:
+            self._async_client = httpx.AsyncClient(
+                base_url=self.base_url,
+                timeout=self._timeout
+            )
+        return self._async_client
+
     def unload(self) -> None:
         """
         Unload the model from Ollama server memory.
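Two things change here: the endpoint now resolves as parameter > OLLAMA_BASE_URL > OLLAMA_HOST > default, and the lazy AsyncClient is bound to that base_url, which is why the async methods later in this diff post to relative paths such as "/api/chat" and "/api/generate". A stand-alone httpx sketch of that second point, assuming a local Ollama server with the named model already pulled.

    import asyncio
    import httpx

    async def demo():
        # base_url-bound client: requests use relative paths, mirroring the provider above.
        async with httpx.AsyncClient(base_url="http://localhost:11434", timeout=None) as client:
            resp = await client.post("/api/generate", json={
                "model": "qwen3:4b-instruct-2507-q4_K_M",
                "prompt": "Say hello in five words.",
                "stream": False,
            })
            resp.raise_for_status()
            print(resp.json().get("response", ""))

    asyncio.run(demo())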
@@ -59,6 +77,17 @@ class OllamaProvider(BaseProvider):
             if hasattr(self, 'client') and self.client is not None:
                 self.client.close()
 
+            # Close async client if it was created
+            if self._async_client is not None:
+                import asyncio
+                try:
+                    loop = asyncio.get_running_loop()
+                    loop.create_task(self._async_client.aclose())
+                except RuntimeError:
+                    # No running loop, close synchronously
+                    import asyncio
+                    asyncio.run(self._async_client.aclose())
+
         except Exception as e:
             # Log but don't raise - unload should be best-effort
             if hasattr(self, 'logger'):
@@ -377,6 +406,226 @@
                 finish_reason="error"
             )
 
+    async def _agenerate_internal(self,
+                                  prompt: str,
+                                  messages: Optional[List[Dict]],
+                                  system_prompt: Optional[str],
+                                  tools: Optional[List],
+                                  media: Optional[List],
+                                  stream: bool,
+                                  **kwargs):
+        """Native async implementation using httpx.AsyncClient - 3-10x faster for batch operations."""
+        # Handle tools for prompted models
+        effective_system_prompt = system_prompt
+        if tools and self.tool_handler.supports_prompted:
+            tool_prompt = self.tool_handler.format_tools_prompt(tools)
+            if effective_system_prompt:
+                effective_system_prompt = f"{effective_system_prompt}\n\n{tool_prompt}"
+            else:
+                effective_system_prompt = tool_prompt
+
+        # Build request payload (same logic as sync)
+        generation_kwargs = self._prepare_generation_kwargs(**kwargs)
+        max_output_tokens = self._get_provider_max_tokens_param(generation_kwargs)
+        response_model = kwargs.get('response_model')
+
+        payload = {
+            "model": self.model,
+            "stream": stream,
+            "options": {
+                "temperature": kwargs.get("temperature", self.temperature),
+                "num_predict": max_output_tokens,
+            }
+        }
+
+        seed_value = kwargs.get("seed", self.seed)
+        if seed_value is not None:
+            payload["options"]["seed"] = seed_value
+
+        # Add structured output support
+        if response_model and PYDANTIC_AVAILABLE:
+            json_schema = response_model.model_json_schema()
+            payload["format"] = json_schema
+
+        # Use chat format
+        use_chat_format = tools is not None or messages is not None or True
+
+        if use_chat_format:
+            payload["messages"] = []
+
+            if effective_system_prompt:
+                payload["messages"].append({
+                    "role": "system",
+                    "content": effective_system_prompt
+                })
+
+            if messages:
+                converted_messages = self._convert_messages_for_ollama(messages)
+                payload["messages"].extend(converted_messages)
+
+            if media:
+                user_message_text = prompt.strip() if prompt else ""
+                try:
+                    from ..media.handlers import LocalMediaHandler
+                    media_handler = LocalMediaHandler("ollama", self.model_capabilities, model_name=self.model)
+                    multimodal_message = media_handler.create_multimodal_message(user_message_text, media)
+
+                    if isinstance(multimodal_message, str):
+                        payload["messages"].append({"role": "user", "content": multimodal_message})
+                    else:
+                        payload["messages"].append(multimodal_message)
+                except Exception as e:
+                    if hasattr(self, 'logger'):
+                        self.logger.warning(f"Failed to process media: {e}")
+                    if user_message_text:
+                        payload["messages"].append({"role": "user", "content": user_message_text})
+
+            elif prompt and prompt.strip():
+                payload["messages"].append({"role": "user", "content": prompt})
+
+            endpoint = "/api/chat"
+        else:
+            full_prompt = prompt
+            if effective_system_prompt:
+                full_prompt = f"{effective_system_prompt}\n\n{prompt}"
+            payload["prompt"] = full_prompt
+            endpoint = "/api/generate"
+
+        if stream:
+            return self._async_stream_generate(endpoint, payload, tools, kwargs.get('tool_call_tags'))
+        else:
+            return await self._async_single_generate(endpoint, payload, tools, kwargs.get('media_metadata'))
+
+    async def _async_single_generate(self, endpoint: str, payload: Dict[str, Any],
+                                     tools: Optional[List[Dict[str, Any]]] = None,
+                                     media_metadata: Optional[List[Dict[str, Any]]] = None) -> GenerateResponse:
+        """Native async single response generation."""
+        try:
+            start_time = time.time()
+            response = await self.async_client.post(endpoint, json=payload)
+            response.raise_for_status()
+            gen_time = round((time.time() - start_time) * 1000, 1)
+
+            result = response.json()
+
+            if endpoint == "/api/chat":
+                content = result.get("message", {}).get("content", "")
+            else:
+                content = result.get("response", "")
+
+            generate_response = GenerateResponse(
+                content=content,
+                model=self.model,
+                finish_reason="stop",
+                raw_response=result,
+                usage={
+                    "input_tokens": result.get("prompt_eval_count", 0),
+                    "output_tokens": result.get("eval_count", 0),
+                    "total_tokens": result.get("prompt_eval_count", 0) + result.get("eval_count", 0),
+                    "prompt_tokens": result.get("prompt_eval_count", 0),
+                    "completion_tokens": result.get("eval_count", 0)
+                },
+                gen_time=gen_time
+            )
+
+            if media_metadata:
+                if not generate_response.metadata:
+                    generate_response.metadata = {}
+                generate_response.metadata['media_metadata'] = media_metadata
+
+            if self.execute_tools and tools and self.tool_handler.supports_prompted and content:
+                return self._handle_tool_execution(generate_response, tools)
+
+            return generate_response
+
+        except Exception as e:
+            error_str = str(e).lower()
+            if ('404' in error_str or 'not found' in error_str):
+                available_models = self.list_available_models(base_url=self.base_url)
+                error_message = format_model_error("Ollama", self.model, available_models)
+                raise ModelNotFoundError(error_message)
+            else:
+                return GenerateResponse(
+                    content=f"Error: {str(e)}",
+                    model=self.model,
+                    finish_reason="error"
+                )
+
+    async def _async_stream_generate(self, endpoint: str, payload: Dict[str, Any],
+                                     tools: Optional[List[Dict[str, Any]]] = None,
+                                     tool_call_tags: Optional[str] = None):
+        """Native async streaming response generation."""
+        try:
+            async with self.async_client.stream("POST", endpoint, json=payload) as response:
+                response.raise_for_status()
+
+                full_content = ""
+                rewriter = None
+                buffer = ""
+                if tool_call_tags:
+                    try:
+                        from ..tools.tag_rewriter import create_tag_rewriter
+                        rewriter = create_tag_rewriter(tool_call_tags)
+                    except ImportError:
+                        pass
+
+                async for line in response.aiter_lines():
+                    if line:
+                        try:
+                            chunk = json.loads(line)
+
+                            if endpoint == "/api/chat":
+                                content = chunk.get("message", {}).get("content", "")
+                            else:
+                                content = chunk.get("response", "")
+
+                            done = chunk.get("done", False)
+                            full_content += content
+
+                            if rewriter and content:
+                                rewritten_content, buffer = rewriter.rewrite_streaming_chunk(content, buffer)
+                                content = rewritten_content
+
+                            chunk_response = GenerateResponse(
+                                content=content,
+                                model=self.model,
+                                finish_reason="stop" if done else None,
+                                raw_response=chunk
+                            )
+
+                            yield chunk_response
+
+                            if done:
+                                break
+
+                        except json.JSONDecodeError:
+                            continue
+
+                # Execute tools if enabled
+                if self.execute_tools and tools and self.tool_handler.supports_prompted and full_content:
+                    complete_response = GenerateResponse(
+                        content=full_content,
+                        model=self.model,
+                        finish_reason="stop"
+                    )
+
+                    final_response = self._handle_tool_execution(complete_response, tools)
+
+                    if final_response.content != full_content:
+                        tool_results_content = final_response.content[len(full_content):]
+                        yield GenerateResponse(
+                            content=tool_results_content,
+                            model=self.model,
+                            finish_reason="stop"
+                        )
+
+        except Exception as e:
+            yield GenerateResponse(
+                content=f"Error: {str(e)}",
+                model=self.model,
+                finish_reason="error"
+            )
+
     def _handle_tool_execution(self, response: GenerateResponse, tools: List[Dict[str, Any]]) -> GenerateResponse:
         """Handle tool execution for prompted models"""
         # Parse tool calls from response
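For completeness, a sketch of consuming the new Ollama async streaming path. Unlike LM Studio's SSE stream of "data: ..." lines, Ollama emits newline-delimited JSON objects with a done flag, which is exactly what _async_stream_generate parses above. The example assumes a local Ollama server with the default model pulled and calls the internal hook directly; application code would normally go through whatever public async wrapper BaseProvider exposes.

    import asyncio
    from abstractcore.providers.ollama_provider import OllamaProvider

    async def main():
        provider = OllamaProvider()  # or OllamaProvider(base_url=...), or set OLLAMA_BASE_URL / OLLAMA_HOST

        # With stream=True, awaiting the internal hook returns the async generator
        # produced by _async_stream_generate.
        stream = await provider._agenerate_internal(
            "Explain NDJSON in one sentence.",  # prompt
            None,   # messages
            None,   # system_prompt
            None,   # tools
            None,   # media
            True,   # stream
        )
        async for chunk in stream:
            print(chunk.content, end="", flush=True)
        print()

    asyncio.run(main())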