abstractcore-2.5.3-py3-none-any.whl → abstractcore-2.6.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractcore/__init__.py +7 -1
- abstractcore/architectures/detection.py +2 -2
- abstractcore/config/__init__.py +24 -1
- abstractcore/config/manager.py +47 -0
- abstractcore/core/retry.py +2 -2
- abstractcore/core/session.py +132 -1
- abstractcore/download.py +253 -0
- abstractcore/embeddings/manager.py +2 -2
- abstractcore/events/__init__.py +112 -1
- abstractcore/exceptions/__init__.py +49 -2
- abstractcore/media/processors/office_processor.py +2 -2
- abstractcore/media/utils/image_scaler.py +2 -2
- abstractcore/media/vision_fallback.py +2 -2
- abstractcore/providers/anthropic_provider.py +200 -6
- abstractcore/providers/base.py +100 -5
- abstractcore/providers/lmstudio_provider.py +254 -4
- abstractcore/providers/ollama_provider.py +253 -4
- abstractcore/providers/openai_provider.py +258 -6
- abstractcore/providers/registry.py +9 -1
- abstractcore/providers/streaming.py +2 -2
- abstractcore/tools/common_tools.py +2 -2
- abstractcore/tools/handler.py +2 -2
- abstractcore/tools/parser.py +2 -2
- abstractcore/tools/registry.py +2 -2
- abstractcore/tools/syntax_rewriter.py +2 -2
- abstractcore/tools/tag_rewriter.py +3 -3
- abstractcore/utils/self_fixes.py +2 -2
- abstractcore/utils/version.py +1 -1
- {abstractcore-2.5.3.dist-info → abstractcore-2.6.2.dist-info}/METADATA +162 -4
- {abstractcore-2.5.3.dist-info → abstractcore-2.6.2.dist-info}/RECORD +34 -33
- {abstractcore-2.5.3.dist-info → abstractcore-2.6.2.dist-info}/WHEEL +0 -0
- {abstractcore-2.5.3.dist-info → abstractcore-2.6.2.dist-info}/entry_points.txt +0 -0
- {abstractcore-2.5.3.dist-info → abstractcore-2.6.2.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.5.3.dist-info → abstractcore-2.6.2.dist-info}/top_level.txt +0 -0
abstractcore/providers/lmstudio_provider.py

@@ -2,10 +2,11 @@
 LM Studio provider implementation (OpenAI-compatible API).
 """
 
+import os
 import httpx
 import json
 import time
-from typing import List, Dict, Any, Optional, Union, Iterator, Type
+from typing import List, Dict, Any, Optional, Union, Iterator, AsyncIterator, Type
 
 try:
     from pydantic import BaseModel
@@ -15,7 +16,7 @@ except ImportError:
     BaseModel = None
 from .base import BaseProvider
 from ..core.types import GenerateResponse
-from ..exceptions import ProviderAPIError, ModelNotFoundError, format_model_error
+from ..exceptions import ProviderAPIError, ModelNotFoundError, format_model_error, format_provider_error
 from ..tools import UniversalToolHandler, execute_tools
 from ..events import EventType
 
@@ -23,14 +24,19 @@ from ..events import EventType
 class LMStudioProvider(BaseProvider):
     """LM Studio provider using OpenAI-compatible API"""
 
-    def __init__(self, model: str = "local-model", base_url: str =
+    def __init__(self, model: str = "local-model", base_url: Optional[str] = None, **kwargs):
         super().__init__(model, **kwargs)
         self.provider = "lmstudio"
 
         # Initialize tool handler
         self.tool_handler = UniversalToolHandler(model)
 
-
+        # Base URL priority: parameter > LMSTUDIO_BASE_URL > default
+        self.base_url = (
+            base_url or
+            os.getenv("LMSTUDIO_BASE_URL") or
+            "http://localhost:1234/v1"
+        ).rstrip('/')
 
         # Get timeout value - None means unlimited timeout
         timeout_value = getattr(self, '_timeout', None)
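The new constructor resolves the base URL in a fixed priority order: explicit `base_url` argument, then the `LMSTUDIO_BASE_URL` environment variable, then the localhost default, with any trailing slash stripped. A minimal standalone sketch of that resolution order; `resolve_lmstudio_base_url` is a hypothetical helper written here for illustration, not part of abstractcore:

```python
import os

def resolve_lmstudio_base_url(base_url: str | None = None) -> str:
    """Illustrative helper mirroring the priority: explicit argument,
    then LMSTUDIO_BASE_URL, then the localhost default."""
    return (
        base_url
        or os.getenv("LMSTUDIO_BASE_URL")
        or "http://localhost:1234/v1"
    ).rstrip("/")

# Explicit argument wins over the environment variable.
os.environ["LMSTUDIO_BASE_URL"] = "http://gpu-box:1234/v1/"
assert resolve_lmstudio_base_url("http://other:1234/v1") == "http://other:1234/v1"
# Environment variable wins over the default, with trailing slashes stripped.
assert resolve_lmstudio_base_url() == "http://gpu-box:1234/v1"
```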
@@ -47,9 +53,21 @@
         except Exception:
             raise RuntimeError(f"Failed to create HTTP client for LMStudio: {e}")
 
+        self._async_client = None  # Lazy-loaded async client
+
         # Validate model exists in LMStudio
         self._validate_model()
 
+    @property
+    def async_client(self):
+        """Lazy-load async HTTP client for native async operations."""
+        if self._async_client is None:
+            timeout_value = getattr(self, '_timeout', None)
+            if timeout_value is not None and timeout_value <= 0:
+                timeout_value = None
+            self._async_client = httpx.AsyncClient(timeout=timeout_value)
+        return self._async_client
+
     def _validate_model(self):
         """Validate that the model exists in LMStudio"""
         try:
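The lazy `async_client` property means no `httpx.AsyncClient` is created unless async generation is actually used, and all concurrent requests then share one client and its connection pool. A condensed, self-contained sketch of the same pattern; the class and attribute names here are illustrative, not abstractcore's:

```python
from typing import Optional

import httpx


class AsyncClientHolder:
    """Illustrative pattern: defer AsyncClient creation until first async use."""

    def __init__(self, timeout: Optional[float] = None) -> None:
        self._timeout = timeout
        self._async_client: Optional[httpx.AsyncClient] = None

    @property
    def async_client(self) -> httpx.AsyncClient:
        # Created once, then reused, so concurrent requests share a single
        # connection pool instead of opening a client per call.
        if self._async_client is None:
            timeout = self._timeout
            if timeout is not None and timeout <= 0:
                timeout = None  # non-positive values mean "no timeout"
            self._async_client = httpx.AsyncClient(timeout=timeout)
        return self._async_client
```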
@@ -87,6 +105,17 @@
             if hasattr(self, 'client') and self.client is not None:
                 self.client.close()
 
+            # Close async client if it was created
+            if self._async_client is not None:
+                import asyncio
+                try:
+                    loop = asyncio.get_running_loop()
+                    loop.create_task(self._async_client.aclose())
+                except RuntimeError:
+                    # No running loop
+                    import asyncio
+                    asyncio.run(self._async_client.aclose())
+
         except Exception as e:
             # Log but don't raise - unload should be best-effort
             if hasattr(self, 'logger'):
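Closing an `httpx.AsyncClient` from a method that may be called with or without a running event loop is awkward: the diff schedules `aclose()` as a task when a loop is running and falls back to `asyncio.run()` otherwise. A minimal standalone sketch of that shutdown pattern, with a function name chosen here for illustration:

```python
import asyncio

import httpx


def close_async_client_best_effort(client: httpx.AsyncClient) -> None:
    """Close an AsyncClient whether or not an event loop is currently running."""
    try:
        loop = asyncio.get_running_loop()
        # Inside a running loop: schedule the close without blocking the loop.
        loop.create_task(client.aclose())
    except RuntimeError:
        # No running loop: drive the close coroutine to completion ourselves.
        asyncio.run(client.aclose())
```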
@@ -359,6 +388,227 @@
                 finish_reason="error"
             )
 
+    async def _agenerate_internal(self,
+                                  prompt: str,
+                                  messages: Optional[List[Dict[str, str]]] = None,
+                                  system_prompt: Optional[str] = None,
+                                  tools: Optional[List[Dict[str, Any]]] = None,
+                                  media: Optional[List['MediaContent']] = None,
+                                  stream: bool = False,
+                                  response_model: Optional[Type[BaseModel]] = None,
+                                  execute_tools: Optional[bool] = None,
+                                  tool_call_tags: Optional[str] = None,
+                                  **kwargs) -> Union[GenerateResponse, AsyncIterator[GenerateResponse]]:
+        """Native async implementation using httpx.AsyncClient - 3-10x faster for batch operations."""
+
+        # Build messages for chat completions with tool support (same logic as sync)
+        chat_messages = []
+
+        # Add tools to system prompt if provided
+        enhanced_system_prompt = system_prompt
+        if tools and self.tool_handler.supports_prompted:
+            tool_prompt = self.tool_handler.format_tools_prompt(tools)
+            if enhanced_system_prompt:
+                enhanced_system_prompt += f"\n\n{tool_prompt}"
+            else:
+                enhanced_system_prompt = tool_prompt
+
+        # Add system message if provided
+        if enhanced_system_prompt:
+            chat_messages.append({
+                "role": "system",
+                "content": enhanced_system_prompt
+            })
+
+        # Add conversation history
+        if messages:
+            chat_messages.extend(messages)
+
+        # Handle media content
+        if media:
+            user_message_text = prompt.strip() if prompt else ""
+            if not user_message_text and chat_messages:
+                for msg in reversed(chat_messages):
+                    if msg.get("role") == "user" and msg.get("content"):
+                        user_message_text = msg["content"]
+                        break
+            try:
+                processed_media = self._process_media_content(media)
+                media_handler = self._get_media_handler_for_model(self.model)
+                multimodal_message = media_handler.create_multimodal_message(user_message_text, processed_media)
+
+                if isinstance(multimodal_message, str):
+                    if chat_messages and chat_messages[-1].get("role") == "user":
+                        chat_messages[-1]["content"] = multimodal_message
+                    else:
+                        chat_messages.append({"role": "user", "content": multimodal_message})
+                else:
+                    if chat_messages and chat_messages[-1].get("role") == "user":
+                        chat_messages[-1] = multimodal_message
+                    else:
+                        chat_messages.append(multimodal_message)
+            except ImportError:
+                self.logger.warning("Media processing not available. Install with: pip install abstractcore[media]")
+                if user_message_text:
+                    chat_messages.append({"role": "user", "content": user_message_text})
+            except Exception as e:
+                self.logger.warning(f"Failed to process media content: {e}")
+                if user_message_text:
+                    chat_messages.append({"role": "user", "content": user_message_text})
+
+        # Add prompt as separate message if provided
+        elif prompt and prompt.strip():
+            chat_messages.append({"role": "user", "content": prompt})
+
+        # Build request payload
+        generation_kwargs = self._prepare_generation_kwargs(**kwargs)
+        max_output_tokens = self._get_provider_max_tokens_param(generation_kwargs)
+
+        payload = {
+            "model": self.model,
+            "messages": chat_messages,
+            "stream": stream,
+            "temperature": kwargs.get("temperature", self.temperature),
+            "max_tokens": max_output_tokens,
+            "top_p": kwargs.get("top_p", 0.9),
+        }
+
+        # Add additional parameters
+        if "frequency_penalty" in kwargs:
+            payload["frequency_penalty"] = kwargs["frequency_penalty"]
+        if "presence_penalty" in kwargs:
+            payload["presence_penalty"] = kwargs["presence_penalty"]
+        if "repetition_penalty" in kwargs:
+            payload["repetition_penalty"] = kwargs["repetition_penalty"]
+
+        # Add seed if provided
+        seed_value = kwargs.get("seed", self.seed)
+        if seed_value is not None:
+            payload["seed"] = seed_value
+
+        # Add structured output support
+        if response_model and PYDANTIC_AVAILABLE:
+            json_schema = response_model.model_json_schema()
+            payload["response_format"] = {
+                "type": "json_schema",
+                "json_schema": {
+                    "name": response_model.__name__,
+                    "schema": json_schema
+                }
+            }
+
+        if stream:
+            return self._async_stream_generate(payload)
+        else:
+            response = await self._async_single_generate(payload)
+
+            # Execute tools if enabled
+            if self.execute_tools and tools and self.tool_handler.supports_prompted and response.content:
+                response = self._handle_prompted_tool_execution(response, tools, execute_tools)
+
+            return response
+
+    async def _async_single_generate(self, payload: Dict[str, Any]) -> GenerateResponse:
+        """Native async single response generation."""
+        try:
+            # Track generation time
+            start_time = time.time()
+            response = await self.async_client.post(
+                f"{self.base_url}/chat/completions",
+                json=payload,
+                headers={"Content-Type": "application/json"}
+            )
+            response.raise_for_status()
+            gen_time = round((time.time() - start_time) * 1000, 1)
+
+            result = response.json()
+
+            # Extract response from OpenAI format
+            if "choices" in result and len(result["choices"]) > 0:
+                choice = result["choices"][0]
+                content = choice.get("message", {}).get("content", "")
+                finish_reason = choice.get("finish_reason", "stop")
+            else:
+                content = "No response generated"
+                finish_reason = "error"
+
+            # Extract usage info
+            usage = result.get("usage", {})
+
+            return GenerateResponse(
+                content=content,
+                model=self.model,
+                finish_reason=finish_reason,
+                raw_response=result,
+                usage={
+                    "input_tokens": usage.get("prompt_tokens", 0),
+                    "output_tokens": usage.get("completion_tokens", 0),
+                    "total_tokens": usage.get("total_tokens", 0),
+                    "prompt_tokens": usage.get("prompt_tokens", 0),
+                    "completion_tokens": usage.get("completion_tokens", 0)
+                },
+                gen_time=gen_time
+            )
+
+        except Exception as e:
+            error_str = str(e).lower()
+            if ('404' in error_str or 'not found' in error_str or 'model' in error_str) and ('not found' in error_str):
+                try:
+                    available_models = self.list_available_models(base_url=self.base_url)
+                    error_message = format_model_error("LMStudio", self.model, available_models)
+                    raise ModelNotFoundError(error_message)
+                except Exception:
+                    raise ModelNotFoundError(f"Model '{self.model}' not found in LMStudio")
+            else:
+                raise ProviderAPIError(f"LMStudio API error: {str(e)}")
+
+    async def _async_stream_generate(self, payload: Dict[str, Any]) -> AsyncIterator[GenerateResponse]:
+        """Native async streaming response generation."""
+        try:
+            async with self.async_client.stream(
+                "POST",
+                f"{self.base_url}/chat/completions",
+                json=payload,
+                headers={"Content-Type": "application/json"}
+            ) as response:
+                response.raise_for_status()
+
+                async for line in response.aiter_lines():
+                    if line:
+                        line = line.strip()
+
+                        if line.startswith("data: "):
+                            data = line[6:]  # Remove "data: " prefix
+
+                            if data == "[DONE]":
+                                break
+
+                            try:
+                                chunk = json.loads(data)
+
+                                if "choices" in chunk and len(chunk["choices"]) > 0:
+                                    choice = chunk["choices"][0]
+                                    delta = choice.get("delta", {})
+                                    content = delta.get("content", "")
+                                    finish_reason = choice.get("finish_reason")
+
+                                    yield GenerateResponse(
+                                        content=content,
+                                        model=self.model,
+                                        finish_reason=finish_reason,
+                                        raw_response=chunk
+                                    )
+
+                            except json.JSONDecodeError:
+                                continue
+
+        except Exception as e:
+            yield GenerateResponse(
+                content=f"Error: {str(e)}",
+                model=self.model,
+                finish_reason="error"
+            )
+
     def get_capabilities(self) -> List[str]:
         """Get LM Studio capabilities"""
         return ["streaming", "chat", "tools"]
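The `_agenerate_internal` docstring claims a 3-10x speedup for batch operations; the gain comes from issuing requests concurrently over the shared `AsyncClient`. A hedged usage sketch under those assumptions: the public async wrapper added in `base.py` is not shown in this diff, so the sketch drives the internal method directly, `generate_batch` is a made-up name, and a reachable LM Studio server serving `local-model` is assumed (the constructor validates the model against the server):

```python
import asyncio

from abstractcore.providers.lmstudio_provider import LMStudioProvider


async def generate_batch(prompts: list[str]) -> list[str]:
    # Assumes an LM Studio server is reachable at the resolved base URL.
    provider = LMStudioProvider(model="local-model")
    # Concurrent requests share the single lazily created AsyncClient,
    # which is where the batch speed-up over sequential sync calls comes from.
    responses = await asyncio.gather(
        *(provider._agenerate_internal(p, stream=False) for p in prompts)
    )
    return [r.content for r in responses]


# asyncio.run(generate_batch(["Summarize A.", "Summarize B."]))
```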
abstractcore/providers/ollama_provider.py

@@ -3,9 +3,10 @@ Ollama provider implementation.
 """
 
 import json
+import os
 import httpx
 import time
-from typing import List, Dict, Any, Optional, Union, Iterator, Type
+from typing import List, Dict, Any, Optional, Union, Iterator, AsyncIterator, Type
 
 try:
     from pydantic import BaseModel
@@ -15,7 +16,7 @@ except ImportError:
     BaseModel = None
 from .base import BaseProvider
 from ..core.types import GenerateResponse
-from ..exceptions import ProviderAPIError, ModelNotFoundError, format_model_error
+from ..exceptions import ProviderAPIError, ModelNotFoundError, format_model_error, format_provider_error
 from ..tools import UniversalToolHandler, ToolDefinition, execute_tools
 from ..events import EventType
 
@@ -23,16 +24,33 @@ from ..events import EventType
 class OllamaProvider(BaseProvider):
     """Ollama provider for local models with full integration"""
 
-    def __init__(self, model: str = "qwen3:4b-instruct-2507-q4_K_M", base_url: str =
+    def __init__(self, model: str = "qwen3:4b-instruct-2507-q4_K_M", base_url: Optional[str] = None, **kwargs):
         super().__init__(model, **kwargs)
         self.provider = "ollama"
 
-
+        # Base URL priority: parameter > OLLAMA_BASE_URL > OLLAMA_HOST > default
+        self.base_url = (
+            base_url or
+            os.getenv("OLLAMA_BASE_URL") or
+            os.getenv("OLLAMA_HOST") or
+            "http://localhost:11434"
+        ).rstrip('/')
         self.client = httpx.Client(timeout=self._timeout)
+        self._async_client = None  # Lazy-loaded async client
 
         # Initialize tool handler
         self.tool_handler = UniversalToolHandler(model)
 
+    @property
+    def async_client(self):
+        """Lazy-load async HTTP client for native async operations."""
+        if self._async_client is None:
+            self._async_client = httpx.AsyncClient(
+                base_url=self.base_url,
+                timeout=self._timeout
+            )
+        return self._async_client
+
     def unload(self) -> None:
         """
         Unload the model from Ollama server memory.
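The Ollama provider adds a second environment fallback, `OLLAMA_HOST`, between `OLLAMA_BASE_URL` and the localhost default. A short standalone sketch of that resolution order; the helper name is illustrative, not abstractcore's API. Note that the hunk shown applies the value verbatim (only stripping a trailing slash), so a full URL with scheme appears to be expected here:

```python
import os

def resolve_ollama_base_url(base_url: str | None = None) -> str:
    """Illustrative helper: argument, then OLLAMA_BASE_URL, then OLLAMA_HOST,
    then the default local server, with any trailing slash removed."""
    return (
        base_url
        or os.getenv("OLLAMA_BASE_URL")
        or os.getenv("OLLAMA_HOST")
        or "http://localhost:11434"
    ).rstrip("/")

os.environ.pop("OLLAMA_BASE_URL", None)
os.environ["OLLAMA_HOST"] = "http://ollama.internal:11434/"
assert resolve_ollama_base_url() == "http://ollama.internal:11434"
```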
@@ -59,6 +77,17 @@
             if hasattr(self, 'client') and self.client is not None:
                 self.client.close()
 
+            # Close async client if it was created
+            if self._async_client is not None:
+                import asyncio
+                try:
+                    loop = asyncio.get_running_loop()
+                    loop.create_task(self._async_client.aclose())
+                except RuntimeError:
+                    # No running loop, close synchronously
+                    import asyncio
+                    asyncio.run(self._async_client.aclose())
+
         except Exception as e:
             # Log but don't raise - unload should be best-effort
             if hasattr(self, 'logger'):
@@ -377,6 +406,226 @@
                 finish_reason="error"
             )
 
+    async def _agenerate_internal(self,
+                                  prompt: str,
+                                  messages: Optional[List[Dict]],
+                                  system_prompt: Optional[str],
+                                  tools: Optional[List],
+                                  media: Optional[List],
+                                  stream: bool,
+                                  **kwargs):
+        """Native async implementation using httpx.AsyncClient - 3-10x faster for batch operations."""
+        # Handle tools for prompted models
+        effective_system_prompt = system_prompt
+        if tools and self.tool_handler.supports_prompted:
+            tool_prompt = self.tool_handler.format_tools_prompt(tools)
+            if effective_system_prompt:
+                effective_system_prompt = f"{effective_system_prompt}\n\n{tool_prompt}"
+            else:
+                effective_system_prompt = tool_prompt
+
+        # Build request payload (same logic as sync)
+        generation_kwargs = self._prepare_generation_kwargs(**kwargs)
+        max_output_tokens = self._get_provider_max_tokens_param(generation_kwargs)
+        response_model = kwargs.get('response_model')
+
+        payload = {
+            "model": self.model,
+            "stream": stream,
+            "options": {
+                "temperature": kwargs.get("temperature", self.temperature),
+                "num_predict": max_output_tokens,
+            }
+        }
+
+        seed_value = kwargs.get("seed", self.seed)
+        if seed_value is not None:
+            payload["options"]["seed"] = seed_value
+
+        # Add structured output support
+        if response_model and PYDANTIC_AVAILABLE:
+            json_schema = response_model.model_json_schema()
+            payload["format"] = json_schema
+
+        # Use chat format
+        use_chat_format = tools is not None or messages is not None or True
+
+        if use_chat_format:
+            payload["messages"] = []
+
+            if effective_system_prompt:
+                payload["messages"].append({
+                    "role": "system",
+                    "content": effective_system_prompt
+                })
+
+            if messages:
+                converted_messages = self._convert_messages_for_ollama(messages)
+                payload["messages"].extend(converted_messages)
+
+            if media:
+                user_message_text = prompt.strip() if prompt else ""
+                try:
+                    from ..media.handlers import LocalMediaHandler
+                    media_handler = LocalMediaHandler("ollama", self.model_capabilities, model_name=self.model)
+                    multimodal_message = media_handler.create_multimodal_message(user_message_text, media)
+
+                    if isinstance(multimodal_message, str):
+                        payload["messages"].append({"role": "user", "content": multimodal_message})
+                    else:
+                        payload["messages"].append(multimodal_message)
+                except Exception as e:
+                    if hasattr(self, 'logger'):
+                        self.logger.warning(f"Failed to process media: {e}")
+                    if user_message_text:
+                        payload["messages"].append({"role": "user", "content": user_message_text})
+
+            elif prompt and prompt.strip():
+                payload["messages"].append({"role": "user", "content": prompt})
+
+            endpoint = "/api/chat"
+        else:
+            full_prompt = prompt
+            if effective_system_prompt:
+                full_prompt = f"{effective_system_prompt}\n\n{prompt}"
+            payload["prompt"] = full_prompt
+            endpoint = "/api/generate"
+
+        if stream:
+            return self._async_stream_generate(endpoint, payload, tools, kwargs.get('tool_call_tags'))
+        else:
+            return await self._async_single_generate(endpoint, payload, tools, kwargs.get('media_metadata'))
+
+    async def _async_single_generate(self, endpoint: str, payload: Dict[str, Any],
+                                     tools: Optional[List[Dict[str, Any]]] = None,
+                                     media_metadata: Optional[List[Dict[str, Any]]] = None) -> GenerateResponse:
+        """Native async single response generation."""
+        try:
+            start_time = time.time()
+            response = await self.async_client.post(endpoint, json=payload)
+            response.raise_for_status()
+            gen_time = round((time.time() - start_time) * 1000, 1)
+
+            result = response.json()
+
+            if endpoint == "/api/chat":
+                content = result.get("message", {}).get("content", "")
+            else:
+                content = result.get("response", "")
+
+            generate_response = GenerateResponse(
+                content=content,
+                model=self.model,
+                finish_reason="stop",
+                raw_response=result,
+                usage={
+                    "input_tokens": result.get("prompt_eval_count", 0),
+                    "output_tokens": result.get("eval_count", 0),
+                    "total_tokens": result.get("prompt_eval_count", 0) + result.get("eval_count", 0),
+                    "prompt_tokens": result.get("prompt_eval_count", 0),
+                    "completion_tokens": result.get("eval_count", 0)
+                },
+                gen_time=gen_time
+            )
+
+            if media_metadata:
+                if not generate_response.metadata:
+                    generate_response.metadata = {}
+                generate_response.metadata['media_metadata'] = media_metadata
+
+            if self.execute_tools and tools and self.tool_handler.supports_prompted and content:
+                return self._handle_tool_execution(generate_response, tools)
+
+            return generate_response
+
+        except Exception as e:
+            error_str = str(e).lower()
+            if ('404' in error_str or 'not found' in error_str):
+                available_models = self.list_available_models(base_url=self.base_url)
+                error_message = format_model_error("Ollama", self.model, available_models)
+                raise ModelNotFoundError(error_message)
+            else:
+                return GenerateResponse(
+                    content=f"Error: {str(e)}",
+                    model=self.model,
+                    finish_reason="error"
+                )
+
+    async def _async_stream_generate(self, endpoint: str, payload: Dict[str, Any],
+                                     tools: Optional[List[Dict[str, Any]]] = None,
+                                     tool_call_tags: Optional[str] = None):
+        """Native async streaming response generation."""
+        try:
+            async with self.async_client.stream("POST", endpoint, json=payload) as response:
+                response.raise_for_status()
+
+                full_content = ""
+                rewriter = None
+                buffer = ""
+                if tool_call_tags:
+                    try:
+                        from ..tools.tag_rewriter import create_tag_rewriter
+                        rewriter = create_tag_rewriter(tool_call_tags)
+                    except ImportError:
+                        pass
+
+                async for line in response.aiter_lines():
+                    if line:
+                        try:
+                            chunk = json.loads(line)
+
+                            if endpoint == "/api/chat":
+                                content = chunk.get("message", {}).get("content", "")
+                            else:
+                                content = chunk.get("response", "")
+
+                            done = chunk.get("done", False)
+                            full_content += content
+
+                            if rewriter and content:
+                                rewritten_content, buffer = rewriter.rewrite_streaming_chunk(content, buffer)
+                                content = rewritten_content
+
+                            chunk_response = GenerateResponse(
+                                content=content,
+                                model=self.model,
+                                finish_reason="stop" if done else None,
+                                raw_response=chunk
+                            )
+
+                            yield chunk_response
+
+                            if done:
+                                break
+
+                        except json.JSONDecodeError:
+                            continue
+
+                # Execute tools if enabled
+                if self.execute_tools and tools and self.tool_handler.supports_prompted and full_content:
+                    complete_response = GenerateResponse(
+                        content=full_content,
+                        model=self.model,
+                        finish_reason="stop"
+                    )
+
+                    final_response = self._handle_tool_execution(complete_response, tools)
+
+                    if final_response.content != full_content:
+                        tool_results_content = final_response.content[len(full_content):]
+                        yield GenerateResponse(
+                            content=tool_results_content,
+                            model=self.model,
+                            finish_reason="stop"
+                        )
+
+        except Exception as e:
+            yield GenerateResponse(
+                content=f"Error: {str(e)}",
+                model=self.model,
+                finish_reason="error"
+            )
+
     def _handle_tool_execution(self, response: GenerateResponse, tools: List[Dict[str, Any]]) -> GenerateResponse:
         """Handle tool execution for prompted models"""
         # Parse tool calls from response