abstractcore 2.9.1__py3-none-any.whl → 2.11.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractcore/__init__.py +7 -27
- abstractcore/apps/extractor.py +33 -100
- abstractcore/apps/intent.py +19 -0
- abstractcore/apps/judge.py +20 -1
- abstractcore/apps/summarizer.py +20 -1
- abstractcore/architectures/detection.py +34 -1
- abstractcore/architectures/response_postprocessing.py +313 -0
- abstractcore/assets/architecture_formats.json +38 -8
- abstractcore/assets/model_capabilities.json +781 -160
- abstractcore/compression/__init__.py +1 -2
- abstractcore/compression/glyph_processor.py +6 -4
- abstractcore/config/main.py +31 -19
- abstractcore/config/manager.py +389 -11
- abstractcore/config/vision_config.py +5 -5
- abstractcore/core/interface.py +151 -3
- abstractcore/core/session.py +16 -10
- abstractcore/download.py +1 -1
- abstractcore/embeddings/manager.py +20 -6
- abstractcore/endpoint/__init__.py +2 -0
- abstractcore/endpoint/app.py +458 -0
- abstractcore/mcp/client.py +3 -1
- abstractcore/media/__init__.py +52 -17
- abstractcore/media/auto_handler.py +42 -22
- abstractcore/media/base.py +44 -1
- abstractcore/media/capabilities.py +12 -33
- abstractcore/media/enrichment.py +105 -0
- abstractcore/media/handlers/anthropic_handler.py +19 -28
- abstractcore/media/handlers/local_handler.py +124 -70
- abstractcore/media/handlers/openai_handler.py +19 -31
- abstractcore/media/processors/__init__.py +4 -2
- abstractcore/media/processors/audio_processor.py +57 -0
- abstractcore/media/processors/office_processor.py +8 -3
- abstractcore/media/processors/pdf_processor.py +46 -3
- abstractcore/media/processors/text_processor.py +22 -24
- abstractcore/media/processors/video_processor.py +58 -0
- abstractcore/media/types.py +97 -4
- abstractcore/media/utils/image_scaler.py +20 -2
- abstractcore/media/utils/video_frames.py +219 -0
- abstractcore/media/vision_fallback.py +136 -22
- abstractcore/processing/__init__.py +32 -3
- abstractcore/processing/basic_deepsearch.py +15 -10
- abstractcore/processing/basic_intent.py +3 -2
- abstractcore/processing/basic_judge.py +3 -2
- abstractcore/processing/basic_summarizer.py +1 -1
- abstractcore/providers/__init__.py +3 -1
- abstractcore/providers/anthropic_provider.py +95 -8
- abstractcore/providers/base.py +1516 -81
- abstractcore/providers/huggingface_provider.py +546 -69
- abstractcore/providers/lmstudio_provider.py +35 -923
- abstractcore/providers/mlx_provider.py +382 -35
- abstractcore/providers/model_capabilities.py +5 -1
- abstractcore/providers/ollama_provider.py +99 -15
- abstractcore/providers/openai_compatible_provider.py +406 -180
- abstractcore/providers/openai_provider.py +188 -44
- abstractcore/providers/openrouter_provider.py +76 -0
- abstractcore/providers/registry.py +61 -5
- abstractcore/providers/streaming.py +138 -33
- abstractcore/providers/vllm_provider.py +92 -817
- abstractcore/server/app.py +461 -13
- abstractcore/server/audio_endpoints.py +139 -0
- abstractcore/server/vision_endpoints.py +1319 -0
- abstractcore/structured/handler.py +316 -41
- abstractcore/tools/common_tools.py +5501 -2012
- abstractcore/tools/comms_tools.py +1641 -0
- abstractcore/tools/core.py +37 -7
- abstractcore/tools/handler.py +4 -9
- abstractcore/tools/parser.py +49 -2
- abstractcore/tools/tag_rewriter.py +2 -1
- abstractcore/tools/telegram_tdlib.py +407 -0
- abstractcore/tools/telegram_tools.py +261 -0
- abstractcore/utils/cli.py +1085 -72
- abstractcore/utils/token_utils.py +2 -0
- abstractcore/utils/truncation.py +29 -0
- abstractcore/utils/version.py +3 -4
- abstractcore/utils/vlm_token_calculator.py +12 -2
- abstractcore-2.11.2.dist-info/METADATA +562 -0
- abstractcore-2.11.2.dist-info/RECORD +133 -0
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/WHEEL +1 -1
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/entry_points.txt +1 -0
- abstractcore-2.9.1.dist-info/METADATA +0 -1190
- abstractcore-2.9.1.dist-info/RECORD +0 -119
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/top_level.txt +0 -0
abstractcore/providers/lmstudio_provider.py
@@ -1,927 +1,39 @@
 """
 LM Studio provider implementation (OpenAI-compatible API).
-"""
-
-import os
-import httpx
-import json
-import time
-from typing import List, Dict, Any, Optional, Union, Iterator, AsyncIterator, Type
-
-try:
-    from pydantic import BaseModel
-    PYDANTIC_AVAILABLE = True
-except ImportError:
-    PYDANTIC_AVAILABLE = False
-    BaseModel = None
-from .base import BaseProvider
-from ..core.types import GenerateResponse
-from ..exceptions import (
-    ProviderAPIError,
-    ModelNotFoundError,
-    InvalidRequestError,
-    format_model_error,
-    format_provider_error,
-)
-from ..tools import UniversalToolHandler, execute_tools
-from ..events import EventType
-
-
-class LMStudioProvider(BaseProvider):
-    """LM Studio provider using OpenAI-compatible API"""
-
-    def __init__(self, model: str = "local-model", base_url: Optional[str] = None, **kwargs):
-        super().__init__(model, **kwargs)
-        self.provider = "lmstudio"
-
-        # Initialize tool handler
-        self.tool_handler = UniversalToolHandler(model)
-
-        # Base URL priority: parameter > LMSTUDIO_BASE_URL > default
-        self.base_url = (
-            base_url or
-            os.getenv("LMSTUDIO_BASE_URL") or
-            "http://localhost:1234/v1"
-        ).rstrip('/')
-
-        # Get timeout value - None means unlimited timeout
-        timeout_value = getattr(self, '_timeout', None)
-        # Validate timeout if provided (None is allowed for unlimited)
-        if timeout_value is not None and timeout_value <= 0:
-            timeout_value = None  # Invalid timeout becomes unlimited
-
-        try:
-            self.client = httpx.Client(timeout=timeout_value)
-        except Exception as e:
-            # Fallback with default timeout if client creation fails
-            try:
-                fallback_timeout = None
-                try:
-                    from ..config.manager import get_config_manager
-
-                    fallback_timeout = float(get_config_manager().get_default_timeout())
-                except Exception:
-                    fallback_timeout = 7200.0
-                if isinstance(fallback_timeout, (int, float)) and float(fallback_timeout) <= 0:
-                    fallback_timeout = None
-                self.client = httpx.Client(timeout=fallback_timeout)
-            except Exception:
-                raise RuntimeError(f"Failed to create HTTP client for LMStudio: {e}")
-
-        self._async_client = None  # Lazy-loaded async client
-
-        # Validate model exists in LMStudio
-        self._validate_model()
-
-    @property
-    def async_client(self):
-        """Lazy-load async HTTP client for native async operations."""
-        if self._async_client is None:
-            timeout_value = getattr(self, '_timeout', None)
-            if timeout_value is not None and timeout_value <= 0:
-                timeout_value = None
-            self._async_client = httpx.AsyncClient(timeout=timeout_value)
-        return self._async_client
-
-    def _validate_model(self):
-        """Validate that the model exists in LMStudio"""
-        try:
-            # Use base_url as-is (should include /v1) for model discovery
-            available_models = self.list_available_models(base_url=self.base_url)
-            if available_models and self.model not in available_models:
-                error_message = format_model_error("LMStudio", self.model, available_models)
-                raise ModelNotFoundError(error_message)
-        except httpx.ConnectError:
-            # LMStudio not running - will fail later when trying to generate
-            if hasattr(self, 'logger'):
-                self.logger.debug(f"LMStudio server not accessible at {self.base_url} - model validation skipped")
-            pass
-        except ModelNotFoundError:
-            # Re-raise model not found errors
-            raise
-        except Exception as e:
-            # Other errors (like timeout, None type errors) - continue, will fail later if needed
-            if hasattr(self, 'logger'):
-                self.logger.debug(f"Model validation failed with error: {e} - continuing anyway")
-            pass
-
-    def unload(self) -> None:
-        """
-        Close HTTP client connection.
-
-        Note: LMStudio manages model memory automatically using TTL (time-to-live)
-        and auto-evict features. There is no explicit API to unload models.
-        Models will be automatically unloaded after the configured TTL expires.
-
-        This method only closes the HTTP client connection for cleanup.
-        """
-        try:
-            # Close the HTTP client connection
-            if hasattr(self, 'client') and self.client is not None:
-                self.client.close()
-
-            # Close async client if it was created
-            if self._async_client is not None:
-                import asyncio
-                try:
-                    loop = asyncio.get_running_loop()
-                    loop.create_task(self._async_client.aclose())
-                except RuntimeError:
-                    # No running loop
-                    import asyncio
-                    asyncio.run(self._async_client.aclose())
-
-        except Exception as e:
-            # Log but don't raise - unload should be best-effort
-            if hasattr(self, 'logger'):
-                self.logger.warning(f"Error during unload: {e}")
-
-    def generate(self, *args, **kwargs):
-        """Public generate method that includes telemetry"""
-        return self.generate_with_telemetry(*args, **kwargs)
-
-    def _generate_internal(self,
-                           prompt: str,
-                           messages: Optional[List[Dict[str, str]]] = None,
-                           system_prompt: Optional[str] = None,
-                           tools: Optional[List[Dict[str, Any]]] = None,
-                           media: Optional[List['MediaContent']] = None,
-                           stream: bool = False,
-                           response_model: Optional[Type[BaseModel]] = None,
-                           execute_tools: Optional[bool] = None,
-                           tool_call_tags: Optional[str] = None,
-                           **kwargs) -> Union[GenerateResponse, Iterator[GenerateResponse]]:
-        """Generate response using LM Studio"""
-
-        # Build messages for chat completions with tool support
-        chat_messages = []
-
-        # Add tools to system prompt if provided
-        final_system_prompt = system_prompt
-        # Prefer native tools when the model supports them. Only inject a prompted tool list
-        # when native tool calling is not available.
-        if tools and self.tool_handler.supports_prompted and not self.tool_handler.supports_native:
-            include_tool_list = True
-            if final_system_prompt and "## Tools (session)" in final_system_prompt:
-                include_tool_list = False
-            tool_prompt = self.tool_handler.format_tools_prompt(tools, include_tool_list=include_tool_list)
-            if final_system_prompt:
-                final_system_prompt += f"\n\n{tool_prompt}"
-            else:
-                final_system_prompt = tool_prompt
-
-        # Add system message if provided
-        if final_system_prompt:
-            chat_messages.append({
-                "role": "system",
-                "content": final_system_prompt
-            })
-
-        # Add conversation history
-        if messages:
-            chat_messages.extend(messages)
-
-        # Handle media content regardless of prompt (media can be used with messages too)
-        if media:
-            # Get the last user message content to combine with media
-            user_message_text = prompt.strip() if prompt else ""
-            if not user_message_text and chat_messages:
-                # If no prompt, try to get text from the last user message
-                for msg in reversed(chat_messages):
-                    if msg.get("role") == "user" and msg.get("content"):
-                        user_message_text = msg["content"]
-                        break
-            try:
-                # CRITICAL FIX: Process media files into MediaContent objects first
-                processed_media = self._process_media_content(media)
-
-                # Use capability-based media handler selection
-                media_handler = self._get_media_handler_for_model(self.model)
-
-                # Create multimodal message combining text and processed media
-                multimodal_message = media_handler.create_multimodal_message(user_message_text, processed_media)
-
-                # For LMStudio (OpenAI-compatible), we might get a string (embedded text) or dict (structured)
-                if isinstance(multimodal_message, str):
-                    # Replace the last user message with the multimodal message, or add new one
-                    if chat_messages and chat_messages[-1].get("role") == "user":
-                        chat_messages[-1]["content"] = multimodal_message
-                    else:
-                        chat_messages.append({
-                            "role": "user",
-                            "content": multimodal_message
-                        })
-                else:
-                    if chat_messages and chat_messages[-1].get("role") == "user":
-                        # Replace last user message with structured multimodal message
-                        chat_messages[-1] = multimodal_message
-                    else:
-                        chat_messages.append(multimodal_message)
-            except ImportError:
-                self.logger.warning("Media processing not available. Install with: pip install abstractcore[media]")
-                if user_message_text:
-                    chat_messages.append({
-                        "role": "user",
-                        "content": user_message_text
-                    })
-            except Exception as e:
-                self.logger.warning(f"Failed to process media content: {e}")
-                if user_message_text:
-                    chat_messages.append({
-                        "role": "user",
-                        "content": user_message_text
-                    })
-
-        # Add prompt as separate message if provided (for backward compatibility)
-        elif prompt and prompt.strip():
-            chat_messages.append({
-                "role": "user",
-                "content": prompt
-            })
-
-        # Build request payload using unified system
-        generation_kwargs = self._prepare_generation_kwargs(**kwargs)
-        max_output_tokens = self._get_provider_max_tokens_param(generation_kwargs)
-
-        payload = {
-            "model": self.model,
-            "messages": chat_messages,
-            "stream": stream,
-            "temperature": kwargs.get("temperature", self.temperature),
-            "max_tokens": max_output_tokens,  # LMStudio uses max_tokens for output tokens
-            "top_p": kwargs.get("top_p", 0.9),
-        }
-
-        # Native tools (OpenAI-compatible): send structured tools/tool_choice when supported.
-        if tools and self.tool_handler.supports_native:
-            payload["tools"] = self.tool_handler.prepare_tools_for_native(tools)
-            payload["tool_choice"] = kwargs.get("tool_choice", "auto")
-
-        # Add additional generation parameters if provided (OpenAI-compatible)
-        if "frequency_penalty" in kwargs:
-            payload["frequency_penalty"] = kwargs["frequency_penalty"]
-        if "presence_penalty" in kwargs:
-            payload["presence_penalty"] = kwargs["presence_penalty"]
-        if "repetition_penalty" in kwargs:
-            # Some models support repetition_penalty directly
-            payload["repetition_penalty"] = kwargs["repetition_penalty"]
-
-        # Add seed if provided (LMStudio supports seed via OpenAI-compatible API)
-        seed_value = kwargs.get("seed", self.seed)
-        if seed_value is not None:
-            payload["seed"] = seed_value
-
-        # Add structured output support (OpenAI-compatible format)
-        # LMStudio supports native structured outputs using the response_format parameter
-        # This provides server-side guaranteed schema compliance
-        if response_model and PYDANTIC_AVAILABLE:
-            json_schema = response_model.model_json_schema()
-            payload["response_format"] = {
-                "type": "json_schema",
-                "json_schema": {
-                    "name": response_model.__name__,
-                    "schema": json_schema
-                }
-            }
-
-        if stream:
-            # Return streaming response - BaseProvider will handle tag rewriting via UnifiedStreamProcessor
-            return self._stream_generate(payload)
-        else:
-            response = self._single_generate(payload)
-
-            # Execute tools if enabled and tools are present
-            if self.execute_tools and tools and self.tool_handler.supports_prompted and response.content:
-                response = self._handle_prompted_tool_execution(response, tools, execute_tools)
-
-            return response
-
-    def _single_generate(self, payload: Dict[str, Any]) -> GenerateResponse:
-        """Generate single response"""
-        try:
-            # Ensure client is available
-            if not hasattr(self, 'client') or self.client is None:
-                raise ProviderAPIError("HTTP client not initialized")
-
-            # Track generation time
-            start_time = time.time()
-            request_url = f"{self.base_url}/chat/completions"
-            response = self.client.post(
-                request_url,
-                json=payload,
-                headers={"Content-Type": "application/json"}
-            )
-            response.raise_for_status()
-            gen_time = round((time.time() - start_time) * 1000, 1)
-
-            result = response.json()
-
-            # Extract response from OpenAI format
-            if "choices" in result and len(result["choices"]) > 0:
-                choice = result["choices"][0]
-                message = choice.get("message") or {}
-                if not isinstance(message, dict):
-                    message = {}
-
-                content = message.get("content", "")
-                reasoning = message.get("reasoning")
-                tool_calls = message.get("tool_calls")
-                if tool_calls is None:
-                    # Some servers surface tool calls at the choice level.
-                    tool_calls = choice.get("tool_calls")
-                finish_reason = choice.get("finish_reason", "stop")
-            else:
-                content = "No response generated"
-                reasoning = None
-                tool_calls = None
-                finish_reason = "error"
-
-            # Extract usage info
-            usage = result.get("usage", {})
-
-            metadata = {}
-            if isinstance(reasoning, str) and reasoning.strip():
-                metadata["reasoning"] = reasoning
-            # Runtime observability: capture the exact HTTP JSON payload we sent.
-            metadata["_provider_request"] = {
-                "url": request_url,
-                "payload": payload,
-            }
-
-            return GenerateResponse(
-                content=content,
-                model=self.model,
-                finish_reason=finish_reason,
-                raw_response=result,
-                tool_calls=tool_calls if isinstance(tool_calls, list) else None,
-                metadata=metadata or None,
-                usage={
-                    "input_tokens": usage.get("prompt_tokens", 0),
-                    "output_tokens": usage.get("completion_tokens", 0),
-                    "total_tokens": usage.get("total_tokens", 0),
-                    # Keep legacy keys for backward compatibility
-                    "prompt_tokens": usage.get("prompt_tokens", 0),
-                    "completion_tokens": usage.get("completion_tokens", 0)
-                },
-                gen_time=gen_time
-            )
-
-        except httpx.HTTPStatusError as e:
-            # Improve debuggability: include LMStudio's error response body (often a JSON error envelope).
-            resp = getattr(e, "response", None)
-            status = getattr(resp, "status_code", None)
-
-            body_text = ""
-            try:
-                if resp is not None:
-                    # Try to extract a structured error message if the server returns JSON.
-                    try:
-                        j = resp.json()
-                        if isinstance(j, dict):
-                            err = j.get("error")
-                            if isinstance(err, dict):
-                                msg = err.get("message") or err.get("error") or err.get("detail")
-                                if isinstance(msg, str) and msg.strip():
-                                    body_text = msg.strip()
-                            if not body_text:
-                                msg2 = j.get("message") or j.get("detail")
-                                if isinstance(msg2, str) and msg2.strip():
-                                    body_text = msg2.strip()
-                            if not body_text:
-                                body_text = json.dumps(j, ensure_ascii=False)
-                    except Exception:
-                        body_text = str(getattr(resp, "text", "") or "").strip()
-            except Exception:
-                body_text = ""
-
-            if body_text and len(body_text) > 2000:
-                body_text = body_text[:2000] + "…"
-
-            # Preserve classification for BaseProvider error normalization.
-            base = str(e)
-            detail = f"{base} | response={body_text}" if body_text else base
-            if isinstance(status, int) and 400 <= status < 500:
-                raise InvalidRequestError(detail)
-            raise ProviderAPIError(detail)
-
-        except AttributeError as e:
-            # Handle None type errors specifically
-            if "'NoneType'" in str(e):
-                raise ProviderAPIError(f"LMStudio provider not properly initialized: {str(e)}")
-            else:
-                raise ProviderAPIError(f"LMStudio configuration error: {str(e)}")
-        except Exception as e:
-            error_str = str(e).lower()
-            if ('404' in error_str or 'not found' in error_str or 'model' in error_str) and ('not found' in error_str):
-                # Model not found - show available models
-                try:
-                    available_models = self.list_available_models(base_url=self.base_url)
-                    error_message = format_model_error("LMStudio", self.model, available_models)
-                    raise ModelNotFoundError(error_message)
-                except Exception:
-                    # If model discovery also fails, provide a generic error
-                    raise ModelNotFoundError(f"Model '{self.model}' not found in LMStudio and could not fetch available models")
-            else:
-                raise
-
-    def _stream_generate(self, payload: Dict[str, Any]) -> Iterator[GenerateResponse]:
-        """Generate streaming response"""
-        try:
-            with self.client.stream(
-                "POST",
-                f"{self.base_url}/chat/completions",
-                json=payload,
-                headers={"Content-Type": "application/json"}
-            ) as response:
-                response.raise_for_status()
-
-                for line in response.iter_lines():
-                    if line:
-                        # Decode bytes to string if necessary
-                        if isinstance(line, bytes):
-                            line = line.decode('utf-8')
-                        line = line.strip()
-
-                        if line.startswith("data: "):
-                            data = line[6:]  # Remove "data: " prefix
-
-                            if data == "[DONE]":
-                                break
-
-                            try:
-                                chunk = json.loads(data)
-
-                                if "choices" in chunk and len(chunk["choices"]) > 0:
-                                    choice = chunk["choices"][0]
-                                    delta = choice.get("delta", {})
-                                    if not isinstance(delta, dict):
-                                        delta = {}
-                                    content = delta.get("content", "")
-                                    reasoning = delta.get("reasoning")
-                                    tool_calls = delta.get("tool_calls") or choice.get("tool_calls")
-                                    finish_reason = choice.get("finish_reason")
-
-                                    metadata = {}
-                                    if isinstance(reasoning, str) and reasoning.strip():
-                                        metadata["reasoning"] = reasoning
-
-                                    yield GenerateResponse(
-                                        content=content,
-                                        model=self.model,
-                                        finish_reason=finish_reason,
-                                        tool_calls=tool_calls if isinstance(tool_calls, list) else None,
-                                        metadata=metadata or None,
-                                        raw_response=chunk,
-                                    )
 
-
-
-
-        except Exception as e:
-            yield GenerateResponse(
-                content=f"Error: {str(e)}",
-                model=self.model,
-                finish_reason="error"
-            )
-
-    async def _agenerate_internal(self,
-                                  prompt: str,
-                                  messages: Optional[List[Dict[str, str]]] = None,
-                                  system_prompt: Optional[str] = None,
-                                  tools: Optional[List[Dict[str, Any]]] = None,
-                                  media: Optional[List['MediaContent']] = None,
-                                  stream: bool = False,
-                                  response_model: Optional[Type[BaseModel]] = None,
-                                  execute_tools: Optional[bool] = None,
-                                  tool_call_tags: Optional[str] = None,
-                                  **kwargs) -> Union[GenerateResponse, AsyncIterator[GenerateResponse]]:
-        """Native async implementation using httpx.AsyncClient - 3-10x faster for batch operations."""
-
-        # Build messages for chat completions with tool support (same logic as sync)
-        chat_messages = []
-
-        # Add tools to system prompt if provided
-        final_system_prompt = system_prompt
-        # Prefer native tools when available; only inject prompted tool syntax as fallback.
-        if tools and self.tool_handler.supports_prompted and not self.tool_handler.supports_native:
-            include_tool_list = True
-            if final_system_prompt and "## Tools (session)" in final_system_prompt:
-                include_tool_list = False
-            tool_prompt = self.tool_handler.format_tools_prompt(tools, include_tool_list=include_tool_list)
-            if final_system_prompt:
-                final_system_prompt += f"\n\n{tool_prompt}"
-            else:
-                final_system_prompt = tool_prompt
-
-        # Add system message if provided
-        if final_system_prompt:
-            chat_messages.append({
-                "role": "system",
-                "content": final_system_prompt
-            })
-
-        # Add conversation history
-        if messages:
-            chat_messages.extend(messages)
-
-        # Handle media content
-        if media:
-            user_message_text = prompt.strip() if prompt else ""
-            if not user_message_text and chat_messages:
-                for msg in reversed(chat_messages):
-                    if msg.get("role") == "user" and msg.get("content"):
-                        user_message_text = msg["content"]
-                        break
-            try:
-                processed_media = self._process_media_content(media)
-                media_handler = self._get_media_handler_for_model(self.model)
-                multimodal_message = media_handler.create_multimodal_message(user_message_text, processed_media)
-
-                if isinstance(multimodal_message, str):
-                    if chat_messages and chat_messages[-1].get("role") == "user":
-                        chat_messages[-1]["content"] = multimodal_message
-                    else:
-                        chat_messages.append({"role": "user", "content": multimodal_message})
-                else:
-                    if chat_messages and chat_messages[-1].get("role") == "user":
-                        chat_messages[-1] = multimodal_message
-                    else:
-                        chat_messages.append(multimodal_message)
-            except ImportError:
-                self.logger.warning("Media processing not available. Install with: pip install abstractcore[media]")
-                if user_message_text:
-                    chat_messages.append({"role": "user", "content": user_message_text})
-            except Exception as e:
-                self.logger.warning(f"Failed to process media content: {e}")
-                if user_message_text:
-                    chat_messages.append({"role": "user", "content": user_message_text})
-
-        # Add prompt as separate message if provided
-        elif prompt and prompt.strip():
-            chat_messages.append({"role": "user", "content": prompt})
-
-        # Build request payload
-        generation_kwargs = self._prepare_generation_kwargs(**kwargs)
-        max_output_tokens = self._get_provider_max_tokens_param(generation_kwargs)
-
-        payload = {
-            "model": self.model,
-            "messages": chat_messages,
-            "stream": stream,
-            "temperature": kwargs.get("temperature", self.temperature),
-            "max_tokens": max_output_tokens,
-            "top_p": kwargs.get("top_p", 0.9),
-        }
-
-        # Native tools (OpenAI-compatible): send structured tools/tool_choice when supported.
-        if tools and self.tool_handler.supports_native:
-            payload["tools"] = self.tool_handler.prepare_tools_for_native(tools)
-            payload["tool_choice"] = kwargs.get("tool_choice", "auto")
-
-        # Add additional parameters
-        if "frequency_penalty" in kwargs:
-            payload["frequency_penalty"] = kwargs["frequency_penalty"]
-        if "presence_penalty" in kwargs:
-            payload["presence_penalty"] = kwargs["presence_penalty"]
-        if "repetition_penalty" in kwargs:
-            payload["repetition_penalty"] = kwargs["repetition_penalty"]
-
-        # Add seed if provided
-        seed_value = kwargs.get("seed", self.seed)
-        if seed_value is not None:
-            payload["seed"] = seed_value
-
-        # Add structured output support
-        if response_model and PYDANTIC_AVAILABLE:
-            json_schema = response_model.model_json_schema()
-            payload["response_format"] = {
-                "type": "json_schema",
-                "json_schema": {
-                    "name": response_model.__name__,
-                    "schema": json_schema
-                }
-            }
-
-        if stream:
-            return self._async_stream_generate(payload)
-        else:
-            response = await self._async_single_generate(payload)
-
-            # Execute tools if enabled
-            if self.execute_tools and tools and self.tool_handler.supports_prompted and response.content:
-                response = self._handle_prompted_tool_execution(response, tools, execute_tools)
-
-            return response
-
-    async def _async_single_generate(self, payload: Dict[str, Any]) -> GenerateResponse:
-        """Native async single response generation."""
-        try:
-            # Track generation time
-            start_time = time.time()
-            request_url = f"{self.base_url}/chat/completions"
-            response = await self.async_client.post(
-                request_url,
-                json=payload,
-                headers={"Content-Type": "application/json"}
-            )
-            response.raise_for_status()
-            gen_time = round((time.time() - start_time) * 1000, 1)
-
-            result = response.json()
-
-            # Extract response from OpenAI format
-            if "choices" in result and len(result["choices"]) > 0:
-                choice = result["choices"][0]
-                message = choice.get("message") or {}
-                if not isinstance(message, dict):
-                    message = {}
-
-                content = message.get("content", "")
-                reasoning = message.get("reasoning")
-                tool_calls = message.get("tool_calls")
-                if tool_calls is None:
-                    tool_calls = choice.get("tool_calls")
-                finish_reason = choice.get("finish_reason", "stop")
-            else:
-                content = "No response generated"
-                reasoning = None
-                tool_calls = None
-                finish_reason = "error"
-
-            # Extract usage info
-            usage = result.get("usage", {})
-
-            metadata = {}
-            if isinstance(reasoning, str) and reasoning.strip():
-                metadata["reasoning"] = reasoning
-            metadata["_provider_request"] = {
-                "url": request_url,
-                "payload": payload,
-            }
-
-            return GenerateResponse(
-                content=content,
-                model=self.model,
-                finish_reason=finish_reason,
-                raw_response=result,
-                tool_calls=tool_calls if isinstance(tool_calls, list) else None,
-                metadata=metadata or None,
-                usage={
-                    "input_tokens": usage.get("prompt_tokens", 0),
-                    "output_tokens": usage.get("completion_tokens", 0),
-                    "total_tokens": usage.get("total_tokens", 0),
-                    "prompt_tokens": usage.get("prompt_tokens", 0),
-                    "completion_tokens": usage.get("completion_tokens", 0)
-                },
-                gen_time=gen_time
-            )
-
-        except Exception as e:
-            error_str = str(e).lower()
-            if ('404' in error_str or 'not found' in error_str or 'model' in error_str) and ('not found' in error_str):
-                try:
-                    available_models = self.list_available_models(base_url=self.base_url)
-                    error_message = format_model_error("LMStudio", self.model, available_models)
-                    raise ModelNotFoundError(error_message)
-                except Exception:
-                    raise ModelNotFoundError(f"Model '{self.model}' not found in LMStudio")
-            else:
-                raise ProviderAPIError(f"LMStudio API error: {str(e)}")
-
-    async def _async_stream_generate(self, payload: Dict[str, Any]) -> AsyncIterator[GenerateResponse]:
-        """Native async streaming response generation."""
-        try:
-            async with self.async_client.stream(
-                "POST",
-                f"{self.base_url}/chat/completions",
-                json=payload,
-                headers={"Content-Type": "application/json"}
-            ) as response:
-                response.raise_for_status()
-
-                async for line in response.aiter_lines():
-                    if line:
-                        line = line.strip()
-
-                        if line.startswith("data: "):
-                            data = line[6:]  # Remove "data: " prefix
-
-                            if data == "[DONE]":
-                                break
-
-                            try:
-                                chunk = json.loads(data)
-
-                                if "choices" in chunk and len(chunk["choices"]) > 0:
-                                    choice = chunk["choices"][0]
-                                    delta = choice.get("delta", {})
-                                    if not isinstance(delta, dict):
-                                        delta = {}
-                                    content = delta.get("content", "")
-                                    reasoning = delta.get("reasoning")
-                                    tool_calls = delta.get("tool_calls") or choice.get("tool_calls")
-                                    finish_reason = choice.get("finish_reason")
-
-                                    metadata = {}
-                                    if isinstance(reasoning, str) and reasoning.strip():
-                                        metadata["reasoning"] = reasoning
-
-                                    yield GenerateResponse(
-                                        content=content,
-                                        model=self.model,
-                                        finish_reason=finish_reason,
-                                        tool_calls=tool_calls if isinstance(tool_calls, list) else None,
-                                        metadata=metadata or None,
-                                        raw_response=chunk
-                                    )
-
-                            except json.JSONDecodeError:
-                                continue
-
-        except Exception as e:
-            yield GenerateResponse(
-                content=f"Error: {str(e)}",
-                model=self.model,
-                finish_reason="error"
-            )
-
-    def get_capabilities(self) -> List[str]:
-        """Get LM Studio capabilities"""
-        return ["streaming", "chat", "tools"]
-
-    def validate_config(self) -> bool:
-        """Validate LM Studio connection"""
-        try:
-            response = self.client.get(f"{self.base_url}/models")
-            return response.status_code == 200
-        except:
-            return False
-
-    # Removed override - using BaseProvider method with JSON capabilities
-
-    def _get_provider_max_tokens_param(self, kwargs: Dict[str, Any]) -> int:
-        """Get max tokens parameter for LMStudio API"""
-        # For LMStudio (OpenAI-compatible), max_tokens is the max output tokens
-        return kwargs.get("max_output_tokens", self.max_output_tokens)
-
-    def _update_http_client_timeout(self) -> None:
-        """Update HTTP client timeout when timeout is changed."""
-        if hasattr(self, 'client') and self.client is not None:
-            try:
-                # Create new client with updated timeout
-                self.client.close()
-
-                # Get timeout value - None means unlimited timeout
-                timeout_value = getattr(self, '_timeout', None)
-                # Validate timeout if provided (None is allowed for unlimited)
-                if timeout_value is not None and timeout_value <= 0:
-                    timeout_value = None  # Invalid timeout becomes unlimited
-
-                self.client = httpx.Client(timeout=timeout_value)
-            except Exception as e:
-                # Log error but don't fail - timeout update is not critical
-                if hasattr(self, 'logger'):
-                    self.logger.warning(f"Failed to update HTTP client timeout: {e}")
-                # Try to create a new client with default timeout
-                try:
-                    fallback_timeout = None
-                    try:
-                        from ..config.manager import get_config_manager
-
-                        fallback_timeout = float(get_config_manager().get_default_timeout())
-                    except Exception:
-                        fallback_timeout = 7200.0
-                    if isinstance(fallback_timeout, (int, float)) and float(fallback_timeout) <= 0:
-                        fallback_timeout = None
-                    self.client = httpx.Client(timeout=fallback_timeout)
-                except Exception:
-                    pass  # Best effort - don't fail the operation
-
-    def _normalize_model_name(self, model_name: str) -> str:
-        """Remove common provider prefixes from model name."""
-        for prefix in ["lmstudio/", "qwen/", "ollama/", "huggingface/"]:
-            if model_name.startswith(prefix):
-                model_name = model_name[len(prefix):]
-        return model_name
-
-    def _get_media_handler_for_model(self, model_name: str):
-        """Get appropriate media handler based on model vision capabilities."""
-        from ..media.handlers import OpenAIMediaHandler, LocalMediaHandler
-
-        # Normalize model name by removing provider prefixes
-        clean_model_name = self._normalize_model_name(model_name)
-
-        # Determine if model supports vision
-        try:
-            from ..architectures.detection import supports_vision
-            use_vision_handler = supports_vision(clean_model_name)
-        except Exception as e:
-            self.logger.debug(f"Vision detection failed: {e}, defaulting to LocalMediaHandler")
-            use_vision_handler = False
-
-        # Create appropriate handler
-        if use_vision_handler:
-            handler = OpenAIMediaHandler(self.model_capabilities, model_name=model_name)
-            self.logger.debug(f"Using OpenAIMediaHandler for vision model: {clean_model_name}")
-        else:
-            handler = LocalMediaHandler("lmstudio", self.model_capabilities, model_name=model_name)
-            self.logger.debug(f"Using LocalMediaHandler for model: {clean_model_name}")
-
-        return handler
-
-    def list_available_models(self, **kwargs) -> List[str]:
-        """
-        List available models from LMStudio server.
-
-        Args:
-            **kwargs: Optional parameters including:
-                - base_url: LMStudio server URL
-                - input_capabilities: List of ModelInputCapability enums to filter by input capability
-                - output_capabilities: List of ModelOutputCapability enums to filter by output capability
-
-        Returns:
-            List of model names, optionally filtered by capabilities
-        """
-        try:
-            from .model_capabilities import filter_models_by_capabilities
-
-            # Use provided base_url or fall back to instance base_url
-            base_url = kwargs.get('base_url', self.base_url)
-
-            response = self.client.get(f"{base_url}/models", timeout=5.0)
-            if response.status_code == 200:
-                data = response.json()
-                models = [model["id"] for model in data.get("data", [])]
-                models = sorted(models)
-
-                # Apply new capability filtering if provided
-                input_capabilities = kwargs.get('input_capabilities')
-                output_capabilities = kwargs.get('output_capabilities')
-
-                if input_capabilities or output_capabilities:
-                    models = filter_models_by_capabilities(
-                        models,
-                        input_capabilities=input_capabilities,
-                        output_capabilities=output_capabilities
-                    )
-
-
-                return models
-            else:
-                self.logger.warning(f"LMStudio API returned status {response.status_code}")
-                return []
+
+LM Studio exposes an OpenAI-compatible server (by default at `http://localhost:1234/v1`).
+This provider is a thin wrapper around `OpenAICompatibleProvider` with LM Studio defaults.
+"""
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-""
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            # Call LMStudio's embeddings API (OpenAI-compatible)
-            response = self.client.post(
-                f"{self.base_url}/embeddings",
-                json=payload,
-                headers={"Content-Type": "application/json"}
-            )
-            response.raise_for_status()
-
-            # LMStudio returns OpenAI-compatible format, so we can return it directly
-            result = response.json()
-
-            # Ensure the model field uses our provider-prefixed format
-            result["model"] = self.model
-
-            return result
-
-        except Exception as e:
-            self.logger.error(f"Failed to generate embeddings: {e}")
-            raise ProviderAPIError(f"LMStudio embedding error: {str(e)}")
+from typing import Optional, Any
+
+from .openai_compatible_provider import OpenAICompatibleProvider
+
+
+class LMStudioProvider(OpenAICompatibleProvider):
+    """LM Studio provider using OpenAI-compatible API."""
+
+    PROVIDER_ID = "lmstudio"
+    PROVIDER_DISPLAY_NAME = "LMStudio"
+    BASE_URL_ENV_VAR = "LMSTUDIO_BASE_URL"
+    API_KEY_ENV_VAR = None
+    DEFAULT_BASE_URL = "http://localhost:1234/v1"
+
+    def __init__(
+        self,
+        model: str = "local-model",
+        base_url: Optional[str] = None,
+        timeout: Optional[float] = None,
+        **kwargs: Any,
+    ):
+        # ADR-0027: Local LM Studio calls should default to no client-side timeout.
+        #
+        # We intentionally treat "timeout omitted" as "unlimited" for this provider, rather
+        # than inheriting the global `abstractcore` default timeout (which may be tuned for
+        # remote providers). Operators can still override via:
+        #   - explicit `timeout=...` when constructing the provider, or
+        #   - runtime provider config (ConfigurationManager.configure_provider('lmstudio', timeout=...)).
+        if "timeout" in kwargs:
+            timeout = kwargs.pop("timeout")
+
+        super().__init__(model=model, base_url=base_url, timeout=timeout, **kwargs)