abstractcore-2.9.1-py3-none-any.whl → abstractcore-2.11.2-py3-none-any.whl

This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between these versions as they appear in their respective public registries.
Files changed (83)
  1. abstractcore/__init__.py +7 -27
  2. abstractcore/apps/extractor.py +33 -100
  3. abstractcore/apps/intent.py +19 -0
  4. abstractcore/apps/judge.py +20 -1
  5. abstractcore/apps/summarizer.py +20 -1
  6. abstractcore/architectures/detection.py +34 -1
  7. abstractcore/architectures/response_postprocessing.py +313 -0
  8. abstractcore/assets/architecture_formats.json +38 -8
  9. abstractcore/assets/model_capabilities.json +781 -160
  10. abstractcore/compression/__init__.py +1 -2
  11. abstractcore/compression/glyph_processor.py +6 -4
  12. abstractcore/config/main.py +31 -19
  13. abstractcore/config/manager.py +389 -11
  14. abstractcore/config/vision_config.py +5 -5
  15. abstractcore/core/interface.py +151 -3
  16. abstractcore/core/session.py +16 -10
  17. abstractcore/download.py +1 -1
  18. abstractcore/embeddings/manager.py +20 -6
  19. abstractcore/endpoint/__init__.py +2 -0
  20. abstractcore/endpoint/app.py +458 -0
  21. abstractcore/mcp/client.py +3 -1
  22. abstractcore/media/__init__.py +52 -17
  23. abstractcore/media/auto_handler.py +42 -22
  24. abstractcore/media/base.py +44 -1
  25. abstractcore/media/capabilities.py +12 -33
  26. abstractcore/media/enrichment.py +105 -0
  27. abstractcore/media/handlers/anthropic_handler.py +19 -28
  28. abstractcore/media/handlers/local_handler.py +124 -70
  29. abstractcore/media/handlers/openai_handler.py +19 -31
  30. abstractcore/media/processors/__init__.py +4 -2
  31. abstractcore/media/processors/audio_processor.py +57 -0
  32. abstractcore/media/processors/office_processor.py +8 -3
  33. abstractcore/media/processors/pdf_processor.py +46 -3
  34. abstractcore/media/processors/text_processor.py +22 -24
  35. abstractcore/media/processors/video_processor.py +58 -0
  36. abstractcore/media/types.py +97 -4
  37. abstractcore/media/utils/image_scaler.py +20 -2
  38. abstractcore/media/utils/video_frames.py +219 -0
  39. abstractcore/media/vision_fallback.py +136 -22
  40. abstractcore/processing/__init__.py +32 -3
  41. abstractcore/processing/basic_deepsearch.py +15 -10
  42. abstractcore/processing/basic_intent.py +3 -2
  43. abstractcore/processing/basic_judge.py +3 -2
  44. abstractcore/processing/basic_summarizer.py +1 -1
  45. abstractcore/providers/__init__.py +3 -1
  46. abstractcore/providers/anthropic_provider.py +95 -8
  47. abstractcore/providers/base.py +1516 -81
  48. abstractcore/providers/huggingface_provider.py +546 -69
  49. abstractcore/providers/lmstudio_provider.py +35 -923
  50. abstractcore/providers/mlx_provider.py +382 -35
  51. abstractcore/providers/model_capabilities.py +5 -1
  52. abstractcore/providers/ollama_provider.py +99 -15
  53. abstractcore/providers/openai_compatible_provider.py +406 -180
  54. abstractcore/providers/openai_provider.py +188 -44
  55. abstractcore/providers/openrouter_provider.py +76 -0
  56. abstractcore/providers/registry.py +61 -5
  57. abstractcore/providers/streaming.py +138 -33
  58. abstractcore/providers/vllm_provider.py +92 -817
  59. abstractcore/server/app.py +461 -13
  60. abstractcore/server/audio_endpoints.py +139 -0
  61. abstractcore/server/vision_endpoints.py +1319 -0
  62. abstractcore/structured/handler.py +316 -41
  63. abstractcore/tools/common_tools.py +5501 -2012
  64. abstractcore/tools/comms_tools.py +1641 -0
  65. abstractcore/tools/core.py +37 -7
  66. abstractcore/tools/handler.py +4 -9
  67. abstractcore/tools/parser.py +49 -2
  68. abstractcore/tools/tag_rewriter.py +2 -1
  69. abstractcore/tools/telegram_tdlib.py +407 -0
  70. abstractcore/tools/telegram_tools.py +261 -0
  71. abstractcore/utils/cli.py +1085 -72
  72. abstractcore/utils/token_utils.py +2 -0
  73. abstractcore/utils/truncation.py +29 -0
  74. abstractcore/utils/version.py +3 -4
  75. abstractcore/utils/vlm_token_calculator.py +12 -2
  76. abstractcore-2.11.2.dist-info/METADATA +562 -0
  77. abstractcore-2.11.2.dist-info/RECORD +133 -0
  78. {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/WHEEL +1 -1
  79. {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/entry_points.txt +1 -0
  80. abstractcore-2.9.1.dist-info/METADATA +0 -1190
  81. abstractcore-2.9.1.dist-info/RECORD +0 -119
  82. {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/licenses/LICENSE +0 -0
  83. {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/top_level.txt +0 -0
abstractcore/providers/lmstudio_provider.py
@@ -1,927 +1,39 @@
 """
 LM Studio provider implementation (OpenAI-compatible API).
-"""
-
-import os
-import httpx
-import json
-import time
-from typing import List, Dict, Any, Optional, Union, Iterator, AsyncIterator, Type
-
-try:
-    from pydantic import BaseModel
-    PYDANTIC_AVAILABLE = True
-except ImportError:
-    PYDANTIC_AVAILABLE = False
-    BaseModel = None
-from .base import BaseProvider
-from ..core.types import GenerateResponse
-from ..exceptions import (
-    ProviderAPIError,
-    ModelNotFoundError,
-    InvalidRequestError,
-    format_model_error,
-    format_provider_error,
-)
-from ..tools import UniversalToolHandler, execute_tools
-from ..events import EventType
-
-
-class LMStudioProvider(BaseProvider):
-    """LM Studio provider using OpenAI-compatible API"""
-
-    def __init__(self, model: str = "local-model", base_url: Optional[str] = None, **kwargs):
-        super().__init__(model, **kwargs)
-        self.provider = "lmstudio"
-
-        # Initialize tool handler
-        self.tool_handler = UniversalToolHandler(model)
-
-        # Base URL priority: parameter > LMSTUDIO_BASE_URL > default
-        self.base_url = (
-            base_url or
-            os.getenv("LMSTUDIO_BASE_URL") or
-            "http://localhost:1234/v1"
-        ).rstrip('/')
-
-        # Get timeout value - None means unlimited timeout
-        timeout_value = getattr(self, '_timeout', None)
-        # Validate timeout if provided (None is allowed for unlimited)
-        if timeout_value is not None and timeout_value <= 0:
-            timeout_value = None  # Invalid timeout becomes unlimited
-
-        try:
-            self.client = httpx.Client(timeout=timeout_value)
-        except Exception as e:
-            # Fallback with default timeout if client creation fails
-            try:
-                fallback_timeout = None
-                try:
-                    from ..config.manager import get_config_manager
-
-                    fallback_timeout = float(get_config_manager().get_default_timeout())
-                except Exception:
-                    fallback_timeout = 7200.0
-                if isinstance(fallback_timeout, (int, float)) and float(fallback_timeout) <= 0:
-                    fallback_timeout = None
-                self.client = httpx.Client(timeout=fallback_timeout)
-            except Exception:
-                raise RuntimeError(f"Failed to create HTTP client for LMStudio: {e}")
-
-        self._async_client = None  # Lazy-loaded async client
-
-        # Validate model exists in LMStudio
-        self._validate_model()
-
-    @property
-    def async_client(self):
-        """Lazy-load async HTTP client for native async operations."""
-        if self._async_client is None:
-            timeout_value = getattr(self, '_timeout', None)
-            if timeout_value is not None and timeout_value <= 0:
-                timeout_value = None
-            self._async_client = httpx.AsyncClient(timeout=timeout_value)
-        return self._async_client
-
-    def _validate_model(self):
-        """Validate that the model exists in LMStudio"""
-        try:
-            # Use base_url as-is (should include /v1) for model discovery
-            available_models = self.list_available_models(base_url=self.base_url)
-            if available_models and self.model not in available_models:
-                error_message = format_model_error("LMStudio", self.model, available_models)
-                raise ModelNotFoundError(error_message)
-        except httpx.ConnectError:
-            # LMStudio not running - will fail later when trying to generate
-            if hasattr(self, 'logger'):
-                self.logger.debug(f"LMStudio server not accessible at {self.base_url} - model validation skipped")
-            pass
-        except ModelNotFoundError:
-            # Re-raise model not found errors
-            raise
-        except Exception as e:
-            # Other errors (like timeout, None type errors) - continue, will fail later if needed
-            if hasattr(self, 'logger'):
-                self.logger.debug(f"Model validation failed with error: {e} - continuing anyway")
-            pass
-
-    def unload(self) -> None:
-        """
-        Close HTTP client connection.
-
-        Note: LMStudio manages model memory automatically using TTL (time-to-live)
-        and auto-evict features. There is no explicit API to unload models.
-        Models will be automatically unloaded after the configured TTL expires.
-
-        This method only closes the HTTP client connection for cleanup.
-        """
-        try:
-            # Close the HTTP client connection
-            if hasattr(self, 'client') and self.client is not None:
-                self.client.close()
-
-            # Close async client if it was created
-            if self._async_client is not None:
-                import asyncio
-                try:
-                    loop = asyncio.get_running_loop()
-                    loop.create_task(self._async_client.aclose())
-                except RuntimeError:
-                    # No running loop
-                    import asyncio
-                    asyncio.run(self._async_client.aclose())
-
-        except Exception as e:
-            # Log but don't raise - unload should be best-effort
-            if hasattr(self, 'logger'):
-                self.logger.warning(f"Error during unload: {e}")
-
-    def generate(self, *args, **kwargs):
-        """Public generate method that includes telemetry"""
-        return self.generate_with_telemetry(*args, **kwargs)
-
-    def _generate_internal(self,
-                           prompt: str,
-                           messages: Optional[List[Dict[str, str]]] = None,
-                           system_prompt: Optional[str] = None,
-                           tools: Optional[List[Dict[str, Any]]] = None,
-                           media: Optional[List['MediaContent']] = None,
-                           stream: bool = False,
-                           response_model: Optional[Type[BaseModel]] = None,
-                           execute_tools: Optional[bool] = None,
-                           tool_call_tags: Optional[str] = None,
-                           **kwargs) -> Union[GenerateResponse, Iterator[GenerateResponse]]:
-        """Generate response using LM Studio"""
-
-        # Build messages for chat completions with tool support
-        chat_messages = []
-
-        # Add tools to system prompt if provided
-        final_system_prompt = system_prompt
-        # Prefer native tools when the model supports them. Only inject a prompted tool list
-        # when native tool calling is not available.
-        if tools and self.tool_handler.supports_prompted and not self.tool_handler.supports_native:
-            include_tool_list = True
-            if final_system_prompt and "## Tools (session)" in final_system_prompt:
-                include_tool_list = False
-            tool_prompt = self.tool_handler.format_tools_prompt(tools, include_tool_list=include_tool_list)
-            if final_system_prompt:
-                final_system_prompt += f"\n\n{tool_prompt}"
-            else:
-                final_system_prompt = tool_prompt
-
-        # Add system message if provided
-        if final_system_prompt:
-            chat_messages.append({
-                "role": "system",
-                "content": final_system_prompt
-            })
-
-        # Add conversation history
-        if messages:
-            chat_messages.extend(messages)
-
-        # Handle media content regardless of prompt (media can be used with messages too)
-        if media:
-            # Get the last user message content to combine with media
-            user_message_text = prompt.strip() if prompt else ""
-            if not user_message_text and chat_messages:
-                # If no prompt, try to get text from the last user message
-                for msg in reversed(chat_messages):
-                    if msg.get("role") == "user" and msg.get("content"):
-                        user_message_text = msg["content"]
-                        break
-            try:
-                # CRITICAL FIX: Process media files into MediaContent objects first
-                processed_media = self._process_media_content(media)
-
-                # Use capability-based media handler selection
-                media_handler = self._get_media_handler_for_model(self.model)
-
-                # Create multimodal message combining text and processed media
-                multimodal_message = media_handler.create_multimodal_message(user_message_text, processed_media)
-
-                # For LMStudio (OpenAI-compatible), we might get a string (embedded text) or dict (structured)
-                if isinstance(multimodal_message, str):
-                    # Replace the last user message with the multimodal message, or add new one
-                    if chat_messages and chat_messages[-1].get("role") == "user":
-                        chat_messages[-1]["content"] = multimodal_message
-                    else:
-                        chat_messages.append({
-                            "role": "user",
-                            "content": multimodal_message
-                        })
-                else:
-                    if chat_messages and chat_messages[-1].get("role") == "user":
-                        # Replace last user message with structured multimodal message
-                        chat_messages[-1] = multimodal_message
-                    else:
-                        chat_messages.append(multimodal_message)
-            except ImportError:
-                self.logger.warning("Media processing not available. Install with: pip install abstractcore[media]")
-                if user_message_text:
-                    chat_messages.append({
-                        "role": "user",
-                        "content": user_message_text
-                    })
-            except Exception as e:
-                self.logger.warning(f"Failed to process media content: {e}")
-                if user_message_text:
-                    chat_messages.append({
-                        "role": "user",
-                        "content": user_message_text
-                    })
-
-        # Add prompt as separate message if provided (for backward compatibility)
-        elif prompt and prompt.strip():
-            chat_messages.append({
-                "role": "user",
-                "content": prompt
-            })
-
-        # Build request payload using unified system
-        generation_kwargs = self._prepare_generation_kwargs(**kwargs)
-        max_output_tokens = self._get_provider_max_tokens_param(generation_kwargs)
-
-        payload = {
-            "model": self.model,
-            "messages": chat_messages,
-            "stream": stream,
-            "temperature": kwargs.get("temperature", self.temperature),
-            "max_tokens": max_output_tokens,  # LMStudio uses max_tokens for output tokens
-            "top_p": kwargs.get("top_p", 0.9),
-        }
-
-        # Native tools (OpenAI-compatible): send structured tools/tool_choice when supported.
-        if tools and self.tool_handler.supports_native:
-            payload["tools"] = self.tool_handler.prepare_tools_for_native(tools)
-            payload["tool_choice"] = kwargs.get("tool_choice", "auto")
-
-        # Add additional generation parameters if provided (OpenAI-compatible)
-        if "frequency_penalty" in kwargs:
-            payload["frequency_penalty"] = kwargs["frequency_penalty"]
-        if "presence_penalty" in kwargs:
-            payload["presence_penalty"] = kwargs["presence_penalty"]
-        if "repetition_penalty" in kwargs:
-            # Some models support repetition_penalty directly
-            payload["repetition_penalty"] = kwargs["repetition_penalty"]
-
-        # Add seed if provided (LMStudio supports seed via OpenAI-compatible API)
-        seed_value = kwargs.get("seed", self.seed)
-        if seed_value is not None:
-            payload["seed"] = seed_value
-
-        # Add structured output support (OpenAI-compatible format)
-        # LMStudio supports native structured outputs using the response_format parameter
-        # This provides server-side guaranteed schema compliance
-        if response_model and PYDANTIC_AVAILABLE:
-            json_schema = response_model.model_json_schema()
-            payload["response_format"] = {
-                "type": "json_schema",
-                "json_schema": {
-                    "name": response_model.__name__,
-                    "schema": json_schema
-                }
-            }
-
-        if stream:
-            # Return streaming response - BaseProvider will handle tag rewriting via UnifiedStreamProcessor
-            return self._stream_generate(payload)
-        else:
-            response = self._single_generate(payload)
-
-            # Execute tools if enabled and tools are present
-            if self.execute_tools and tools and self.tool_handler.supports_prompted and response.content:
-                response = self._handle_prompted_tool_execution(response, tools, execute_tools)
-
-            return response
-
-    def _single_generate(self, payload: Dict[str, Any]) -> GenerateResponse:
-        """Generate single response"""
-        try:
-            # Ensure client is available
-            if not hasattr(self, 'client') or self.client is None:
-                raise ProviderAPIError("HTTP client not initialized")
-
-            # Track generation time
-            start_time = time.time()
-            request_url = f"{self.base_url}/chat/completions"
-            response = self.client.post(
-                request_url,
-                json=payload,
-                headers={"Content-Type": "application/json"}
-            )
-            response.raise_for_status()
-            gen_time = round((time.time() - start_time) * 1000, 1)
-
-            result = response.json()
-
-            # Extract response from OpenAI format
-            if "choices" in result and len(result["choices"]) > 0:
-                choice = result["choices"][0]
-                message = choice.get("message") or {}
-                if not isinstance(message, dict):
-                    message = {}
-
-                content = message.get("content", "")
-                reasoning = message.get("reasoning")
-                tool_calls = message.get("tool_calls")
-                if tool_calls is None:
-                    # Some servers surface tool calls at the choice level.
-                    tool_calls = choice.get("tool_calls")
-                finish_reason = choice.get("finish_reason", "stop")
-            else:
-                content = "No response generated"
-                reasoning = None
-                tool_calls = None
-                finish_reason = "error"
-
-            # Extract usage info
-            usage = result.get("usage", {})
-
-            metadata = {}
-            if isinstance(reasoning, str) and reasoning.strip():
-                metadata["reasoning"] = reasoning
-            # Runtime observability: capture the exact HTTP JSON payload we sent.
-            metadata["_provider_request"] = {
-                "url": request_url,
-                "payload": payload,
-            }
-
-            return GenerateResponse(
-                content=content,
-                model=self.model,
-                finish_reason=finish_reason,
-                raw_response=result,
-                tool_calls=tool_calls if isinstance(tool_calls, list) else None,
-                metadata=metadata or None,
-                usage={
-                    "input_tokens": usage.get("prompt_tokens", 0),
-                    "output_tokens": usage.get("completion_tokens", 0),
-                    "total_tokens": usage.get("total_tokens", 0),
-                    # Keep legacy keys for backward compatibility
-                    "prompt_tokens": usage.get("prompt_tokens", 0),
-                    "completion_tokens": usage.get("completion_tokens", 0)
-                },
-                gen_time=gen_time
-            )
-
-        except httpx.HTTPStatusError as e:
-            # Improve debuggability: include LMStudio's error response body (often a JSON error envelope).
-            resp = getattr(e, "response", None)
-            status = getattr(resp, "status_code", None)
-
-            body_text = ""
-            try:
-                if resp is not None:
-                    # Try to extract a structured error message if the server returns JSON.
-                    try:
-                        j = resp.json()
-                        if isinstance(j, dict):
-                            err = j.get("error")
-                            if isinstance(err, dict):
-                                msg = err.get("message") or err.get("error") or err.get("detail")
-                                if isinstance(msg, str) and msg.strip():
-                                    body_text = msg.strip()
-                            if not body_text:
-                                msg2 = j.get("message") or j.get("detail")
-                                if isinstance(msg2, str) and msg2.strip():
-                                    body_text = msg2.strip()
-                            if not body_text:
-                                body_text = json.dumps(j, ensure_ascii=False)
-                    except Exception:
-                        body_text = str(getattr(resp, "text", "") or "").strip()
-            except Exception:
-                body_text = ""
-
-            if body_text and len(body_text) > 2000:
-                body_text = body_text[:2000] + "…"
-
-            # Preserve classification for BaseProvider error normalization.
-            base = str(e)
-            detail = f"{base} | response={body_text}" if body_text else base
-            if isinstance(status, int) and 400 <= status < 500:
-                raise InvalidRequestError(detail)
-            raise ProviderAPIError(detail)
-
-        except AttributeError as e:
-            # Handle None type errors specifically
-            if "'NoneType'" in str(e):
-                raise ProviderAPIError(f"LMStudio provider not properly initialized: {str(e)}")
-            else:
-                raise ProviderAPIError(f"LMStudio configuration error: {str(e)}")
-        except Exception as e:
-            error_str = str(e).lower()
-            if ('404' in error_str or 'not found' in error_str or 'model' in error_str) and ('not found' in error_str):
-                # Model not found - show available models
-                try:
-                    available_models = self.list_available_models(base_url=self.base_url)
-                    error_message = format_model_error("LMStudio", self.model, available_models)
-                    raise ModelNotFoundError(error_message)
-                except Exception:
-                    # If model discovery also fails, provide a generic error
-                    raise ModelNotFoundError(f"Model '{self.model}' not found in LMStudio and could not fetch available models")
-            else:
-                raise
-
-    def _stream_generate(self, payload: Dict[str, Any]) -> Iterator[GenerateResponse]:
-        """Generate streaming response"""
-        try:
-            with self.client.stream(
-                "POST",
-                f"{self.base_url}/chat/completions",
-                json=payload,
-                headers={"Content-Type": "application/json"}
-            ) as response:
-                response.raise_for_status()
-
-                for line in response.iter_lines():
-                    if line:
-                        # Decode bytes to string if necessary
-                        if isinstance(line, bytes):
-                            line = line.decode('utf-8')
-                        line = line.strip()
-
-                        if line.startswith("data: "):
-                            data = line[6:]  # Remove "data: " prefix
-
-                            if data == "[DONE]":
-                                break
-
-                            try:
-                                chunk = json.loads(data)
-
-                                if "choices" in chunk and len(chunk["choices"]) > 0:
-                                    choice = chunk["choices"][0]
-                                    delta = choice.get("delta", {})
-                                    if not isinstance(delta, dict):
-                                        delta = {}
-                                    content = delta.get("content", "")
-                                    reasoning = delta.get("reasoning")
-                                    tool_calls = delta.get("tool_calls") or choice.get("tool_calls")
-                                    finish_reason = choice.get("finish_reason")
-
-                                    metadata = {}
-                                    if isinstance(reasoning, str) and reasoning.strip():
-                                        metadata["reasoning"] = reasoning
-
-                                    yield GenerateResponse(
-                                        content=content,
-                                        model=self.model,
-                                        finish_reason=finish_reason,
-                                        tool_calls=tool_calls if isinstance(tool_calls, list) else None,
-                                        metadata=metadata or None,
-                                        raw_response=chunk,
-                                    )
 
-                            except json.JSONDecodeError:
-                                continue
-
-        except Exception as e:
-            yield GenerateResponse(
-                content=f"Error: {str(e)}",
-                model=self.model,
-                finish_reason="error"
-            )
-
-    async def _agenerate_internal(self,
-                                  prompt: str,
-                                  messages: Optional[List[Dict[str, str]]] = None,
-                                  system_prompt: Optional[str] = None,
-                                  tools: Optional[List[Dict[str, Any]]] = None,
-                                  media: Optional[List['MediaContent']] = None,
-                                  stream: bool = False,
-                                  response_model: Optional[Type[BaseModel]] = None,
-                                  execute_tools: Optional[bool] = None,
-                                  tool_call_tags: Optional[str] = None,
-                                  **kwargs) -> Union[GenerateResponse, AsyncIterator[GenerateResponse]]:
-        """Native async implementation using httpx.AsyncClient - 3-10x faster for batch operations."""
-
-        # Build messages for chat completions with tool support (same logic as sync)
-        chat_messages = []
-
-        # Add tools to system prompt if provided
-        final_system_prompt = system_prompt
-        # Prefer native tools when available; only inject prompted tool syntax as fallback.
-        if tools and self.tool_handler.supports_prompted and not self.tool_handler.supports_native:
-            include_tool_list = True
-            if final_system_prompt and "## Tools (session)" in final_system_prompt:
-                include_tool_list = False
-            tool_prompt = self.tool_handler.format_tools_prompt(tools, include_tool_list=include_tool_list)
-            if final_system_prompt:
-                final_system_prompt += f"\n\n{tool_prompt}"
-            else:
-                final_system_prompt = tool_prompt
-
-        # Add system message if provided
-        if final_system_prompt:
-            chat_messages.append({
-                "role": "system",
-                "content": final_system_prompt
-            })
-
-        # Add conversation history
-        if messages:
-            chat_messages.extend(messages)
-
-        # Handle media content
-        if media:
-            user_message_text = prompt.strip() if prompt else ""
-            if not user_message_text and chat_messages:
-                for msg in reversed(chat_messages):
-                    if msg.get("role") == "user" and msg.get("content"):
-                        user_message_text = msg["content"]
-                        break
-            try:
-                processed_media = self._process_media_content(media)
-                media_handler = self._get_media_handler_for_model(self.model)
-                multimodal_message = media_handler.create_multimodal_message(user_message_text, processed_media)
-
-                if isinstance(multimodal_message, str):
-                    if chat_messages and chat_messages[-1].get("role") == "user":
-                        chat_messages[-1]["content"] = multimodal_message
-                    else:
-                        chat_messages.append({"role": "user", "content": multimodal_message})
-                else:
-                    if chat_messages and chat_messages[-1].get("role") == "user":
-                        chat_messages[-1] = multimodal_message
-                    else:
-                        chat_messages.append(multimodal_message)
-            except ImportError:
-                self.logger.warning("Media processing not available. Install with: pip install abstractcore[media]")
-                if user_message_text:
-                    chat_messages.append({"role": "user", "content": user_message_text})
-            except Exception as e:
-                self.logger.warning(f"Failed to process media content: {e}")
-                if user_message_text:
-                    chat_messages.append({"role": "user", "content": user_message_text})
-
-        # Add prompt as separate message if provided
-        elif prompt and prompt.strip():
-            chat_messages.append({"role": "user", "content": prompt})
-
-        # Build request payload
-        generation_kwargs = self._prepare_generation_kwargs(**kwargs)
-        max_output_tokens = self._get_provider_max_tokens_param(generation_kwargs)
-
-        payload = {
-            "model": self.model,
-            "messages": chat_messages,
-            "stream": stream,
-            "temperature": kwargs.get("temperature", self.temperature),
-            "max_tokens": max_output_tokens,
-            "top_p": kwargs.get("top_p", 0.9),
-        }
-
-        # Native tools (OpenAI-compatible): send structured tools/tool_choice when supported.
-        if tools and self.tool_handler.supports_native:
-            payload["tools"] = self.tool_handler.prepare_tools_for_native(tools)
-            payload["tool_choice"] = kwargs.get("tool_choice", "auto")
-
-        # Add additional parameters
-        if "frequency_penalty" in kwargs:
-            payload["frequency_penalty"] = kwargs["frequency_penalty"]
-        if "presence_penalty" in kwargs:
-            payload["presence_penalty"] = kwargs["presence_penalty"]
-        if "repetition_penalty" in kwargs:
-            payload["repetition_penalty"] = kwargs["repetition_penalty"]
-
-        # Add seed if provided
-        seed_value = kwargs.get("seed", self.seed)
-        if seed_value is not None:
-            payload["seed"] = seed_value
-
-        # Add structured output support
-        if response_model and PYDANTIC_AVAILABLE:
-            json_schema = response_model.model_json_schema()
-            payload["response_format"] = {
-                "type": "json_schema",
-                "json_schema": {
-                    "name": response_model.__name__,
-                    "schema": json_schema
-                }
-            }
-
-        if stream:
-            return self._async_stream_generate(payload)
-        else:
-            response = await self._async_single_generate(payload)
-
-            # Execute tools if enabled
-            if self.execute_tools and tools and self.tool_handler.supports_prompted and response.content:
-                response = self._handle_prompted_tool_execution(response, tools, execute_tools)
-
-            return response
-
-    async def _async_single_generate(self, payload: Dict[str, Any]) -> GenerateResponse:
-        """Native async single response generation."""
-        try:
-            # Track generation time
-            start_time = time.time()
-            request_url = f"{self.base_url}/chat/completions"
-            response = await self.async_client.post(
-                request_url,
-                json=payload,
-                headers={"Content-Type": "application/json"}
-            )
-            response.raise_for_status()
-            gen_time = round((time.time() - start_time) * 1000, 1)
-
-            result = response.json()
-
-            # Extract response from OpenAI format
-            if "choices" in result and len(result["choices"]) > 0:
-                choice = result["choices"][0]
-                message = choice.get("message") or {}
-                if not isinstance(message, dict):
-                    message = {}
-
-                content = message.get("content", "")
-                reasoning = message.get("reasoning")
-                tool_calls = message.get("tool_calls")
-                if tool_calls is None:
-                    tool_calls = choice.get("tool_calls")
-                finish_reason = choice.get("finish_reason", "stop")
-            else:
-                content = "No response generated"
-                reasoning = None
-                tool_calls = None
-                finish_reason = "error"
-
-            # Extract usage info
-            usage = result.get("usage", {})
-
-            metadata = {}
-            if isinstance(reasoning, str) and reasoning.strip():
-                metadata["reasoning"] = reasoning
-            metadata["_provider_request"] = {
-                "url": request_url,
-                "payload": payload,
-            }
-
-            return GenerateResponse(
-                content=content,
-                model=self.model,
-                finish_reason=finish_reason,
-                raw_response=result,
-                tool_calls=tool_calls if isinstance(tool_calls, list) else None,
-                metadata=metadata or None,
-                usage={
-                    "input_tokens": usage.get("prompt_tokens", 0),
-                    "output_tokens": usage.get("completion_tokens", 0),
-                    "total_tokens": usage.get("total_tokens", 0),
-                    "prompt_tokens": usage.get("prompt_tokens", 0),
-                    "completion_tokens": usage.get("completion_tokens", 0)
-                },
-                gen_time=gen_time
-            )
-
-        except Exception as e:
-            error_str = str(e).lower()
-            if ('404' in error_str or 'not found' in error_str or 'model' in error_str) and ('not found' in error_str):
-                try:
-                    available_models = self.list_available_models(base_url=self.base_url)
-                    error_message = format_model_error("LMStudio", self.model, available_models)
-                    raise ModelNotFoundError(error_message)
-                except Exception:
-                    raise ModelNotFoundError(f"Model '{self.model}' not found in LMStudio")
-            else:
-                raise ProviderAPIError(f"LMStudio API error: {str(e)}")
-
-    async def _async_stream_generate(self, payload: Dict[str, Any]) -> AsyncIterator[GenerateResponse]:
-        """Native async streaming response generation."""
-        try:
-            async with self.async_client.stream(
-                "POST",
-                f"{self.base_url}/chat/completions",
-                json=payload,
-                headers={"Content-Type": "application/json"}
-            ) as response:
-                response.raise_for_status()
-
-                async for line in response.aiter_lines():
-                    if line:
-                        line = line.strip()
-
-                        if line.startswith("data: "):
-                            data = line[6:]  # Remove "data: " prefix
-
-                            if data == "[DONE]":
-                                break
-
-                            try:
-                                chunk = json.loads(data)
-
-                                if "choices" in chunk and len(chunk["choices"]) > 0:
-                                    choice = chunk["choices"][0]
-                                    delta = choice.get("delta", {})
-                                    if not isinstance(delta, dict):
-                                        delta = {}
-                                    content = delta.get("content", "")
-                                    reasoning = delta.get("reasoning")
-                                    tool_calls = delta.get("tool_calls") or choice.get("tool_calls")
-                                    finish_reason = choice.get("finish_reason")
-
-                                    metadata = {}
-                                    if isinstance(reasoning, str) and reasoning.strip():
-                                        metadata["reasoning"] = reasoning
-
-                                    yield GenerateResponse(
-                                        content=content,
-                                        model=self.model,
-                                        finish_reason=finish_reason,
-                                        tool_calls=tool_calls if isinstance(tool_calls, list) else None,
-                                        metadata=metadata or None,
-                                        raw_response=chunk
-                                    )
-
-                            except json.JSONDecodeError:
-                                continue
-
-        except Exception as e:
-            yield GenerateResponse(
-                content=f"Error: {str(e)}",
-                model=self.model,
-                finish_reason="error"
-            )
-
-    def get_capabilities(self) -> List[str]:
-        """Get LM Studio capabilities"""
-        return ["streaming", "chat", "tools"]
-
-    def validate_config(self) -> bool:
-        """Validate LM Studio connection"""
-        try:
-            response = self.client.get(f"{self.base_url}/models")
-            return response.status_code == 200
-        except:
-            return False
-
-    # Removed override - using BaseProvider method with JSON capabilities
-
-    def _get_provider_max_tokens_param(self, kwargs: Dict[str, Any]) -> int:
-        """Get max tokens parameter for LMStudio API"""
-        # For LMStudio (OpenAI-compatible), max_tokens is the max output tokens
-        return kwargs.get("max_output_tokens", self.max_output_tokens)
-
-    def _update_http_client_timeout(self) -> None:
-        """Update HTTP client timeout when timeout is changed."""
-        if hasattr(self, 'client') and self.client is not None:
-            try:
-                # Create new client with updated timeout
-                self.client.close()
-
-                # Get timeout value - None means unlimited timeout
-                timeout_value = getattr(self, '_timeout', None)
-                # Validate timeout if provided (None is allowed for unlimited)
-                if timeout_value is not None and timeout_value <= 0:
-                    timeout_value = None  # Invalid timeout becomes unlimited
-
-                self.client = httpx.Client(timeout=timeout_value)
-            except Exception as e:
-                # Log error but don't fail - timeout update is not critical
-                if hasattr(self, 'logger'):
-                    self.logger.warning(f"Failed to update HTTP client timeout: {e}")
-                # Try to create a new client with default timeout
-                try:
-                    fallback_timeout = None
-                    try:
-                        from ..config.manager import get_config_manager

-                        fallback_timeout = float(get_config_manager().get_default_timeout())
-                    except Exception:
-                        fallback_timeout = 7200.0
-                    if isinstance(fallback_timeout, (int, float)) and float(fallback_timeout) <= 0:
-                        fallback_timeout = None
-                    self.client = httpx.Client(timeout=fallback_timeout)
-                except Exception:
-                    pass  # Best effort - don't fail the operation
-
-    def _normalize_model_name(self, model_name: str) -> str:
-        """Remove common provider prefixes from model name."""
-        for prefix in ["lmstudio/", "qwen/", "ollama/", "huggingface/"]:
-            if model_name.startswith(prefix):
-                model_name = model_name[len(prefix):]
-        return model_name
-
-    def _get_media_handler_for_model(self, model_name: str):
-        """Get appropriate media handler based on model vision capabilities."""
-        from ..media.handlers import OpenAIMediaHandler, LocalMediaHandler
-
-        # Normalize model name by removing provider prefixes
-        clean_model_name = self._normalize_model_name(model_name)
-
-        # Determine if model supports vision
-        try:
-            from ..architectures.detection import supports_vision
-            use_vision_handler = supports_vision(clean_model_name)
-        except Exception as e:
-            self.logger.debug(f"Vision detection failed: {e}, defaulting to LocalMediaHandler")
-            use_vision_handler = False
-
-        # Create appropriate handler
-        if use_vision_handler:
-            handler = OpenAIMediaHandler(self.model_capabilities, model_name=model_name)
-            self.logger.debug(f"Using OpenAIMediaHandler for vision model: {clean_model_name}")
-        else:
-            handler = LocalMediaHandler("lmstudio", self.model_capabilities, model_name=model_name)
-            self.logger.debug(f"Using LocalMediaHandler for model: {clean_model_name}")
-
-        return handler
-
-    def list_available_models(self, **kwargs) -> List[str]:
-        """
-        List available models from LMStudio server.
-
-        Args:
-            **kwargs: Optional parameters including:
-                - base_url: LMStudio server URL
-                - input_capabilities: List of ModelInputCapability enums to filter by input capability
-                - output_capabilities: List of ModelOutputCapability enums to filter by output capability
-
-        Returns:
-            List of model names, optionally filtered by capabilities
-        """
-        try:
-            from .model_capabilities import filter_models_by_capabilities
-
-            # Use provided base_url or fall back to instance base_url
-            base_url = kwargs.get('base_url', self.base_url)
-
-            response = self.client.get(f"{base_url}/models", timeout=5.0)
-            if response.status_code == 200:
-                data = response.json()
-                models = [model["id"] for model in data.get("data", [])]
-                models = sorted(models)
-
-                # Apply new capability filtering if provided
-                input_capabilities = kwargs.get('input_capabilities')
-                output_capabilities = kwargs.get('output_capabilities')
-
-                if input_capabilities or output_capabilities:
-                    models = filter_models_by_capabilities(
-                        models,
-                        input_capabilities=input_capabilities,
-                        output_capabilities=output_capabilities
-                    )
-
-
-                return models
-            else:
-                self.logger.warning(f"LMStudio API returned status {response.status_code}")
-                return []
-        except Exception as e:
-            self.logger.warning(f"Failed to list LMStudio models: {e}")
-            return []
+LM Studio exposes an OpenAI-compatible server (by default at `http://localhost:1234/v1`).
+This provider is a thin wrapper around `OpenAICompatibleProvider` with LM Studio defaults.
+"""
 
-    def embed(self, input_text: Union[str, List[str]], **kwargs) -> Dict[str, Any]:
-        """
-        Generate embeddings using LMStudio's OpenAI-compatible embedding API.
-
-        Args:
-            input_text: Single string or list of strings to embed
-            **kwargs: Additional parameters (encoding_format, dimensions, user, etc.)
-
-        Returns:
-            Dict with embeddings in OpenAI-compatible format:
-            {
-                "object": "list",
-                "data": [{"object": "embedding", "embedding": [...], "index": 0}, ...],
-                "model": "model-name",
-                "usage": {"prompt_tokens": N, "total_tokens": N}
-            }
-        """
-        try:
-            # Prepare request payload for OpenAI-compatible API
-            payload = {
-                "input": input_text,
-                "model": self.model
-            }
-
-            # Add optional parameters if provided
-            if "encoding_format" in kwargs:
-                payload["encoding_format"] = kwargs["encoding_format"]
-            if "dimensions" in kwargs and kwargs["dimensions"]:
-                payload["dimensions"] = kwargs["dimensions"]
-            if "user" in kwargs:
-                payload["user"] = kwargs["user"]
-
-            # Call LMStudio's embeddings API (OpenAI-compatible)
-            response = self.client.post(
-                f"{self.base_url}/embeddings",
-                json=payload,
-                headers={"Content-Type": "application/json"}
-            )
-            response.raise_for_status()
-
-            # LMStudio returns OpenAI-compatible format, so we can return it directly
-            result = response.json()
-
-            # Ensure the model field uses our provider-prefixed format
-            result["model"] = self.model
-
-            return result
-
-        except Exception as e:
-            self.logger.error(f"Failed to generate embeddings: {e}")
-            raise ProviderAPIError(f"LMStudio embedding error: {str(e)}")
+from typing import Optional, Any
+
+from .openai_compatible_provider import OpenAICompatibleProvider
+
+
+class LMStudioProvider(OpenAICompatibleProvider):
+    """LM Studio provider using OpenAI-compatible API."""
+
+    PROVIDER_ID = "lmstudio"
+    PROVIDER_DISPLAY_NAME = "LMStudio"
+    BASE_URL_ENV_VAR = "LMSTUDIO_BASE_URL"
+    API_KEY_ENV_VAR = None
+    DEFAULT_BASE_URL = "http://localhost:1234/v1"
+
+    def __init__(
+        self,
+        model: str = "local-model",
+        base_url: Optional[str] = None,
+        timeout: Optional[float] = None,
+        **kwargs: Any,
+    ):
+        # ADR-0027: Local LM Studio calls should default to no client-side timeout.
+        #
+        # We intentionally treat "timeout omitted" as "unlimited" for this provider, rather
+        # than inheriting the global `abstractcore` default timeout (which may be tuned for
+        # remote providers). Operators can still override via:
+        #   - explicit `timeout=...` when constructing the provider, or
+        #   - runtime provider config (ConfigurationManager.configure_provider('lmstudio', timeout=...)).
+        if "timeout" in kwargs:
+            timeout = kwargs.pop("timeout")
+
+        super().__init__(model=model, base_url=base_url, timeout=timeout, **kwargs)
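The net effect of this hunk is that `LMStudioProvider` no longer carries its own HTTP, streaming, tool, media, and embedding plumbing: it only declares LM Studio defaults (provider id and display name, the `LMSTUDIO_BASE_URL` environment variable, the `http://localhost:1234/v1` base URL, no API key) plus an unlimited-by-default client timeout, and inherits everything else from `OpenAICompatibleProvider`. Below is a minimal usage sketch under the assumption that the shared base class preserves the `generate()` surface of the removed 2.9.1 implementation; the call signature itself is not shown in this diff.

```python
# Hypothetical usage sketch for the 2.11.2 provider. Class and parameter names are
# taken from the diff above; the generate() call shape is assumed from the removed
# 2.9.1 code rather than confirmed by this diff.
from abstractcore.providers.lmstudio_provider import LMStudioProvider

# Defaults mirror the new class attributes: LMSTUDIO_BASE_URL or http://localhost:1234/v1,
# no API key, and no client-side timeout unless one is passed explicitly.
provider = LMStudioProvider(
    model="local-model",                    # whichever model LM Studio has loaded
    base_url="http://localhost:1234/v1",
    timeout=None,                           # None = unlimited, per the ADR-0027 comment
)

response = provider.generate("Say hello in one sentence.")
print(response.content)
```

The same consolidation appears to explain the other large swings in the file list above: `vllm_provider.py` (-817) shrinks alongside `lmstudio_provider.py` (-923), while the shared `openai_compatible_provider.py` grows by +406 lines.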