abstractcore 2.6.9__py3-none-any.whl → 2.9.1__py3-none-any.whl
This diff shows the content of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- abstractcore/apps/summarizer.py +69 -27
- abstractcore/architectures/detection.py +190 -25
- abstractcore/assets/architecture_formats.json +129 -6
- abstractcore/assets/model_capabilities.json +803 -141
- abstractcore/config/main.py +2 -2
- abstractcore/config/manager.py +3 -1
- abstractcore/events/__init__.py +7 -1
- abstractcore/mcp/__init__.py +30 -0
- abstractcore/mcp/client.py +213 -0
- abstractcore/mcp/factory.py +64 -0
- abstractcore/mcp/naming.py +28 -0
- abstractcore/mcp/stdio_client.py +336 -0
- abstractcore/mcp/tool_source.py +164 -0
- abstractcore/processing/__init__.py +2 -2
- abstractcore/processing/basic_deepsearch.py +1 -1
- abstractcore/processing/basic_summarizer.py +379 -93
- abstractcore/providers/anthropic_provider.py +91 -10
- abstractcore/providers/base.py +540 -16
- abstractcore/providers/huggingface_provider.py +17 -8
- abstractcore/providers/lmstudio_provider.py +170 -25
- abstractcore/providers/mlx_provider.py +13 -10
- abstractcore/providers/ollama_provider.py +42 -26
- abstractcore/providers/openai_compatible_provider.py +87 -22
- abstractcore/providers/openai_provider.py +12 -9
- abstractcore/providers/streaming.py +201 -39
- abstractcore/providers/vllm_provider.py +78 -21
- abstractcore/server/app.py +116 -30
- abstractcore/structured/retry.py +20 -7
- abstractcore/tools/__init__.py +46 -24
- abstractcore/tools/abstractignore.py +166 -0
- abstractcore/tools/arg_canonicalizer.py +61 -0
- abstractcore/tools/common_tools.py +2443 -742
- abstractcore/tools/core.py +109 -13
- abstractcore/tools/handler.py +17 -3
- abstractcore/tools/parser.py +894 -159
- abstractcore/tools/registry.py +122 -18
- abstractcore/tools/syntax_rewriter.py +68 -6
- abstractcore/tools/tag_rewriter.py +186 -1
- abstractcore/utils/jsonish.py +111 -0
- abstractcore/utils/version.py +1 -1
- {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/METADATA +56 -2
- {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/RECORD +46 -37
- {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/WHEEL +0 -0
- {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/entry_points.txt +0 -0
- {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/top_level.txt +0 -0
@@ -1287,7 +1287,13 @@ class HuggingFaceProvider(BaseProvider):
 
            elif self.tool_handler.supports_prompted:
                # Add tools as system prompt for prompted models
-
+                system_text = (
+                    chat_messages[0].get("content", "")
+                    if chat_messages and chat_messages[0].get("role") == "system"
+                    else ""
+                )
+                include_tool_list = "## Tools (session)" not in str(system_text)
+                tool_prompt = self.tool_handler.format_tools_prompt(tools, include_tool_list=include_tool_list)
                if chat_messages and chat_messages[0]["role"] == "system":
                    chat_messages[0]["content"] += f"\n\n{tool_prompt}"
                else:
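The `include_tool_list` guard above avoids emitting the tool catalogue a second time when the session system prompt already contains a "## Tools (session)" section; the same check recurs in the LMStudio, Ollama, and MLX hunks below. A minimal sketch of the idea, with `format_tools_prompt` stubbed out (the real implementation lives in abstractcore's `UniversalToolHandler`) and `merge_tools_into_system` as an illustrative helper name, not part of the package:

```python
# Sketch of the dedup guard added in this release; format_tools_prompt is a stub here.
from typing import Dict, List, Optional


def format_tools_prompt(tools: List[Dict], include_tool_list: bool = True) -> str:
    """Stand-in: render tool-calling instructions, optionally with the full tool list."""
    header = "Call tools using the documented syntax."
    if not include_tool_list:
        return header
    names = ", ".join(t.get("name", "?") for t in tools)
    return f"{header}\n## Tools (session)\n{names}"


def merge_tools_into_system(system_prompt: Optional[str], tools: List[Dict]) -> str:
    # Only re-emit the tool catalogue if the prompt does not already carry it.
    include_tool_list = not (system_prompt and "## Tools (session)" in system_prompt)
    tool_prompt = format_tools_prompt(tools, include_tool_list=include_tool_list)
    return f"{system_prompt}\n\n{tool_prompt}" if system_prompt else tool_prompt


if __name__ == "__main__":
    tools = [{"name": "read_file"}, {"name": "list_files"}]
    first = merge_tools_into_system("You are helpful.", tools)
    # Second pass: the catalogue is already present, so only the instructions are appended.
    second = merge_tools_into_system(first, tools)
    assert second.count("## Tools (session)") == 1
```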
@@ -1577,21 +1583,24 @@ class HuggingFaceProvider(BaseProvider):
        """Build input text for transformers model with tool support"""
 
        # Add tools to system prompt if provided
-
+        final_system_prompt = system_prompt
        if tools and self.tool_handler.supports_prompted:
-
-            if
-
+            include_tool_list = True
+            if final_system_prompt and "## Tools (session)" in final_system_prompt:
+                include_tool_list = False
+            tool_prompt = self.tool_handler.format_tools_prompt(tools, include_tool_list=include_tool_list)
+            if final_system_prompt:
+                final_system_prompt += f"\n\n{tool_prompt}"
            else:
-
+                final_system_prompt = tool_prompt
 
        # Check if model has chat template
        if hasattr(self.tokenizer, 'chat_template') and self.tokenizer.chat_template:
            # Use chat template if available
            chat_messages = []
 
-            if
-            chat_messages.append({"role": "system", "content":
+            if final_system_prompt:
+                chat_messages.append({"role": "system", "content": final_system_prompt})
 
            if messages:
                chat_messages.extend(messages)
@@ -16,7 +16,13 @@ except ImportError:
    BaseModel = None
from .base import BaseProvider
from ..core.types import GenerateResponse
-from ..exceptions import
+from ..exceptions import (
+    ProviderAPIError,
+    ModelNotFoundError,
+    InvalidRequestError,
+    format_model_error,
+    format_provider_error,
+)
from ..tools import UniversalToolHandler, execute_tools
from ..events import EventType
 
@@ -49,7 +55,16 @@ class LMStudioProvider(BaseProvider):
        except Exception as e:
            # Fallback with default timeout if client creation fails
            try:
-
+                fallback_timeout = None
+                try:
+                    from ..config.manager import get_config_manager
+
+                    fallback_timeout = float(get_config_manager().get_default_timeout())
+                except Exception:
+                    fallback_timeout = 7200.0
+                if isinstance(fallback_timeout, (int, float)) and float(fallback_timeout) <= 0:
+                    fallback_timeout = None
+                self.client = httpx.Client(timeout=fallback_timeout)
            except Exception:
                raise RuntimeError(f"Failed to create HTTP client for LMStudio: {e}")
 
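The fallback path above resolves a timeout from the config manager, falls back to 7200 seconds, and treats non-positive values as "no timeout", which httpx expresses as `timeout=None`. A small sketch of the same resolution logic under those assumptions; `resolve_fallback_timeout` and its callable argument are illustrative, not abstractcore APIs:

```python
# Illustrative sketch of the fallback-timeout resolution shown in the hunk above.
from typing import Callable, Optional

import httpx


def resolve_fallback_timeout(get_default_timeout: Callable[[], float]) -> Optional[float]:
    """Return a timeout in seconds, or None to disable httpx's timeouts entirely."""
    try:
        timeout = float(get_default_timeout())
    except Exception:
        timeout = 7200.0  # 2 hours, matching the hard-coded fallback in the diff
    if timeout <= 0:
        return None  # non-positive values mean "no timeout" for httpx
    return timeout


print(resolve_fallback_timeout(lambda: 120))  # 120.0
print(resolve_fallback_timeout(lambda: 0))    # None
client = httpx.Client(timeout=resolve_fallback_timeout(lambda: 0))  # client without timeouts
client.close()
```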
@@ -142,19 +157,24 @@ class LMStudioProvider(BaseProvider):
        chat_messages = []
 
        # Add tools to system prompt if provided
-
-
-
-
-
+        final_system_prompt = system_prompt
+        # Prefer native tools when the model supports them. Only inject a prompted tool list
+        # when native tool calling is not available.
+        if tools and self.tool_handler.supports_prompted and not self.tool_handler.supports_native:
+            include_tool_list = True
+            if final_system_prompt and "## Tools (session)" in final_system_prompt:
+                include_tool_list = False
+            tool_prompt = self.tool_handler.format_tools_prompt(tools, include_tool_list=include_tool_list)
+            if final_system_prompt:
+                final_system_prompt += f"\n\n{tool_prompt}"
            else:
-
+                final_system_prompt = tool_prompt
 
        # Add system message if provided
-        if
+        if final_system_prompt:
            chat_messages.append({
                "role": "system",
-                "content":
+                "content": final_system_prompt
            })
 
        # Add conversation history
@@ -231,6 +251,11 @@ class LMStudioProvider(BaseProvider):
            "max_tokens": max_output_tokens,  # LMStudio uses max_tokens for output tokens
            "top_p": kwargs.get("top_p", 0.9),
        }
+
+        # Native tools (OpenAI-compatible): send structured tools/tool_choice when supported.
+        if tools and self.tool_handler.supports_native:
+            payload["tools"] = self.tool_handler.prepare_tools_for_native(tools)
+            payload["tool_choice"] = kwargs.get("tool_choice", "auto")
 
        # Add additional generation parameters if provided (OpenAI-compatible)
        if "frequency_penalty" in kwargs:
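Once native tools are attached, the request body is a standard OpenAI-compatible chat payload with `tools` and `tool_choice` alongside `messages`. A rough sketch of that shape; the exact output of `prepare_tools_for_native()` is not shown in the diff, so the `function` wrapper below is the conventional OpenAI tool schema and an assumption, and the model name is a placeholder:

```python
# Approximate shape of the OpenAI-compatible payload once native tools are attached.
import json

payload = {
    "model": "qwen2.5-7b-instruct",  # placeholder model name
    "messages": [{"role": "user", "content": "List the files in /tmp"}],
    "max_tokens": 512,
    "top_p": 0.9,
    "tools": [
        {
            "type": "function",
            "function": {
                "name": "list_files",
                "description": "List files in a directory",
                "parameters": {
                    "type": "object",
                    "properties": {"path": {"type": "string"}},
                    "required": ["path"],
                },
            },
        }
    ],
    "tool_choice": "auto",
}

print(json.dumps(payload, indent=2))
```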
@@ -280,8 +305,9 @@ class LMStudioProvider(BaseProvider):
 
            # Track generation time
            start_time = time.time()
+            request_url = f"{self.base_url}/chat/completions"
            response = self.client.post(
-
+                request_url,
                json=payload,
                headers={"Content-Type": "application/json"}
            )
@@ -293,20 +319,42 @@ class LMStudioProvider(BaseProvider):
            # Extract response from OpenAI format
            if "choices" in result and len(result["choices"]) > 0:
                choice = result["choices"][0]
-
+                message = choice.get("message") or {}
+                if not isinstance(message, dict):
+                    message = {}
+
+                content = message.get("content", "")
+                reasoning = message.get("reasoning")
+                tool_calls = message.get("tool_calls")
+                if tool_calls is None:
+                    # Some servers surface tool calls at the choice level.
+                    tool_calls = choice.get("tool_calls")
                finish_reason = choice.get("finish_reason", "stop")
            else:
                content = "No response generated"
+                reasoning = None
+                tool_calls = None
                finish_reason = "error"
 
            # Extract usage info
            usage = result.get("usage", {})
 
+            metadata = {}
+            if isinstance(reasoning, str) and reasoning.strip():
+                metadata["reasoning"] = reasoning
+            # Runtime observability: capture the exact HTTP JSON payload we sent.
+            metadata["_provider_request"] = {
+                "url": request_url,
+                "payload": payload,
+            }
+
            return GenerateResponse(
                content=content,
                model=self.model,
                finish_reason=finish_reason,
                raw_response=result,
+                tool_calls=tool_calls if isinstance(tool_calls, list) else None,
+                metadata=metadata or None,
                usage={
                    "input_tokens": usage.get("prompt_tokens", 0),
                    "output_tokens": usage.get("completion_tokens", 0),
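The response now carries optional `reasoning` text and the exact request under a `_provider_request` metadata key, plus structured `tool_calls`. A consumer-side sketch of reading those fields back; `FakeResponse` is a minimal stand-in for `GenerateResponse`, and the field names are taken from the diff rather than from library documentation:

```python
# Consumer-side sketch: inspecting the new metadata and tool_calls fields.
from dataclasses import dataclass
from typing import Any, Dict, List, Optional


@dataclass
class FakeResponse:  # minimal stand-in for GenerateResponse
    content: str
    metadata: Optional[Dict[str, Any]] = None
    tool_calls: Optional[List[Dict[str, Any]]] = None


response = FakeResponse(
    content="",
    metadata={
        "reasoning": "The user wants a directory listing.",
        "_provider_request": {
            "url": "http://localhost:1234/v1/chat/completions",
            "payload": {"model": "local-model"},
        },
    },
    tool_calls=[{"id": "call_1", "function": {"name": "list_files", "arguments": "{\"path\": \"/tmp\"}"}}],
)

meta = response.metadata or {}
if "reasoning" in meta:
    print("model reasoning:", meta["reasoning"])
if "_provider_request" in meta:
    # Useful for debugging: the exact URL and JSON body the provider sent.
    print("sent to:", meta["_provider_request"]["url"])
for call in response.tool_calls or []:
    print("tool call:", call["function"]["name"])
```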
@@ -318,6 +366,44 @@ class LMStudioProvider(BaseProvider):
                gen_time=gen_time
            )
 
+        except httpx.HTTPStatusError as e:
+            # Improve debuggability: include LMStudio's error response body (often a JSON error envelope).
+            resp = getattr(e, "response", None)
+            status = getattr(resp, "status_code", None)
+
+            body_text = ""
+            try:
+                if resp is not None:
+                    # Try to extract a structured error message if the server returns JSON.
+                    try:
+                        j = resp.json()
+                        if isinstance(j, dict):
+                            err = j.get("error")
+                            if isinstance(err, dict):
+                                msg = err.get("message") or err.get("error") or err.get("detail")
+                                if isinstance(msg, str) and msg.strip():
+                                    body_text = msg.strip()
+                            if not body_text:
+                                msg2 = j.get("message") or j.get("detail")
+                                if isinstance(msg2, str) and msg2.strip():
+                                    body_text = msg2.strip()
+                            if not body_text:
+                                body_text = json.dumps(j, ensure_ascii=False)
+                    except Exception:
+                        body_text = str(getattr(resp, "text", "") or "").strip()
+            except Exception:
+                body_text = ""
+
+            if body_text and len(body_text) > 2000:
+                body_text = body_text[:2000] + "…"
+
+            # Preserve classification for BaseProvider error normalization.
+            base = str(e)
+            detail = f"{base} | response={body_text}" if body_text else base
+            if isinstance(status, int) and 400 <= status < 500:
+                raise InvalidRequestError(detail)
+            raise ProviderAPIError(detail)
+
        except AttributeError as e:
            # Handle None type errors specifically
            if "'NoneType'" in str(e):
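The handler above boils down to: prefer a structured message from a JSON error envelope, fall back to the raw body text, and cap the length before re-raising as a typed exception. A standalone sketch of that extraction step, with `extract_error_detail` as an illustrative name rather than an abstractcore function:

```python
# Standalone sketch of the error-body extraction logic added above.
import json
from typing import Any


def extract_error_detail(body: Any, raw_text: str = "", limit: int = 2000) -> str:
    text = ""
    if isinstance(body, dict):
        err = body.get("error")
        if isinstance(err, dict):
            msg = err.get("message") or err.get("error") or err.get("detail")
            if isinstance(msg, str) and msg.strip():
                text = msg.strip()
        if not text:
            msg = body.get("message") or body.get("detail")
            if isinstance(msg, str) and msg.strip():
                text = msg.strip()
        if not text:
            text = json.dumps(body, ensure_ascii=False)
    if not text:
        text = raw_text.strip()
    # Cap very long bodies so the exception message stays readable.
    return text[:limit] + "…" if len(text) > limit else text


print(extract_error_detail({"error": {"message": "model not loaded"}}))  # model not loaded
print(extract_error_detail({}, raw_text="502 Bad Gateway"))              # 502 Bad Gateway
```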
@@ -336,7 +422,7 @@ class LMStudioProvider(BaseProvider):
                # If model discovery also fails, provide a generic error
                raise ModelNotFoundError(f"Model '{self.model}' not found in LMStudio and could not fetch available models")
            else:
-                raise
+                raise
 
    def _stream_generate(self, payload: Dict[str, Any]) -> Iterator[GenerateResponse]:
        """Generate streaming response"""
@@ -368,14 +454,24 @@ class LMStudioProvider(BaseProvider):
                        if "choices" in chunk and len(chunk["choices"]) > 0:
                            choice = chunk["choices"][0]
                            delta = choice.get("delta", {})
+                            if not isinstance(delta, dict):
+                                delta = {}
                            content = delta.get("content", "")
+                            reasoning = delta.get("reasoning")
+                            tool_calls = delta.get("tool_calls") or choice.get("tool_calls")
                            finish_reason = choice.get("finish_reason")
 
+                            metadata = {}
+                            if isinstance(reasoning, str) and reasoning.strip():
+                                metadata["reasoning"] = reasoning
+
                            yield GenerateResponse(
                                content=content,
                                model=self.model,
                                finish_reason=finish_reason,
-
+                                tool_calls=tool_calls if isinstance(tool_calls, list) else None,
+                                metadata=metadata or None,
+                                raw_response=chunk,
                            )
 
                except json.JSONDecodeError:
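Streamed chunks can now carry partial content, partial `reasoning` metadata, and tool calls, so a client accumulates all three as it drains the stream. A sketch under those assumptions; `Chunk` and `drain_stream` are illustrative stand-ins, not abstractcore types:

```python
# Sketch of consuming streamed chunks shaped like the ones yielded above.
from typing import Any, Dict, Iterable, List, NamedTuple, Optional


class Chunk(NamedTuple):  # stand-in for the streamed GenerateResponse
    content: str
    metadata: Optional[Dict[str, Any]] = None
    tool_calls: Optional[List[Dict[str, Any]]] = None


def drain_stream(chunks: Iterable[Chunk]) -> Dict[str, Any]:
    text, reasoning, calls = [], [], []
    for chunk in chunks:
        if chunk.content:
            text.append(chunk.content)
        meta = chunk.metadata or {}
        if meta.get("reasoning"):
            reasoning.append(meta["reasoning"])
        if chunk.tool_calls:
            calls.extend(chunk.tool_calls)
    return {"content": "".join(text), "reasoning": "".join(reasoning), "tool_calls": calls}


result = drain_stream([
    Chunk("Hel"),
    Chunk("lo", metadata={"reasoning": "greet the user"}),
    Chunk("", tool_calls=[{"function": {"name": "list_files"}}]),
])
print(result["content"])     # Hello
print(result["tool_calls"])  # [{'function': {'name': 'list_files'}}]
```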
@@ -405,19 +501,23 @@ class LMStudioProvider(BaseProvider):
        chat_messages = []
 
        # Add tools to system prompt if provided
-
-
-
-
-
+        final_system_prompt = system_prompt
+        # Prefer native tools when available; only inject prompted tool syntax as fallback.
+        if tools and self.tool_handler.supports_prompted and not self.tool_handler.supports_native:
+            include_tool_list = True
+            if final_system_prompt and "## Tools (session)" in final_system_prompt:
+                include_tool_list = False
+            tool_prompt = self.tool_handler.format_tools_prompt(tools, include_tool_list=include_tool_list)
+            if final_system_prompt:
+                final_system_prompt += f"\n\n{tool_prompt}"
            else:
-
+                final_system_prompt = tool_prompt
 
        # Add system message if provided
-        if
+        if final_system_prompt:
            chat_messages.append({
                "role": "system",
-                "content":
+                "content": final_system_prompt
            })
 
        # Add conversation history
@@ -473,6 +573,11 @@ class LMStudioProvider(BaseProvider):
            "top_p": kwargs.get("top_p", 0.9),
        }
 
+        # Native tools (OpenAI-compatible): send structured tools/tool_choice when supported.
+        if tools and self.tool_handler.supports_native:
+            payload["tools"] = self.tool_handler.prepare_tools_for_native(tools)
+            payload["tool_choice"] = kwargs.get("tool_choice", "auto")
+
        # Add additional parameters
        if "frequency_penalty" in kwargs:
            payload["frequency_penalty"] = kwargs["frequency_penalty"]
@@ -513,8 +618,9 @@ class LMStudioProvider(BaseProvider):
        try:
            # Track generation time
            start_time = time.time()
+            request_url = f"{self.base_url}/chat/completions"
            response = await self.async_client.post(
-
+                request_url,
                json=payload,
                headers={"Content-Type": "application/json"}
            )
@@ -526,20 +632,40 @@ class LMStudioProvider(BaseProvider):
            # Extract response from OpenAI format
            if "choices" in result and len(result["choices"]) > 0:
                choice = result["choices"][0]
-
+                message = choice.get("message") or {}
+                if not isinstance(message, dict):
+                    message = {}
+
+                content = message.get("content", "")
+                reasoning = message.get("reasoning")
+                tool_calls = message.get("tool_calls")
+                if tool_calls is None:
+                    tool_calls = choice.get("tool_calls")
                finish_reason = choice.get("finish_reason", "stop")
            else:
                content = "No response generated"
+                reasoning = None
+                tool_calls = None
                finish_reason = "error"
 
            # Extract usage info
            usage = result.get("usage", {})
 
+            metadata = {}
+            if isinstance(reasoning, str) and reasoning.strip():
+                metadata["reasoning"] = reasoning
+            metadata["_provider_request"] = {
+                "url": request_url,
+                "payload": payload,
+            }
+
            return GenerateResponse(
                content=content,
                model=self.model,
                finish_reason=finish_reason,
                raw_response=result,
+                tool_calls=tool_calls if isinstance(tool_calls, list) else None,
+                metadata=metadata or None,
                usage={
                    "input_tokens": usage.get("prompt_tokens", 0),
                    "output_tokens": usage.get("completion_tokens", 0),
@@ -589,13 +715,23 @@ class LMStudioProvider(BaseProvider):
                        if "choices" in chunk and len(chunk["choices"]) > 0:
                            choice = chunk["choices"][0]
                            delta = choice.get("delta", {})
+                            if not isinstance(delta, dict):
+                                delta = {}
                            content = delta.get("content", "")
+                            reasoning = delta.get("reasoning")
+                            tool_calls = delta.get("tool_calls") or choice.get("tool_calls")
                            finish_reason = choice.get("finish_reason")
 
+                            metadata = {}
+                            if isinstance(reasoning, str) and reasoning.strip():
+                                metadata["reasoning"] = reasoning
+
                            yield GenerateResponse(
                                content=content,
                                model=self.model,
                                finish_reason=finish_reason,
+                                tool_calls=tool_calls if isinstance(tool_calls, list) else None,
+                                metadata=metadata or None,
                                raw_response=chunk
                            )
 
@@ -648,7 +784,16 @@ class LMStudioProvider(BaseProvider):
            self.logger.warning(f"Failed to update HTTP client timeout: {e}")
            # Try to create a new client with default timeout
            try:
-
+                fallback_timeout = None
+                try:
+                    from ..config.manager import get_config_manager
+
+                    fallback_timeout = float(get_config_manager().get_default_timeout())
+                except Exception:
+                    fallback_timeout = 7200.0
+                if isinstance(fallback_timeout, (int, float)) and float(fallback_timeout) <= 0:
+                    fallback_timeout = None
+                self.client = httpx.Client(timeout=fallback_timeout)
            except Exception:
                pass  # Best effort - don't fail the operation
 
@@ -291,21 +291,24 @@ class MLXProvider(BaseProvider):
        """Build prompt for MLX model with tool support"""
 
        # Add tools to system prompt if provided
-
+        final_system_prompt = system_prompt
        if tools and self.tool_handler.supports_prompted:
-
-            if
-
+            include_tool_list = True
+            if final_system_prompt and "## Tools (session)" in final_system_prompt:
+                include_tool_list = False
+            tool_prompt = self.tool_handler.format_tools_prompt(tools, include_tool_list=include_tool_list)
+            if final_system_prompt:
+                final_system_prompt += f"\n\n{tool_prompt}"
            else:
-
+                final_system_prompt = tool_prompt
 
        # For Qwen models, use chat template format
        if "qwen" in self.model.lower():
            full_prompt = ""
 
            # Add system prompt
-            if
-            full_prompt += f"<|im_start|>system\n{
+            if final_system_prompt:
+                full_prompt += f"<|im_start|>system\n{final_system_prompt}<|im_end|>\n"
 
            # Add conversation history
            if messages:
@@ -321,8 +324,8 @@ class MLXProvider(BaseProvider):
        else:
            # Generic format for other models
            full_prompt = prompt
-            if
-            full_prompt = f"{
+            if final_system_prompt:
+                full_prompt = f"{final_system_prompt}\n\n{prompt}"
 
            # Add conversation context if provided
            if messages:
@@ -541,4 +544,4 @@ class MLXProvider(BaseProvider):
            return models
 
        except Exception:
-            return []
+            return []
@@ -148,13 +148,16 @@ class OllamaProvider(BaseProvider):
        """Internal generation with Ollama"""
 
        # Handle tools for prompted models
-
+        final_system_prompt = system_prompt
        if tools and self.tool_handler.supports_prompted:
-
-            if
-
+            include_tool_list = True
+            if final_system_prompt and "## Tools (session)" in final_system_prompt:
+                include_tool_list = False
+            tool_prompt = self.tool_handler.format_tools_prompt(tools, include_tool_list=include_tool_list)
+            if final_system_prompt:
+                final_system_prompt = f"{final_system_prompt}\n\n{tool_prompt}"
            else:
-
+                final_system_prompt = tool_prompt
 
        # Build request payload using unified system
        generation_kwargs = self._prepare_generation_kwargs(**kwargs)
@@ -189,10 +192,10 @@ class OllamaProvider(BaseProvider):
            payload["messages"] = []
 
            # Add system message if provided
-            if
+            if final_system_prompt:
                payload["messages"].append({
                    "role": "system",
-                    "content":
+                    "content": final_system_prompt
                })
 
            # Add conversation history (converted to Ollama-compatible format)
@@ -245,8 +248,8 @@ class OllamaProvider(BaseProvider):
        else:
            # Use generate format for single prompt (legacy fallback)
            full_prompt = prompt
-            if
-            full_prompt = f"{
+            if final_system_prompt:
+                full_prompt = f"{final_system_prompt}\n\n{prompt}"
 
            payload["prompt"] = full_prompt
            endpoint = "/api/generate"
@@ -292,6 +295,14 @@ class OllamaProvider(BaseProvider):
                },
                gen_time=gen_time
            )
+
+            # Runtime observability: capture the exact HTTP JSON payload we sent to Ollama.
+            if not generate_response.metadata:
+                generate_response.metadata = {}
+            generate_response.metadata["_provider_request"] = {
+                "url": f"{self.base_url}{endpoint}",
+                "payload": payload,
+            }
 
            # Attach media metadata if available
            if media_metadata:
@@ -314,12 +325,8 @@ class OllamaProvider(BaseProvider):
                available_models = self.list_available_models(base_url=self.base_url)
                error_message = format_model_error("Ollama", self.model, available_models)
                raise ModelNotFoundError(error_message)
-
-
-                content=f"Error: {str(e)}",
-                model=self.model,
-                finish_reason="error"
-            )
+            # Let BaseProvider normalize (timeouts/connectivity/etc.) consistently.
+            raise
 
    def _stream_generate(self, endpoint: str, payload: Dict[str, Any], tools: Optional[List[Dict[str, Any]]] = None, tool_call_tags: Optional[str] = None) -> Iterator[GenerateResponse]:
        """Generate streaming response with tool tag rewriting support"""
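With the Ollama path now re-raising instead of returning an error-shaped response, failures surface as exceptions for `BaseProvider` to normalize, and callers handle them with ordinary try/except. A caller-side sketch; only the exception names and the `abstractcore.exceptions` module come from the diff, while the `provider.generate(prompt).content` call is assumed for illustration:

```python
# Caller-side sketch: handling the exceptions that now propagate from the Ollama path.
def generate_with_fallback(provider, prompt: str) -> str:
    from abstractcore.exceptions import ModelNotFoundError, ProviderAPIError

    try:
        # Assumed call shape: a provider instance with a generate() returning a
        # response object that exposes .content.
        return provider.generate(prompt).content
    except ModelNotFoundError as exc:
        # e.g. the requested model is not pulled locally
        return f"unavailable model: {exc}"
    except ProviderAPIError as exc:
        # normalized by BaseProvider (timeouts, connectivity, HTTP failures, ...)
        return f"provider error: {exc}"
```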
@@ -367,7 +374,13 @@ class OllamaProvider(BaseProvider):
                            content=content,
                            model=self.model,
                            finish_reason="stop" if done else None,
-                            raw_response=chunk
+                            raw_response=chunk,
+                            metadata={
+                                "_provider_request": {
+                                    "url": f"{self.base_url}{endpoint}",
+                                    "payload": payload,
+                                }
+                            },
                        )
 
                        yield chunk_response
@@ -416,13 +429,16 @@ class OllamaProvider(BaseProvider):
                              **kwargs):
        """Native async implementation using httpx.AsyncClient - 3-10x faster for batch operations."""
        # Handle tools for prompted models
-
+        final_system_prompt = system_prompt
        if tools and self.tool_handler.supports_prompted:
-
-            if
-
+            include_tool_list = True
+            if final_system_prompt and "## Tools (session)" in final_system_prompt:
+                include_tool_list = False
+            tool_prompt = self.tool_handler.format_tools_prompt(tools, include_tool_list=include_tool_list)
+            if final_system_prompt:
+                final_system_prompt = f"{final_system_prompt}\n\n{tool_prompt}"
            else:
-
+                final_system_prompt = tool_prompt
 
        # Build request payload (same logic as sync)
        generation_kwargs = self._prepare_generation_kwargs(**kwargs)
@@ -453,10 +469,10 @@ class OllamaProvider(BaseProvider):
        if use_chat_format:
            payload["messages"] = []
 
-            if
+            if final_system_prompt:
                payload["messages"].append({
                    "role": "system",
-                    "content":
+                    "content": final_system_prompt
                })
 
            if messages:
@@ -486,8 +502,8 @@ class OllamaProvider(BaseProvider):
            endpoint = "/api/chat"
        else:
            full_prompt = prompt
-            if
-            full_prompt = f"{
+            if final_system_prompt:
+                full_prompt = f"{final_system_prompt}\n\n{prompt}"
            payload["prompt"] = full_prompt
            endpoint = "/api/generate"
 
@@ -809,4 +825,4 @@ class OllamaProvider(BaseProvider):
 
        except Exception as e:
            self.logger.error(f"Failed to generate embeddings: {e}")
-            raise ProviderAPIError(f"Ollama embedding error: {str(e)}")
+            raise ProviderAPIError(f"Ollama embedding error: {str(e)}")