abstractcore-2.6.9-py3-none-any.whl → abstractcore-2.9.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. abstractcore/apps/summarizer.py +69 -27
  2. abstractcore/architectures/detection.py +190 -25
  3. abstractcore/assets/architecture_formats.json +129 -6
  4. abstractcore/assets/model_capabilities.json +803 -141
  5. abstractcore/config/main.py +2 -2
  6. abstractcore/config/manager.py +3 -1
  7. abstractcore/events/__init__.py +7 -1
  8. abstractcore/mcp/__init__.py +30 -0
  9. abstractcore/mcp/client.py +213 -0
  10. abstractcore/mcp/factory.py +64 -0
  11. abstractcore/mcp/naming.py +28 -0
  12. abstractcore/mcp/stdio_client.py +336 -0
  13. abstractcore/mcp/tool_source.py +164 -0
  14. abstractcore/processing/__init__.py +2 -2
  15. abstractcore/processing/basic_deepsearch.py +1 -1
  16. abstractcore/processing/basic_summarizer.py +379 -93
  17. abstractcore/providers/anthropic_provider.py +91 -10
  18. abstractcore/providers/base.py +540 -16
  19. abstractcore/providers/huggingface_provider.py +17 -8
  20. abstractcore/providers/lmstudio_provider.py +170 -25
  21. abstractcore/providers/mlx_provider.py +13 -10
  22. abstractcore/providers/ollama_provider.py +42 -26
  23. abstractcore/providers/openai_compatible_provider.py +87 -22
  24. abstractcore/providers/openai_provider.py +12 -9
  25. abstractcore/providers/streaming.py +201 -39
  26. abstractcore/providers/vllm_provider.py +78 -21
  27. abstractcore/server/app.py +116 -30
  28. abstractcore/structured/retry.py +20 -7
  29. abstractcore/tools/__init__.py +46 -24
  30. abstractcore/tools/abstractignore.py +166 -0
  31. abstractcore/tools/arg_canonicalizer.py +61 -0
  32. abstractcore/tools/common_tools.py +2443 -742
  33. abstractcore/tools/core.py +109 -13
  34. abstractcore/tools/handler.py +17 -3
  35. abstractcore/tools/parser.py +894 -159
  36. abstractcore/tools/registry.py +122 -18
  37. abstractcore/tools/syntax_rewriter.py +68 -6
  38. abstractcore/tools/tag_rewriter.py +186 -1
  39. abstractcore/utils/jsonish.py +111 -0
  40. abstractcore/utils/version.py +1 -1
  41. {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/METADATA +56 -2
  42. {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/RECORD +46 -37
  43. {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/WHEEL +0 -0
  44. {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/entry_points.txt +0 -0
  45. {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/licenses/LICENSE +0 -0
  46. {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/top_level.txt +0 -0
@@ -1287,7 +1287,13 @@ class HuggingFaceProvider(BaseProvider):
 
         elif self.tool_handler.supports_prompted:
             # Add tools as system prompt for prompted models
-            tool_prompt = self.tool_handler.format_tools_prompt(tools)
+            system_text = (
+                chat_messages[0].get("content", "")
+                if chat_messages and chat_messages[0].get("role") == "system"
+                else ""
+            )
+            include_tool_list = "## Tools (session)" not in str(system_text)
+            tool_prompt = self.tool_handler.format_tools_prompt(tools, include_tool_list=include_tool_list)
             if chat_messages and chat_messages[0]["role"] == "system":
                 chat_messages[0]["content"] += f"\n\n{tool_prompt}"
             else:
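Several providers in this release gate the prompted tool list on the same marker. A minimal standalone sketch of the decision, assuming only what the hunk above shows (the "## Tools (session)" marker and the include_tool_list keyword of format_tools_prompt); the helper name is illustrative:

def should_include_tool_list(system_text):
    # Skip re-listing tools when the session system prompt already carries a
    # "## Tools (session)" section, so the model never sees two tool lists.
    return "## Tools (session)" not in str(system_text or "")

assert should_include_tool_list("You are a helpful assistant.") is True
assert should_include_tool_list("## Tools (session)\n- list_files") is False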
@@ -1577,21 +1583,24 @@ class HuggingFaceProvider(BaseProvider):
         """Build input text for transformers model with tool support"""
 
         # Add tools to system prompt if provided
-        enhanced_system_prompt = system_prompt
+        final_system_prompt = system_prompt
         if tools and self.tool_handler.supports_prompted:
-            tool_prompt = self.tool_handler.format_tools_prompt(tools)
-            if enhanced_system_prompt:
-                enhanced_system_prompt += f"\n\n{tool_prompt}"
+            include_tool_list = True
+            if final_system_prompt and "## Tools (session)" in final_system_prompt:
+                include_tool_list = False
+            tool_prompt = self.tool_handler.format_tools_prompt(tools, include_tool_list=include_tool_list)
+            if final_system_prompt:
+                final_system_prompt += f"\n\n{tool_prompt}"
             else:
-                enhanced_system_prompt = tool_prompt
+                final_system_prompt = tool_prompt
 
         # Check if model has chat template
         if hasattr(self.tokenizer, 'chat_template') and self.tokenizer.chat_template:
             # Use chat template if available
             chat_messages = []
 
-            if enhanced_system_prompt:
-                chat_messages.append({"role": "system", "content": enhanced_system_prompt})
+            if final_system_prompt:
+                chat_messages.append({"role": "system", "content": final_system_prompt})
 
             if messages:
                 chat_messages.extend(messages)
@@ -16,7 +16,13 @@ except ImportError:
     BaseModel = None
 from .base import BaseProvider
 from ..core.types import GenerateResponse
-from ..exceptions import ProviderAPIError, ModelNotFoundError, format_model_error, format_provider_error
+from ..exceptions import (
+    ProviderAPIError,
+    ModelNotFoundError,
+    InvalidRequestError,
+    format_model_error,
+    format_provider_error,
+)
 from ..tools import UniversalToolHandler, execute_tools
 from ..events import EventType
 
@@ -49,7 +55,16 @@ class LMStudioProvider(BaseProvider):
        except Exception as e:
            # Fallback with default timeout if client creation fails
            try:
-                self.client = httpx.Client(timeout=300.0)
+                fallback_timeout = None
+                try:
+                    from ..config.manager import get_config_manager
+
+                    fallback_timeout = float(get_config_manager().get_default_timeout())
+                except Exception:
+                    fallback_timeout = 7200.0
+                if isinstance(fallback_timeout, (int, float)) and float(fallback_timeout) <= 0:
+                    fallback_timeout = None
+                self.client = httpx.Client(timeout=fallback_timeout)
            except Exception:
                raise RuntimeError(f"Failed to create HTTP client for LMStudio: {e}")
 
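The same fallback-timeout rule appears again in the timeout-update path further down in this file. A minimal sketch of that rule, assuming only what the hunk shows: a configured default, 7200.0 seconds as the last resort, and a non-positive value meaning "no timeout" (httpx accepts timeout=None for that):

from typing import Optional

import httpx

def make_fallback_client(configured_timeout: Optional[float]) -> httpx.Client:
    # Prefer the configured default; fall back to 7200 seconds if it is unavailable.
    fallback_timeout = configured_timeout if configured_timeout is not None else 7200.0
    # A non-positive configured value is treated as "wait indefinitely".
    if fallback_timeout <= 0:
        fallback_timeout = None
    return httpx.Client(timeout=fallback_timeout)

client_no_timeout = make_fallback_client(0)     # timeout disabled
client_two_minutes = make_fallback_client(120)  # 120-second timeout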
@@ -142,19 +157,24 @@ class LMStudioProvider(BaseProvider):
        chat_messages = []
 
        # Add tools to system prompt if provided
-        enhanced_system_prompt = system_prompt
-        if tools and self.tool_handler.supports_prompted:
-            tool_prompt = self.tool_handler.format_tools_prompt(tools)
-            if enhanced_system_prompt:
-                enhanced_system_prompt += f"\n\n{tool_prompt}"
+        final_system_prompt = system_prompt
+        # Prefer native tools when the model supports them. Only inject a prompted tool list
+        # when native tool calling is not available.
+        if tools and self.tool_handler.supports_prompted and not self.tool_handler.supports_native:
+            include_tool_list = True
+            if final_system_prompt and "## Tools (session)" in final_system_prompt:
+                include_tool_list = False
+            tool_prompt = self.tool_handler.format_tools_prompt(tools, include_tool_list=include_tool_list)
+            if final_system_prompt:
+                final_system_prompt += f"\n\n{tool_prompt}"
            else:
-                enhanced_system_prompt = tool_prompt
+                final_system_prompt = tool_prompt
 
        # Add system message if provided
-        if enhanced_system_prompt:
+        if final_system_prompt:
            chat_messages.append({
                "role": "system",
-                "content": enhanced_system_prompt
+                "content": final_system_prompt
            })
 
        # Add conversation history
@@ -231,6 +251,11 @@ class LMStudioProvider(BaseProvider):
            "max_tokens": max_output_tokens,  # LMStudio uses max_tokens for output tokens
            "top_p": kwargs.get("top_p", 0.9),
        }
+
+        # Native tools (OpenAI-compatible): send structured tools/tool_choice when supported.
+        if tools and self.tool_handler.supports_native:
+            payload["tools"] = self.tool_handler.prepare_tools_for_native(tools)
+            payload["tool_choice"] = kwargs.get("tool_choice", "auto")
 
        # Add additional generation parameters if provided (OpenAI-compatible)
        if "frequency_penalty" in kwargs:
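For models that support native tool calling, the request body now carries OpenAI-style tools and tool_choice fields. An illustrative payload under that assumption; the exact structures returned by prepare_tools_for_native() are not shown in this diff, so the function-tool shape and names below are hypothetical:

payload = {
    "model": "my-local-model",  # hypothetical model name served by LMStudio
    "messages": [{"role": "user", "content": "What is the weather in Paris?"}],
    "temperature": 0.7,
    "max_tokens": 256,
    "top_p": 0.9,
    "tools": [
        {
            "type": "function",
            "function": {
                "name": "get_weather",  # hypothetical tool
                "description": "Look up current weather for a city",
                "parameters": {
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                    "required": ["city"],
                },
            },
        }
    ],
    "tool_choice": "auto",
}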
@@ -280,8 +305,9 @@ class LMStudioProvider(BaseProvider):
 
            # Track generation time
            start_time = time.time()
+            request_url = f"{self.base_url}/chat/completions"
            response = self.client.post(
-                f"{self.base_url}/chat/completions",
+                request_url,
                json=payload,
                headers={"Content-Type": "application/json"}
            )
@@ -293,20 +319,42 @@ class LMStudioProvider(BaseProvider):
            # Extract response from OpenAI format
            if "choices" in result and len(result["choices"]) > 0:
                choice = result["choices"][0]
-                content = choice.get("message", {}).get("content", "")
+                message = choice.get("message") or {}
+                if not isinstance(message, dict):
+                    message = {}
+
+                content = message.get("content", "")
+                reasoning = message.get("reasoning")
+                tool_calls = message.get("tool_calls")
+                if tool_calls is None:
+                    # Some servers surface tool calls at the choice level.
+                    tool_calls = choice.get("tool_calls")
                finish_reason = choice.get("finish_reason", "stop")
            else:
                content = "No response generated"
+                reasoning = None
+                tool_calls = None
                finish_reason = "error"
 
            # Extract usage info
            usage = result.get("usage", {})
 
+            metadata = {}
+            if isinstance(reasoning, str) and reasoning.strip():
+                metadata["reasoning"] = reasoning
+            # Runtime observability: capture the exact HTTP JSON payload we sent.
+            metadata["_provider_request"] = {
+                "url": request_url,
+                "payload": payload,
+            }
+
            return GenerateResponse(
                content=content,
                model=self.model,
                finish_reason=finish_reason,
                raw_response=result,
+                tool_calls=tool_calls if isinstance(tool_calls, list) else None,
+                metadata=metadata or None,
                usage={
                    "input_tokens": usage.get("prompt_tokens", 0),
                    "output_tokens": usage.get("completion_tokens", 0),
@@ -318,6 +366,44 @@ class LMStudioProvider(BaseProvider):
                gen_time=gen_time
            )
 
+        except httpx.HTTPStatusError as e:
+            # Improve debuggability: include LMStudio's error response body (often a JSON error envelope).
+            resp = getattr(e, "response", None)
+            status = getattr(resp, "status_code", None)
+
+            body_text = ""
+            try:
+                if resp is not None:
+                    # Try to extract a structured error message if the server returns JSON.
+                    try:
+                        j = resp.json()
+                        if isinstance(j, dict):
+                            err = j.get("error")
+                            if isinstance(err, dict):
+                                msg = err.get("message") or err.get("error") or err.get("detail")
+                                if isinstance(msg, str) and msg.strip():
+                                    body_text = msg.strip()
+                            if not body_text:
+                                msg2 = j.get("message") or j.get("detail")
+                                if isinstance(msg2, str) and msg2.strip():
+                                    body_text = msg2.strip()
+                            if not body_text:
+                                body_text = json.dumps(j, ensure_ascii=False)
+                    except Exception:
+                        body_text = str(getattr(resp, "text", "") or "").strip()
+            except Exception:
+                body_text = ""
+
+            if body_text and len(body_text) > 2000:
+                body_text = body_text[:2000] + "…"
+
+            # Preserve classification for BaseProvider error normalization.
+            base = str(e)
+            detail = f"{base} | response={body_text}" if body_text else base
+            if isinstance(status, int) and 400 <= status < 500:
+                raise InvalidRequestError(detail)
+            raise ProviderAPIError(detail)
+
        except AttributeError as e:
            # Handle None type errors specifically
            if "'NoneType'" in str(e):
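The handler above boils down to: prefer a structured message from a JSON error envelope, fall back to the raw body, truncate, then classify 4xx responses as InvalidRequestError. A condensed sketch of the extraction step, assuming an httpx.Response-like object:

import json

def error_detail(resp) -> str:
    # Best-effort extraction of a readable message from an error response body.
    text = ""
    try:
        j = resp.json()
        if isinstance(j, dict):
            err = j.get("error")
            if isinstance(err, dict) and isinstance(err.get("message"), str):
                text = err["message"].strip()
            if not text:
                text = str(j.get("message") or j.get("detail") or json.dumps(j, ensure_ascii=False))
    except Exception:
        text = str(getattr(resp, "text", "") or "").strip()
    return text[:2000] + "…" if len(text) > 2000 else text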
@@ -336,7 +422,7 @@ class LMStudioProvider(BaseProvider):
                # If model discovery also fails, provide a generic error
                raise ModelNotFoundError(f"Model '{self.model}' not found in LMStudio and could not fetch available models")
            else:
-                raise ProviderAPIError(f"LMStudio API error: {str(e)}")
+                raise
 
    def _stream_generate(self, payload: Dict[str, Any]) -> Iterator[GenerateResponse]:
        """Generate streaming response"""
@@ -368,14 +454,24 @@ class LMStudioProvider(BaseProvider):
                if "choices" in chunk and len(chunk["choices"]) > 0:
                    choice = chunk["choices"][0]
                    delta = choice.get("delta", {})
+                    if not isinstance(delta, dict):
+                        delta = {}
                    content = delta.get("content", "")
+                    reasoning = delta.get("reasoning")
+                    tool_calls = delta.get("tool_calls") or choice.get("tool_calls")
                    finish_reason = choice.get("finish_reason")
 
+                    metadata = {}
+                    if isinstance(reasoning, str) and reasoning.strip():
+                        metadata["reasoning"] = reasoning
+
                    yield GenerateResponse(
                        content=content,
                        model=self.model,
                        finish_reason=finish_reason,
-                        raw_response=chunk
+                        tool_calls=tool_calls if isinstance(tool_calls, list) else None,
+                        metadata=metadata or None,
+                        raw_response=chunk,
                    )
 
            except json.JSONDecodeError:
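With the streaming changes above, each yielded GenerateResponse chunk can carry reasoning text in metadata and structured tool_calls alongside content. A hypothetical consumer sketch; only the attribute names come from this diff, and how the chunk iterator is obtained is provider-specific and not shown here:

from typing import Any, Iterable

def collect_stream(chunks: Iterable[Any]) -> str:
    # Drain a stream of GenerateResponse-like chunks, surfacing reasoning and tool calls.
    text, reasoning, tool_calls = [], [], []
    for chunk in chunks:
        if chunk.content:
            text.append(chunk.content)
        if chunk.metadata and chunk.metadata.get("reasoning"):
            reasoning.append(chunk.metadata["reasoning"])
        if chunk.tool_calls:
            tool_calls.extend(chunk.tool_calls)
    if reasoning:
        print("reasoning trace:", "".join(reasoning))
    if tool_calls:
        print("tool calls requested:", tool_calls)
    return "".join(text)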
@@ -405,19 +501,23 @@ class LMStudioProvider(BaseProvider):
        chat_messages = []
 
        # Add tools to system prompt if provided
-        enhanced_system_prompt = system_prompt
-        if tools and self.tool_handler.supports_prompted:
-            tool_prompt = self.tool_handler.format_tools_prompt(tools)
-            if enhanced_system_prompt:
-                enhanced_system_prompt += f"\n\n{tool_prompt}"
+        final_system_prompt = system_prompt
+        # Prefer native tools when available; only inject prompted tool syntax as fallback.
+        if tools and self.tool_handler.supports_prompted and not self.tool_handler.supports_native:
+            include_tool_list = True
+            if final_system_prompt and "## Tools (session)" in final_system_prompt:
+                include_tool_list = False
+            tool_prompt = self.tool_handler.format_tools_prompt(tools, include_tool_list=include_tool_list)
+            if final_system_prompt:
+                final_system_prompt += f"\n\n{tool_prompt}"
            else:
-                enhanced_system_prompt = tool_prompt
+                final_system_prompt = tool_prompt
 
        # Add system message if provided
-        if enhanced_system_prompt:
+        if final_system_prompt:
            chat_messages.append({
                "role": "system",
-                "content": enhanced_system_prompt
+                "content": final_system_prompt
            })
 
        # Add conversation history
@@ -473,6 +573,11 @@ class LMStudioProvider(BaseProvider):
            "top_p": kwargs.get("top_p", 0.9),
        }
 
+        # Native tools (OpenAI-compatible): send structured tools/tool_choice when supported.
+        if tools and self.tool_handler.supports_native:
+            payload["tools"] = self.tool_handler.prepare_tools_for_native(tools)
+            payload["tool_choice"] = kwargs.get("tool_choice", "auto")
+
        # Add additional parameters
        if "frequency_penalty" in kwargs:
            payload["frequency_penalty"] = kwargs["frequency_penalty"]
@@ -513,8 +618,9 @@ class LMStudioProvider(BaseProvider):
        try:
            # Track generation time
            start_time = time.time()
+            request_url = f"{self.base_url}/chat/completions"
            response = await self.async_client.post(
-                f"{self.base_url}/chat/completions",
+                request_url,
                json=payload,
                headers={"Content-Type": "application/json"}
            )
@@ -526,20 +632,40 @@ class LMStudioProvider(BaseProvider):
            # Extract response from OpenAI format
            if "choices" in result and len(result["choices"]) > 0:
                choice = result["choices"][0]
-                content = choice.get("message", {}).get("content", "")
+                message = choice.get("message") or {}
+                if not isinstance(message, dict):
+                    message = {}
+
+                content = message.get("content", "")
+                reasoning = message.get("reasoning")
+                tool_calls = message.get("tool_calls")
+                if tool_calls is None:
+                    tool_calls = choice.get("tool_calls")
                finish_reason = choice.get("finish_reason", "stop")
            else:
                content = "No response generated"
+                reasoning = None
+                tool_calls = None
                finish_reason = "error"
 
            # Extract usage info
            usage = result.get("usage", {})
 
+            metadata = {}
+            if isinstance(reasoning, str) and reasoning.strip():
+                metadata["reasoning"] = reasoning
+            metadata["_provider_request"] = {
+                "url": request_url,
+                "payload": payload,
+            }
+
            return GenerateResponse(
                content=content,
                model=self.model,
                finish_reason=finish_reason,
                raw_response=result,
+                tool_calls=tool_calls if isinstance(tool_calls, list) else None,
+                metadata=metadata or None,
                usage={
                    "input_tokens": usage.get("prompt_tokens", 0),
                    "output_tokens": usage.get("completion_tokens", 0),
@@ -589,13 +715,23 @@ class LMStudioProvider(BaseProvider):
                if "choices" in chunk and len(chunk["choices"]) > 0:
                    choice = chunk["choices"][0]
                    delta = choice.get("delta", {})
+                    if not isinstance(delta, dict):
+                        delta = {}
                    content = delta.get("content", "")
+                    reasoning = delta.get("reasoning")
+                    tool_calls = delta.get("tool_calls") or choice.get("tool_calls")
                    finish_reason = choice.get("finish_reason")
 
+                    metadata = {}
+                    if isinstance(reasoning, str) and reasoning.strip():
+                        metadata["reasoning"] = reasoning
+
                    yield GenerateResponse(
                        content=content,
                        model=self.model,
                        finish_reason=finish_reason,
+                        tool_calls=tool_calls if isinstance(tool_calls, list) else None,
+                        metadata=metadata or None,
                        raw_response=chunk
                    )
 
@@ -648,7 +784,16 @@ class LMStudioProvider(BaseProvider):
            self.logger.warning(f"Failed to update HTTP client timeout: {e}")
            # Try to create a new client with default timeout
            try:
-                self.client = httpx.Client(timeout=300.0)
+                fallback_timeout = None
+                try:
+                    from ..config.manager import get_config_manager
+
+                    fallback_timeout = float(get_config_manager().get_default_timeout())
+                except Exception:
+                    fallback_timeout = 7200.0
+                if isinstance(fallback_timeout, (int, float)) and float(fallback_timeout) <= 0:
+                    fallback_timeout = None
+                self.client = httpx.Client(timeout=fallback_timeout)
            except Exception:
                pass  # Best effort - don't fail the operation
 
@@ -291,21 +291,24 @@ class MLXProvider(BaseProvider):
        """Build prompt for MLX model with tool support"""
 
        # Add tools to system prompt if provided
-        enhanced_system_prompt = system_prompt
+        final_system_prompt = system_prompt
        if tools and self.tool_handler.supports_prompted:
-            tool_prompt = self.tool_handler.format_tools_prompt(tools)
-            if enhanced_system_prompt:
-                enhanced_system_prompt += f"\n\n{tool_prompt}"
+            include_tool_list = True
+            if final_system_prompt and "## Tools (session)" in final_system_prompt:
+                include_tool_list = False
+            tool_prompt = self.tool_handler.format_tools_prompt(tools, include_tool_list=include_tool_list)
+            if final_system_prompt:
+                final_system_prompt += f"\n\n{tool_prompt}"
            else:
-                enhanced_system_prompt = tool_prompt
+                final_system_prompt = tool_prompt
 
        # For Qwen models, use chat template format
        if "qwen" in self.model.lower():
            full_prompt = ""
 
            # Add system prompt
-            if enhanced_system_prompt:
-                full_prompt += f"<|im_start|>system\n{enhanced_system_prompt}<|im_end|>\n"
+            if final_system_prompt:
+                full_prompt += f"<|im_start|>system\n{final_system_prompt}<|im_end|>\n"
 
            # Add conversation history
            if messages:
@@ -321,8 +324,8 @@ class MLXProvider(BaseProvider):
        else:
            # Generic format for other models
            full_prompt = prompt
-            if enhanced_system_prompt:
-                full_prompt = f"{enhanced_system_prompt}\n\n{prompt}"
+            if final_system_prompt:
+                full_prompt = f"{final_system_prompt}\n\n{prompt}"
 
            # Add conversation context if provided
            if messages:
@@ -541,4 +544,4 @@ class MLXProvider(BaseProvider):
            return models
 
        except Exception:
-            return []
+            return []
@@ -148,13 +148,16 @@ class OllamaProvider(BaseProvider):
        """Internal generation with Ollama"""
 
        # Handle tools for prompted models
-        effective_system_prompt = system_prompt
+        final_system_prompt = system_prompt
        if tools and self.tool_handler.supports_prompted:
-            tool_prompt = self.tool_handler.format_tools_prompt(tools)
-            if effective_system_prompt:
-                effective_system_prompt = f"{effective_system_prompt}\n\n{tool_prompt}"
+            include_tool_list = True
+            if final_system_prompt and "## Tools (session)" in final_system_prompt:
+                include_tool_list = False
+            tool_prompt = self.tool_handler.format_tools_prompt(tools, include_tool_list=include_tool_list)
+            if final_system_prompt:
+                final_system_prompt = f"{final_system_prompt}\n\n{tool_prompt}"
            else:
-                effective_system_prompt = tool_prompt
+                final_system_prompt = tool_prompt
 
        # Build request payload using unified system
        generation_kwargs = self._prepare_generation_kwargs(**kwargs)
@@ -189,10 +192,10 @@ class OllamaProvider(BaseProvider):
            payload["messages"] = []
 
            # Add system message if provided
-            if effective_system_prompt:
+            if final_system_prompt:
                payload["messages"].append({
                    "role": "system",
-                    "content": effective_system_prompt
+                    "content": final_system_prompt
                })
 
            # Add conversation history (converted to Ollama-compatible format)
@@ -245,8 +248,8 @@ class OllamaProvider(BaseProvider):
        else:
            # Use generate format for single prompt (legacy fallback)
            full_prompt = prompt
-            if effective_system_prompt:
-                full_prompt = f"{effective_system_prompt}\n\n{prompt}"
+            if final_system_prompt:
+                full_prompt = f"{final_system_prompt}\n\n{prompt}"
 
            payload["prompt"] = full_prompt
            endpoint = "/api/generate"
@@ -292,6 +295,14 @@ class OllamaProvider(BaseProvider):
                },
                gen_time=gen_time
            )
+
+            # Runtime observability: capture the exact HTTP JSON payload we sent to Ollama.
+            if not generate_response.metadata:
+                generate_response.metadata = {}
+            generate_response.metadata["_provider_request"] = {
+                "url": f"{self.base_url}{endpoint}",
+                "payload": payload,
+            }
 
            # Attach media metadata if available
            if media_metadata:
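The _provider_request metadata added here (and in the LMStudio hunks above) records the exact URL and JSON payload that was sent. A hypothetical debugging helper built on that key; the key name and its {"url", "payload"} shape come from this diff, everything else is illustrative:

import json

def dump_provider_request(response) -> None:
    # Print the captured HTTP request for a GenerateResponse, if any.
    meta = response.metadata or {}
    req = meta.get("_provider_request")
    if not req:
        print("no provider request captured")
        return
    print("POST", req["url"])
    print(json.dumps(req["payload"], indent=2, ensure_ascii=False))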
@@ -314,12 +325,8 @@ class OllamaProvider(BaseProvider):
                available_models = self.list_available_models(base_url=self.base_url)
                error_message = format_model_error("Ollama", self.model, available_models)
                raise ModelNotFoundError(error_message)
-            else:
-                return GenerateResponse(
-                    content=f"Error: {str(e)}",
-                    model=self.model,
-                    finish_reason="error"
-                )
+            # Let BaseProvider normalize (timeouts/connectivity/etc.) consistently.
+            raise
 
    def _stream_generate(self, endpoint: str, payload: Dict[str, Any], tools: Optional[List[Dict[str, Any]]] = None, tool_call_tags: Optional[str] = None) -> Iterator[GenerateResponse]:
        """Generate streaming response with tool tag rewriting support"""
@@ -367,7 +374,13 @@ class OllamaProvider(BaseProvider):
                        content=content,
                        model=self.model,
                        finish_reason="stop" if done else None,
-                        raw_response=chunk
+                        raw_response=chunk,
+                        metadata={
+                            "_provider_request": {
+                                "url": f"{self.base_url}{endpoint}",
+                                "payload": payload,
+                            }
+                        },
                    )
 
                    yield chunk_response
@@ -416,13 +429,16 @@ class OllamaProvider(BaseProvider):
                              **kwargs):
        """Native async implementation using httpx.AsyncClient - 3-10x faster for batch operations."""
        # Handle tools for prompted models
-        effective_system_prompt = system_prompt
+        final_system_prompt = system_prompt
        if tools and self.tool_handler.supports_prompted:
-            tool_prompt = self.tool_handler.format_tools_prompt(tools)
-            if effective_system_prompt:
-                effective_system_prompt = f"{effective_system_prompt}\n\n{tool_prompt}"
+            include_tool_list = True
+            if final_system_prompt and "## Tools (session)" in final_system_prompt:
+                include_tool_list = False
+            tool_prompt = self.tool_handler.format_tools_prompt(tools, include_tool_list=include_tool_list)
+            if final_system_prompt:
+                final_system_prompt = f"{final_system_prompt}\n\n{tool_prompt}"
            else:
-                effective_system_prompt = tool_prompt
+                final_system_prompt = tool_prompt
 
        # Build request payload (same logic as sync)
        generation_kwargs = self._prepare_generation_kwargs(**kwargs)
@@ -453,10 +469,10 @@ class OllamaProvider(BaseProvider):
        if use_chat_format:
            payload["messages"] = []
 
-            if effective_system_prompt:
+            if final_system_prompt:
                payload["messages"].append({
                    "role": "system",
-                    "content": effective_system_prompt
+                    "content": final_system_prompt
                })
 
            if messages:
@@ -486,8 +502,8 @@ class OllamaProvider(BaseProvider):
            endpoint = "/api/chat"
        else:
            full_prompt = prompt
-            if effective_system_prompt:
-                full_prompt = f"{effective_system_prompt}\n\n{prompt}"
+            if final_system_prompt:
+                full_prompt = f"{final_system_prompt}\n\n{prompt}"
            payload["prompt"] = full_prompt
            endpoint = "/api/generate"
 
@@ -809,4 +825,4 @@ class OllamaProvider(BaseProvider):
 
        except Exception as e:
            self.logger.error(f"Failed to generate embeddings: {e}")
-            raise ProviderAPIError(f"Ollama embedding error: {str(e)}")
+            raise ProviderAPIError(f"Ollama embedding error: {str(e)}")