abstractcore 2.5.3__py3-none-any.whl → 2.6.0__py3-none-any.whl

This diff compares the contents of two publicly released package versions as they appear in a supported public registry. It is provided for informational purposes only.
Files changed (31)
  1. abstractcore/__init__.py +7 -1
  2. abstractcore/architectures/detection.py +2 -2
  3. abstractcore/core/retry.py +2 -2
  4. abstractcore/core/session.py +132 -1
  5. abstractcore/download.py +253 -0
  6. abstractcore/embeddings/manager.py +2 -2
  7. abstractcore/events/__init__.py +112 -1
  8. abstractcore/exceptions/__init__.py +49 -2
  9. abstractcore/media/processors/office_processor.py +2 -2
  10. abstractcore/media/utils/image_scaler.py +2 -2
  11. abstractcore/media/vision_fallback.py +2 -2
  12. abstractcore/providers/anthropic_provider.py +200 -6
  13. abstractcore/providers/base.py +100 -5
  14. abstractcore/providers/lmstudio_provider.py +246 -2
  15. abstractcore/providers/ollama_provider.py +244 -2
  16. abstractcore/providers/openai_provider.py +258 -6
  17. abstractcore/providers/streaming.py +2 -2
  18. abstractcore/tools/common_tools.py +2 -2
  19. abstractcore/tools/handler.py +2 -2
  20. abstractcore/tools/parser.py +2 -2
  21. abstractcore/tools/registry.py +2 -2
  22. abstractcore/tools/syntax_rewriter.py +2 -2
  23. abstractcore/tools/tag_rewriter.py +3 -3
  24. abstractcore/utils/self_fixes.py +2 -2
  25. abstractcore/utils/version.py +1 -1
  26. {abstractcore-2.5.3.dist-info → abstractcore-2.6.0.dist-info}/METADATA +102 -4
  27. {abstractcore-2.5.3.dist-info → abstractcore-2.6.0.dist-info}/RECORD +31 -30
  28. {abstractcore-2.5.3.dist-info → abstractcore-2.6.0.dist-info}/WHEEL +0 -0
  29. {abstractcore-2.5.3.dist-info → abstractcore-2.6.0.dist-info}/entry_points.txt +0 -0
  30. {abstractcore-2.5.3.dist-info → abstractcore-2.6.0.dist-info}/licenses/LICENSE +0 -0
  31. {abstractcore-2.5.3.dist-info → abstractcore-2.6.0.dist-info}/top_level.txt +0 -0
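The headline change in 2.6.0 is a native async generation path in the provider classes (anthropic_provider.py, openai_provider.py, lmstudio_provider.py, ollama_provider.py, roughly 200 to 260 added lines each), along with a new abstractcore/download.py module and sizable additions to core/session.py, events, and exceptions. The provider diffs below all follow the same pattern: the existing synchronous httpx.Client stays as-is, and an httpx.AsyncClient is created lazily the first time an async call is made. The sketch below distills that pattern in isolation; the class and method names here are illustrative and are not part of the abstractcore API.

from typing import Optional

import httpx


class LazyAsyncHTTP:
    """Illustrative sketch only (not abstractcore code): sync client up front, async client on demand."""

    def __init__(self, base_url: str, timeout: Optional[float] = 30.0):
        self.base_url = base_url.rstrip("/")
        self._timeout = timeout
        self.client = httpx.Client(timeout=timeout)  # used by the existing synchronous code paths
        self._async_client: Optional[httpx.AsyncClient] = None  # created only if async is ever used

    @property
    def async_client(self) -> httpx.AsyncClient:
        # Lazy creation: purely synchronous callers never construct an AsyncClient at all,
        # while async callers share one connection pool per instance.
        if self._async_client is None:
            self._async_client = httpx.AsyncClient(base_url=self.base_url, timeout=self._timeout)
        return self._async_client

    async def aclose(self) -> None:
        # Close only what was actually created.
        if self._async_client is not None:
            await self._async_client.aclose()
        self.client.close()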
abstractcore/providers/lmstudio_provider.py

@@ -5,7 +5,7 @@ LM Studio provider implementation (OpenAI-compatible API).
 import httpx
 import json
 import time
-from typing import List, Dict, Any, Optional, Union, Iterator, Type
+from typing import List, Dict, Any, Optional, Union, Iterator, AsyncIterator, Type
 
 try:
     from pydantic import BaseModel
@@ -15,7 +15,7 @@ except ImportError:
     BaseModel = None
 from .base import BaseProvider
 from ..core.types import GenerateResponse
-from ..exceptions import ProviderAPIError, ModelNotFoundError, format_model_error
+from ..exceptions import ProviderAPIError, ModelNotFoundError, format_model_error, format_provider_error
 from ..tools import UniversalToolHandler, execute_tools
 from ..events import EventType
 
@@ -47,9 +47,21 @@ class LMStudioProvider(BaseProvider):
         except Exception:
             raise RuntimeError(f"Failed to create HTTP client for LMStudio: {e}")
 
+        self._async_client = None  # Lazy-loaded async client
+
         # Validate model exists in LMStudio
         self._validate_model()
 
+    @property
+    def async_client(self):
+        """Lazy-load async HTTP client for native async operations."""
+        if self._async_client is None:
+            timeout_value = getattr(self, '_timeout', None)
+            if timeout_value is not None and timeout_value <= 0:
+                timeout_value = None
+            self._async_client = httpx.AsyncClient(timeout=timeout_value)
+        return self._async_client
+
     def _validate_model(self):
         """Validate that the model exists in LMStudio"""
         try:
@@ -87,6 +99,17 @@
             if hasattr(self, 'client') and self.client is not None:
                 self.client.close()
 
+            # Close async client if it was created
+            if self._async_client is not None:
+                import asyncio
+                try:
+                    loop = asyncio.get_running_loop()
+                    loop.create_task(self._async_client.aclose())
+                except RuntimeError:
+                    # No running loop
+                    import asyncio
+                    asyncio.run(self._async_client.aclose())
+
         except Exception as e:
             # Log but don't raise - unload should be best-effort
             if hasattr(self, 'logger'):
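The unload() cleanup in the hunk above has to dispose of the async client from a synchronous method, and httpx.AsyncClient.aclose() is a coroutine: when an event loop is already running, the close is scheduled onto it with create_task; otherwise a temporary loop runs it via asyncio.run(). A minimal standalone illustration of that branching (not abstractcore code):

import asyncio

import httpx


def close_async_client(client: httpx.AsyncClient) -> None:
    """Dispose of an AsyncClient from synchronous code, with or without a running loop."""
    try:
        loop = asyncio.get_running_loop()   # raises RuntimeError when called outside a loop
        loop.create_task(client.aclose())   # schedule fire-and-forget close on the live loop
    except RuntimeError:
        asyncio.run(client.aclose())        # no loop running: spin one up just for aclose()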
@@ -359,6 +382,227 @@
                 finish_reason="error"
             )
 
+    async def _agenerate_internal(self,
+                                  prompt: str,
+                                  messages: Optional[List[Dict[str, str]]] = None,
+                                  system_prompt: Optional[str] = None,
+                                  tools: Optional[List[Dict[str, Any]]] = None,
+                                  media: Optional[List['MediaContent']] = None,
+                                  stream: bool = False,
+                                  response_model: Optional[Type[BaseModel]] = None,
+                                  execute_tools: Optional[bool] = None,
+                                  tool_call_tags: Optional[str] = None,
+                                  **kwargs) -> Union[GenerateResponse, AsyncIterator[GenerateResponse]]:
+        """Native async implementation using httpx.AsyncClient - 3-10x faster for batch operations."""
+
+        # Build messages for chat completions with tool support (same logic as sync)
+        chat_messages = []
+
+        # Add tools to system prompt if provided
+        enhanced_system_prompt = system_prompt
+        if tools and self.tool_handler.supports_prompted:
+            tool_prompt = self.tool_handler.format_tools_prompt(tools)
+            if enhanced_system_prompt:
+                enhanced_system_prompt += f"\n\n{tool_prompt}"
+            else:
+                enhanced_system_prompt = tool_prompt
+
+        # Add system message if provided
+        if enhanced_system_prompt:
+            chat_messages.append({
+                "role": "system",
+                "content": enhanced_system_prompt
+            })
+
+        # Add conversation history
+        if messages:
+            chat_messages.extend(messages)
+
+        # Handle media content
+        if media:
+            user_message_text = prompt.strip() if prompt else ""
+            if not user_message_text and chat_messages:
+                for msg in reversed(chat_messages):
+                    if msg.get("role") == "user" and msg.get("content"):
+                        user_message_text = msg["content"]
+                        break
+            try:
+                processed_media = self._process_media_content(media)
+                media_handler = self._get_media_handler_for_model(self.model)
+                multimodal_message = media_handler.create_multimodal_message(user_message_text, processed_media)
+
+                if isinstance(multimodal_message, str):
+                    if chat_messages and chat_messages[-1].get("role") == "user":
+                        chat_messages[-1]["content"] = multimodal_message
+                    else:
+                        chat_messages.append({"role": "user", "content": multimodal_message})
+                else:
+                    if chat_messages and chat_messages[-1].get("role") == "user":
+                        chat_messages[-1] = multimodal_message
+                    else:
+                        chat_messages.append(multimodal_message)
+            except ImportError:
+                self.logger.warning("Media processing not available. Install with: pip install abstractcore[media]")
+                if user_message_text:
+                    chat_messages.append({"role": "user", "content": user_message_text})
+            except Exception as e:
+                self.logger.warning(f"Failed to process media content: {e}")
+                if user_message_text:
+                    chat_messages.append({"role": "user", "content": user_message_text})
+
+        # Add prompt as separate message if provided
+        elif prompt and prompt.strip():
+            chat_messages.append({"role": "user", "content": prompt})
+
+        # Build request payload
+        generation_kwargs = self._prepare_generation_kwargs(**kwargs)
+        max_output_tokens = self._get_provider_max_tokens_param(generation_kwargs)
+
+        payload = {
+            "model": self.model,
+            "messages": chat_messages,
+            "stream": stream,
+            "temperature": kwargs.get("temperature", self.temperature),
+            "max_tokens": max_output_tokens,
+            "top_p": kwargs.get("top_p", 0.9),
+        }
+
+        # Add additional parameters
+        if "frequency_penalty" in kwargs:
+            payload["frequency_penalty"] = kwargs["frequency_penalty"]
+        if "presence_penalty" in kwargs:
+            payload["presence_penalty"] = kwargs["presence_penalty"]
+        if "repetition_penalty" in kwargs:
+            payload["repetition_penalty"] = kwargs["repetition_penalty"]
+
+        # Add seed if provided
+        seed_value = kwargs.get("seed", self.seed)
+        if seed_value is not None:
+            payload["seed"] = seed_value
+
+        # Add structured output support
+        if response_model and PYDANTIC_AVAILABLE:
+            json_schema = response_model.model_json_schema()
+            payload["response_format"] = {
+                "type": "json_schema",
+                "json_schema": {
+                    "name": response_model.__name__,
+                    "schema": json_schema
+                }
+            }
+
+        if stream:
+            return self._async_stream_generate(payload)
+        else:
+            response = await self._async_single_generate(payload)
+
+            # Execute tools if enabled
+            if self.execute_tools and tools and self.tool_handler.supports_prompted and response.content:
+                response = self._handle_prompted_tool_execution(response, tools, execute_tools)
+
+            return response
+
+    async def _async_single_generate(self, payload: Dict[str, Any]) -> GenerateResponse:
+        """Native async single response generation."""
+        try:
+            # Track generation time
+            start_time = time.time()
+            response = await self.async_client.post(
+                f"{self.base_url}/chat/completions",
+                json=payload,
+                headers={"Content-Type": "application/json"}
+            )
+            response.raise_for_status()
+            gen_time = round((time.time() - start_time) * 1000, 1)
+
+            result = response.json()
+
+            # Extract response from OpenAI format
+            if "choices" in result and len(result["choices"]) > 0:
+                choice = result["choices"][0]
+                content = choice.get("message", {}).get("content", "")
+                finish_reason = choice.get("finish_reason", "stop")
+            else:
+                content = "No response generated"
+                finish_reason = "error"
+
+            # Extract usage info
+            usage = result.get("usage", {})
+
+            return GenerateResponse(
+                content=content,
+                model=self.model,
+                finish_reason=finish_reason,
+                raw_response=result,
+                usage={
+                    "input_tokens": usage.get("prompt_tokens", 0),
+                    "output_tokens": usage.get("completion_tokens", 0),
+                    "total_tokens": usage.get("total_tokens", 0),
+                    "prompt_tokens": usage.get("prompt_tokens", 0),
+                    "completion_tokens": usage.get("completion_tokens", 0)
+                },
+                gen_time=gen_time
+            )
+
+        except Exception as e:
+            error_str = str(e).lower()
+            if ('404' in error_str or 'not found' in error_str or 'model' in error_str) and ('not found' in error_str):
+                try:
+                    available_models = self.list_available_models(base_url=self.base_url)
+                    error_message = format_model_error("LMStudio", self.model, available_models)
+                    raise ModelNotFoundError(error_message)
+                except Exception:
+                    raise ModelNotFoundError(f"Model '{self.model}' not found in LMStudio")
+            else:
+                raise ProviderAPIError(f"LMStudio API error: {str(e)}")
+
+    async def _async_stream_generate(self, payload: Dict[str, Any]) -> AsyncIterator[GenerateResponse]:
+        """Native async streaming response generation."""
+        try:
+            async with self.async_client.stream(
+                "POST",
+                f"{self.base_url}/chat/completions",
+                json=payload,
+                headers={"Content-Type": "application/json"}
+            ) as response:
+                response.raise_for_status()
+
+                async for line in response.aiter_lines():
+                    if line:
+                        line = line.strip()
+
+                        if line.startswith("data: "):
+                            data = line[6:]  # Remove "data: " prefix
+
+                            if data == "[DONE]":
+                                break
+
+                            try:
+                                chunk = json.loads(data)
+
+                                if "choices" in chunk and len(chunk["choices"]) > 0:
+                                    choice = chunk["choices"][0]
+                                    delta = choice.get("delta", {})
+                                    content = delta.get("content", "")
+                                    finish_reason = choice.get("finish_reason")
+
+                                    yield GenerateResponse(
+                                        content=content,
+                                        model=self.model,
+                                        finish_reason=finish_reason,
+                                        raw_response=chunk
+                                    )
+
+                            except json.JSONDecodeError:
+                                continue
+
+        except Exception as e:
+            yield GenerateResponse(
+                content=f"Error: {str(e)}",
+                model=self.model,
+                finish_reason="error"
+            )
+
     def get_capabilities(self) -> List[str]:
         """Get LM Studio capabilities"""
         return ["streaming", "chat", "tools"]
abstractcore/providers/ollama_provider.py

@@ -5,7 +5,7 @@ Ollama provider implementation.
 import json
 import httpx
 import time
-from typing import List, Dict, Any, Optional, Union, Iterator, Type
+from typing import List, Dict, Any, Optional, Union, Iterator, AsyncIterator, Type
 
 try:
     from pydantic import BaseModel
@@ -15,7 +15,7 @@ except ImportError:
     BaseModel = None
 from .base import BaseProvider
 from ..core.types import GenerateResponse
-from ..exceptions import ProviderAPIError, ModelNotFoundError, format_model_error
+from ..exceptions import ProviderAPIError, ModelNotFoundError, format_model_error, format_provider_error
 from ..tools import UniversalToolHandler, ToolDefinition, execute_tools
 from ..events import EventType
 
@@ -29,10 +29,21 @@ class OllamaProvider(BaseProvider):
 
         self.base_url = base_url.rstrip('/')
         self.client = httpx.Client(timeout=self._timeout)
+        self._async_client = None  # Lazy-loaded async client
 
         # Initialize tool handler
         self.tool_handler = UniversalToolHandler(model)
 
+    @property
+    def async_client(self):
+        """Lazy-load async HTTP client for native async operations."""
+        if self._async_client is None:
+            self._async_client = httpx.AsyncClient(
+                base_url=self.base_url,
+                timeout=self._timeout
+            )
+        return self._async_client
+
     def unload(self) -> None:
         """
         Unload the model from Ollama server memory.
@@ -59,6 +70,17 @@
             if hasattr(self, 'client') and self.client is not None:
                 self.client.close()
 
+            # Close async client if it was created
+            if self._async_client is not None:
+                import asyncio
+                try:
+                    loop = asyncio.get_running_loop()
+                    loop.create_task(self._async_client.aclose())
+                except RuntimeError:
+                    # No running loop, close synchronously
+                    import asyncio
+                    asyncio.run(self._async_client.aclose())
+
         except Exception as e:
             # Log but don't raise - unload should be best-effort
             if hasattr(self, 'logger'):
@@ -377,6 +399,226 @@
                 finish_reason="error"
             )
 
+    async def _agenerate_internal(self,
+                                  prompt: str,
+                                  messages: Optional[List[Dict]],
+                                  system_prompt: Optional[str],
+                                  tools: Optional[List],
+                                  media: Optional[List],
+                                  stream: bool,
+                                  **kwargs):
+        """Native async implementation using httpx.AsyncClient - 3-10x faster for batch operations."""
+        # Handle tools for prompted models
+        effective_system_prompt = system_prompt
+        if tools and self.tool_handler.supports_prompted:
+            tool_prompt = self.tool_handler.format_tools_prompt(tools)
+            if effective_system_prompt:
+                effective_system_prompt = f"{effective_system_prompt}\n\n{tool_prompt}"
+            else:
+                effective_system_prompt = tool_prompt
+
+        # Build request payload (same logic as sync)
+        generation_kwargs = self._prepare_generation_kwargs(**kwargs)
+        max_output_tokens = self._get_provider_max_tokens_param(generation_kwargs)
+        response_model = kwargs.get('response_model')
+
+        payload = {
+            "model": self.model,
+            "stream": stream,
+            "options": {
+                "temperature": kwargs.get("temperature", self.temperature),
+                "num_predict": max_output_tokens,
+            }
+        }
+
+        seed_value = kwargs.get("seed", self.seed)
+        if seed_value is not None:
+            payload["options"]["seed"] = seed_value
+
+        # Add structured output support
+        if response_model and PYDANTIC_AVAILABLE:
+            json_schema = response_model.model_json_schema()
+            payload["format"] = json_schema
+
+        # Use chat format
+        use_chat_format = tools is not None or messages is not None or True
+
+        if use_chat_format:
+            payload["messages"] = []
+
+            if effective_system_prompt:
+                payload["messages"].append({
+                    "role": "system",
+                    "content": effective_system_prompt
+                })
+
+            if messages:
+                converted_messages = self._convert_messages_for_ollama(messages)
+                payload["messages"].extend(converted_messages)
+
+            if media:
+                user_message_text = prompt.strip() if prompt else ""
+                try:
+                    from ..media.handlers import LocalMediaHandler
+                    media_handler = LocalMediaHandler("ollama", self.model_capabilities, model_name=self.model)
+                    multimodal_message = media_handler.create_multimodal_message(user_message_text, media)
+
+                    if isinstance(multimodal_message, str):
+                        payload["messages"].append({"role": "user", "content": multimodal_message})
+                    else:
+                        payload["messages"].append(multimodal_message)
+                except Exception as e:
+                    if hasattr(self, 'logger'):
+                        self.logger.warning(f"Failed to process media: {e}")
+                    if user_message_text:
+                        payload["messages"].append({"role": "user", "content": user_message_text})
+
+            elif prompt and prompt.strip():
+                payload["messages"].append({"role": "user", "content": prompt})
+
+            endpoint = "/api/chat"
+        else:
+            full_prompt = prompt
+            if effective_system_prompt:
+                full_prompt = f"{effective_system_prompt}\n\n{prompt}"
+            payload["prompt"] = full_prompt
+            endpoint = "/api/generate"
+
+        if stream:
+            return self._async_stream_generate(endpoint, payload, tools, kwargs.get('tool_call_tags'))
+        else:
+            return await self._async_single_generate(endpoint, payload, tools, kwargs.get('media_metadata'))
+
+    async def _async_single_generate(self, endpoint: str, payload: Dict[str, Any],
+                                     tools: Optional[List[Dict[str, Any]]] = None,
+                                     media_metadata: Optional[List[Dict[str, Any]]] = None) -> GenerateResponse:
+        """Native async single response generation."""
+        try:
+            start_time = time.time()
+            response = await self.async_client.post(endpoint, json=payload)
+            response.raise_for_status()
+            gen_time = round((time.time() - start_time) * 1000, 1)
+
+            result = response.json()
+
+            if endpoint == "/api/chat":
+                content = result.get("message", {}).get("content", "")
+            else:
+                content = result.get("response", "")
+
+            generate_response = GenerateResponse(
+                content=content,
+                model=self.model,
+                finish_reason="stop",
+                raw_response=result,
+                usage={
+                    "input_tokens": result.get("prompt_eval_count", 0),
+                    "output_tokens": result.get("eval_count", 0),
+                    "total_tokens": result.get("prompt_eval_count", 0) + result.get("eval_count", 0),
+                    "prompt_tokens": result.get("prompt_eval_count", 0),
+                    "completion_tokens": result.get("eval_count", 0)
+                },
+                gen_time=gen_time
+            )
+
+            if media_metadata:
+                if not generate_response.metadata:
+                    generate_response.metadata = {}
+                generate_response.metadata['media_metadata'] = media_metadata
+
+            if self.execute_tools and tools and self.tool_handler.supports_prompted and content:
+                return self._handle_tool_execution(generate_response, tools)
+
+            return generate_response
+
+        except Exception as e:
+            error_str = str(e).lower()
+            if ('404' in error_str or 'not found' in error_str):
+                available_models = self.list_available_models(base_url=self.base_url)
+                error_message = format_model_error("Ollama", self.model, available_models)
+                raise ModelNotFoundError(error_message)
+            else:
+                return GenerateResponse(
+                    content=f"Error: {str(e)}",
+                    model=self.model,
+                    finish_reason="error"
+                )
+
+    async def _async_stream_generate(self, endpoint: str, payload: Dict[str, Any],
+                                     tools: Optional[List[Dict[str, Any]]] = None,
+                                     tool_call_tags: Optional[str] = None):
+        """Native async streaming response generation."""
+        try:
+            async with self.async_client.stream("POST", endpoint, json=payload) as response:
+                response.raise_for_status()
+
+                full_content = ""
+                rewriter = None
+                buffer = ""
+                if tool_call_tags:
+                    try:
+                        from ..tools.tag_rewriter import create_tag_rewriter
+                        rewriter = create_tag_rewriter(tool_call_tags)
+                    except ImportError:
+                        pass
+
+                async for line in response.aiter_lines():
+                    if line:
+                        try:
+                            chunk = json.loads(line)
+
+                            if endpoint == "/api/chat":
+                                content = chunk.get("message", {}).get("content", "")
+                            else:
+                                content = chunk.get("response", "")
+
+                            done = chunk.get("done", False)
+                            full_content += content
+
+                            if rewriter and content:
+                                rewritten_content, buffer = rewriter.rewrite_streaming_chunk(content, buffer)
+                                content = rewritten_content
+
+                            chunk_response = GenerateResponse(
+                                content=content,
+                                model=self.model,
+                                finish_reason="stop" if done else None,
+                                raw_response=chunk
+                            )
+
+                            yield chunk_response
+
+                            if done:
+                                break
+
+                        except json.JSONDecodeError:
+                            continue
+
+                # Execute tools if enabled
+                if self.execute_tools and tools and self.tool_handler.supports_prompted and full_content:
+                    complete_response = GenerateResponse(
+                        content=full_content,
+                        model=self.model,
+                        finish_reason="stop"
+                    )
+
+                    final_response = self._handle_tool_execution(complete_response, tools)
+
+                    if final_response.content != full_content:
+                        tool_results_content = final_response.content[len(full_content):]
+                        yield GenerateResponse(
+                            content=tool_results_content,
+                            model=self.model,
+                            finish_reason="stop"
+                        )
+
+        except Exception as e:
+            yield GenerateResponse(
+                content=f"Error: {str(e)}",
+                model=self.model,
+                finish_reason="error"
+            )
+
     def _handle_tool_execution(self, response: GenerateResponse, tools: List[Dict[str, Any]]) -> GenerateResponse:
         """Handle tool execution for prompted models"""
         # Parse tool calls from response
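Worth noting when reading the two streaming implementations side by side: LM Studio emits OpenAI-style SSE ("data: " prefixed lines with a "[DONE]" sentinel), while Ollama emits one JSON object per line with a "done" flag, yet both surface to callers as an async iterator of GenerateResponse chunks. A hedged consumption sketch, again calling the internal coroutine shown above since the public async API added in base.py is outside this excerpt (constructor arguments and model name are illustrative, and a local Ollama server is assumed):

import asyncio

from abstractcore.providers.ollama_provider import OllamaProvider


async def stream_answer(prompt: str) -> None:
    provider = OllamaProvider(model="llama3.1")  # illustrative arguments
    # With stream=True, awaiting _agenerate_internal returns an async generator of chunks.
    chunks = await provider._agenerate_internal(prompt, None, None, None, None, True)
    async for chunk in chunks:
        print(chunk.content, end="", flush=True)
        if chunk.finish_reason == "stop":
            break
    await provider.async_client.aclose()


asyncio.run(stream_answer("Why is the sky blue?"))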