abstractcore-2.5.2-py3-none-any.whl → abstractcore-2.6.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. abstractcore/__init__.py +19 -1
  2. abstractcore/architectures/detection.py +252 -6
  3. abstractcore/assets/architecture_formats.json +14 -1
  4. abstractcore/assets/model_capabilities.json +533 -10
  5. abstractcore/compression/__init__.py +29 -0
  6. abstractcore/compression/analytics.py +420 -0
  7. abstractcore/compression/cache.py +250 -0
  8. abstractcore/compression/config.py +279 -0
  9. abstractcore/compression/exceptions.py +30 -0
  10. abstractcore/compression/glyph_processor.py +381 -0
  11. abstractcore/compression/optimizer.py +388 -0
  12. abstractcore/compression/orchestrator.py +380 -0
  13. abstractcore/compression/pil_text_renderer.py +818 -0
  14. abstractcore/compression/quality.py +226 -0
  15. abstractcore/compression/text_formatter.py +666 -0
  16. abstractcore/compression/vision_compressor.py +371 -0
  17. abstractcore/config/main.py +64 -0
  18. abstractcore/config/manager.py +100 -5
  19. abstractcore/core/retry.py +2 -2
  20. abstractcore/core/session.py +193 -7
  21. abstractcore/download.py +253 -0
  22. abstractcore/embeddings/manager.py +2 -2
  23. abstractcore/events/__init__.py +113 -2
  24. abstractcore/exceptions/__init__.py +49 -2
  25. abstractcore/media/auto_handler.py +312 -18
  26. abstractcore/media/handlers/local_handler.py +14 -2
  27. abstractcore/media/handlers/openai_handler.py +62 -3
  28. abstractcore/media/processors/__init__.py +11 -1
  29. abstractcore/media/processors/direct_pdf_processor.py +210 -0
  30. abstractcore/media/processors/glyph_pdf_processor.py +227 -0
  31. abstractcore/media/processors/image_processor.py +7 -1
  32. abstractcore/media/processors/office_processor.py +2 -2
  33. abstractcore/media/processors/text_processor.py +18 -3
  34. abstractcore/media/types.py +164 -7
  35. abstractcore/media/utils/image_scaler.py +2 -2
  36. abstractcore/media/vision_fallback.py +2 -2
  37. abstractcore/providers/__init__.py +18 -0
  38. abstractcore/providers/anthropic_provider.py +228 -8
  39. abstractcore/providers/base.py +378 -11
  40. abstractcore/providers/huggingface_provider.py +563 -23
  41. abstractcore/providers/lmstudio_provider.py +284 -4
  42. abstractcore/providers/mlx_provider.py +27 -2
  43. abstractcore/providers/model_capabilities.py +352 -0
  44. abstractcore/providers/ollama_provider.py +282 -6
  45. abstractcore/providers/openai_provider.py +286 -8
  46. abstractcore/providers/registry.py +85 -13
  47. abstractcore/providers/streaming.py +2 -2
  48. abstractcore/server/app.py +91 -81
  49. abstractcore/tools/common_tools.py +2 -2
  50. abstractcore/tools/handler.py +2 -2
  51. abstractcore/tools/parser.py +2 -2
  52. abstractcore/tools/registry.py +2 -2
  53. abstractcore/tools/syntax_rewriter.py +2 -2
  54. abstractcore/tools/tag_rewriter.py +3 -3
  55. abstractcore/utils/__init__.py +4 -1
  56. abstractcore/utils/self_fixes.py +2 -2
  57. abstractcore/utils/trace_export.py +287 -0
  58. abstractcore/utils/version.py +1 -1
  59. abstractcore/utils/vlm_token_calculator.py +655 -0
  60. {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/METADATA +207 -8
  61. abstractcore-2.6.0.dist-info/RECORD +108 -0
  62. abstractcore-2.5.2.dist-info/RECORD +0 -90
  63. {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/WHEEL +0 -0
  64. {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/entry_points.txt +0 -0
  65. {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/licenses/LICENSE +0 -0
  66. {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/top_level.txt +0 -0
@@ -5,7 +5,7 @@ Ollama provider implementation.
 import json
 import httpx
 import time
-from typing import List, Dict, Any, Optional, Union, Iterator, Type
+from typing import List, Dict, Any, Optional, Union, Iterator, AsyncIterator, Type
 
 try:
     from pydantic import BaseModel
@@ -15,7 +15,7 @@ except ImportError:
    BaseModel = None
 from .base import BaseProvider
 from ..core.types import GenerateResponse
-from ..exceptions import ProviderAPIError, ModelNotFoundError, format_model_error
+from ..exceptions import ProviderAPIError, ModelNotFoundError, format_model_error, format_provider_error
 from ..tools import UniversalToolHandler, ToolDefinition, execute_tools
 from ..events import EventType
 
@@ -29,10 +29,21 @@ class OllamaProvider(BaseProvider):
 
        self.base_url = base_url.rstrip('/')
        self.client = httpx.Client(timeout=self._timeout)
+       self._async_client = None  # Lazy-loaded async client
 
        # Initialize tool handler
        self.tool_handler = UniversalToolHandler(model)
 
+   @property
+   def async_client(self):
+       """Lazy-load async HTTP client for native async operations."""
+       if self._async_client is None:
+           self._async_client = httpx.AsyncClient(
+               base_url=self.base_url,
+               timeout=self._timeout
+           )
+       return self._async_client
+
    def unload(self) -> None:
        """
        Unload the model from Ollama server memory.
@@ -59,6 +70,17 @@ class OllamaProvider(BaseProvider):
            if hasattr(self, 'client') and self.client is not None:
                self.client.close()
 
+           # Close async client if it was created
+           if self._async_client is not None:
+               import asyncio
+               try:
+                   loop = asyncio.get_running_loop()
+                   loop.create_task(self._async_client.aclose())
+               except RuntimeError:
+                   # No running loop, close synchronously
+                   import asyncio
+                   asyncio.run(self._async_client.aclose())
+
        except Exception as e:
            # Log but don't raise - unload should be best-effort
            if hasattr(self, 'logger'):
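
Note on the two hunks above: they introduce a lazily created `httpx.AsyncClient` and close it from synchronous `unload()` code whether or not an event loop is running. A minimal standalone sketch of that pattern, independent of abstractcore (all names below are illustrative):

```python
import asyncio
import httpx


class LazyAsyncHTTP:
    """Illustrative sketch: lazy AsyncClient creation plus best-effort close."""

    def __init__(self, base_url: str, timeout: float = 30.0):
        self.base_url = base_url
        self.timeout = timeout
        self._async_client = None  # created on first use, never in __init__

    @property
    def async_client(self) -> httpx.AsyncClient:
        # Lazy creation keeps construction synchronous and cheap; the async
        # client only exists once async code actually needs it.
        if self._async_client is None:
            self._async_client = httpx.AsyncClient(
                base_url=self.base_url, timeout=self.timeout
            )
        return self._async_client

    def close(self) -> None:
        # Mirrors the unload() logic above: schedule aclose() on a running
        # loop if there is one, otherwise close synchronously via asyncio.run().
        if self._async_client is None:
            return
        try:
            loop = asyncio.get_running_loop()
            loop.create_task(self._async_client.aclose())
        except RuntimeError:
            asyncio.run(self._async_client.aclose())
```
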
@@ -114,6 +136,7 @@ class OllamaProvider(BaseProvider):
                           media: Optional[List['MediaContent']] = None,
                           stream: bool = False,
                           response_model: Optional[Type[BaseModel]] = None,
+                          media_metadata: Optional[List[Dict[str, Any]]] = None,
                           **kwargs) -> Union[GenerateResponse, Iterator[GenerateResponse]]:
        """Internal generation with Ollama"""
 
@@ -224,9 +247,9 @@ class OllamaProvider(BaseProvider):
        if stream:
            return self._stream_generate(endpoint, payload, tools, kwargs.get('tool_call_tags'))
        else:
-           return self._single_generate(endpoint, payload, tools)
+           return self._single_generate(endpoint, payload, tools, media_metadata)
 
-   def _single_generate(self, endpoint: str, payload: Dict[str, Any], tools: Optional[List[Dict[str, Any]]] = None) -> GenerateResponse:
+   def _single_generate(self, endpoint: str, payload: Dict[str, Any], tools: Optional[List[Dict[str, Any]]] = None, media_metadata: Optional[List[Dict[str, Any]]] = None) -> GenerateResponse:
        """Generate single response"""
        try:
            # Track generation time
@@ -262,6 +285,12 @@ class OllamaProvider(BaseProvider):
                },
                gen_time=gen_time
            )
+
+           # Attach media metadata if available
+           if media_metadata:
+               if not generate_response.metadata:
+                   generate_response.metadata = {}
+               generate_response.metadata['media_metadata'] = media_metadata
 
            # Execute tools if enabled and tools are present
            if self.execute_tools and tools and self.tool_handler.supports_prompted and content:
@@ -370,6 +399,226 @@ class OllamaProvider(BaseProvider):
                finish_reason="error"
            )
 
+   async def _agenerate_internal(self,
+                                 prompt: str,
+                                 messages: Optional[List[Dict]],
+                                 system_prompt: Optional[str],
+                                 tools: Optional[List],
+                                 media: Optional[List],
+                                 stream: bool,
+                                 **kwargs):
+       """Native async implementation using httpx.AsyncClient - 3-10x faster for batch operations."""
+       # Handle tools for prompted models
+       effective_system_prompt = system_prompt
+       if tools and self.tool_handler.supports_prompted:
+           tool_prompt = self.tool_handler.format_tools_prompt(tools)
+           if effective_system_prompt:
+               effective_system_prompt = f"{effective_system_prompt}\n\n{tool_prompt}"
+           else:
+               effective_system_prompt = tool_prompt
+
+       # Build request payload (same logic as sync)
+       generation_kwargs = self._prepare_generation_kwargs(**kwargs)
+       max_output_tokens = self._get_provider_max_tokens_param(generation_kwargs)
+       response_model = kwargs.get('response_model')
+
+       payload = {
+           "model": self.model,
+           "stream": stream,
+           "options": {
+               "temperature": kwargs.get("temperature", self.temperature),
+               "num_predict": max_output_tokens,
+           }
+       }
+
+       seed_value = kwargs.get("seed", self.seed)
+       if seed_value is not None:
+           payload["options"]["seed"] = seed_value
+
+       # Add structured output support
+       if response_model and PYDANTIC_AVAILABLE:
+           json_schema = response_model.model_json_schema()
+           payload["format"] = json_schema
+
+       # Use chat format
+       use_chat_format = tools is not None or messages is not None or True
+
+       if use_chat_format:
+           payload["messages"] = []
+
+           if effective_system_prompt:
+               payload["messages"].append({
+                   "role": "system",
+                   "content": effective_system_prompt
+               })
+
+           if messages:
+               converted_messages = self._convert_messages_for_ollama(messages)
+               payload["messages"].extend(converted_messages)
+
+           if media:
+               user_message_text = prompt.strip() if prompt else ""
+               try:
+                   from ..media.handlers import LocalMediaHandler
+                   media_handler = LocalMediaHandler("ollama", self.model_capabilities, model_name=self.model)
+                   multimodal_message = media_handler.create_multimodal_message(user_message_text, media)
+
+                   if isinstance(multimodal_message, str):
+                       payload["messages"].append({"role": "user", "content": multimodal_message})
+                   else:
+                       payload["messages"].append(multimodal_message)
+               except Exception as e:
+                   if hasattr(self, 'logger'):
+                       self.logger.warning(f"Failed to process media: {e}")
+                   if user_message_text:
+                       payload["messages"].append({"role": "user", "content": user_message_text})
+
+           elif prompt and prompt.strip():
+               payload["messages"].append({"role": "user", "content": prompt})
+
+           endpoint = "/api/chat"
+       else:
+           full_prompt = prompt
+           if effective_system_prompt:
+               full_prompt = f"{effective_system_prompt}\n\n{prompt}"
+           payload["prompt"] = full_prompt
+           endpoint = "/api/generate"
+
+       if stream:
+           return self._async_stream_generate(endpoint, payload, tools, kwargs.get('tool_call_tags'))
+       else:
+           return await self._async_single_generate(endpoint, payload, tools, kwargs.get('media_metadata'))
+
+   async def _async_single_generate(self, endpoint: str, payload: Dict[str, Any],
+                                    tools: Optional[List[Dict[str, Any]]] = None,
+                                    media_metadata: Optional[List[Dict[str, Any]]] = None) -> GenerateResponse:
+       """Native async single response generation."""
+       try:
+           start_time = time.time()
+           response = await self.async_client.post(endpoint, json=payload)
+           response.raise_for_status()
+           gen_time = round((time.time() - start_time) * 1000, 1)
+
+           result = response.json()
+
+           if endpoint == "/api/chat":
+               content = result.get("message", {}).get("content", "")
+           else:
+               content = result.get("response", "")
+
+           generate_response = GenerateResponse(
+               content=content,
+               model=self.model,
+               finish_reason="stop",
+               raw_response=result,
+               usage={
+                   "input_tokens": result.get("prompt_eval_count", 0),
+                   "output_tokens": result.get("eval_count", 0),
+                   "total_tokens": result.get("prompt_eval_count", 0) + result.get("eval_count", 0),
+                   "prompt_tokens": result.get("prompt_eval_count", 0),
+                   "completion_tokens": result.get("eval_count", 0)
+               },
+               gen_time=gen_time
+           )
+
+           if media_metadata:
+               if not generate_response.metadata:
+                   generate_response.metadata = {}
+               generate_response.metadata['media_metadata'] = media_metadata
+
+           if self.execute_tools and tools and self.tool_handler.supports_prompted and content:
+               return self._handle_tool_execution(generate_response, tools)
+
+           return generate_response
+
+       except Exception as e:
+           error_str = str(e).lower()
+           if ('404' in error_str or 'not found' in error_str):
+               available_models = self.list_available_models(base_url=self.base_url)
+               error_message = format_model_error("Ollama", self.model, available_models)
+               raise ModelNotFoundError(error_message)
+           else:
+               return GenerateResponse(
+                   content=f"Error: {str(e)}",
+                   model=self.model,
+                   finish_reason="error"
+               )
+
+   async def _async_stream_generate(self, endpoint: str, payload: Dict[str, Any],
+                                    tools: Optional[List[Dict[str, Any]]] = None,
+                                    tool_call_tags: Optional[str] = None):
+       """Native async streaming response generation."""
+       try:
+           async with self.async_client.stream("POST", endpoint, json=payload) as response:
+               response.raise_for_status()
+
+               full_content = ""
+               rewriter = None
+               buffer = ""
+               if tool_call_tags:
+                   try:
+                       from ..tools.tag_rewriter import create_tag_rewriter
+                       rewriter = create_tag_rewriter(tool_call_tags)
+                   except ImportError:
+                       pass
+
+               async for line in response.aiter_lines():
+                   if line:
+                       try:
+                           chunk = json.loads(line)
+
+                           if endpoint == "/api/chat":
+                               content = chunk.get("message", {}).get("content", "")
+                           else:
+                               content = chunk.get("response", "")
+
+                           done = chunk.get("done", False)
+                           full_content += content
+
+                           if rewriter and content:
+                               rewritten_content, buffer = rewriter.rewrite_streaming_chunk(content, buffer)
+                               content = rewritten_content
+
+                           chunk_response = GenerateResponse(
+                               content=content,
+                               model=self.model,
+                               finish_reason="stop" if done else None,
+                               raw_response=chunk
+                           )
+
+                           yield chunk_response
+
+                           if done:
+                               break
+
+                       except json.JSONDecodeError:
+                           continue
+
+               # Execute tools if enabled
+               if self.execute_tools and tools and self.tool_handler.supports_prompted and full_content:
+                   complete_response = GenerateResponse(
+                       content=full_content,
+                       model=self.model,
+                       finish_reason="stop"
+                   )
+
+                   final_response = self._handle_tool_execution(complete_response, tools)
+
+                   if final_response.content != full_content:
+                       tool_results_content = final_response.content[len(full_content):]
+                       yield GenerateResponse(
+                           content=tool_results_content,
+                           model=self.model,
+                           finish_reason="stop"
+                       )
+
+       except Exception as e:
+           yield GenerateResponse(
+               content=f"Error: {str(e)}",
+               model=self.model,
+               finish_reason="error"
+           )
+
    def _handle_tool_execution(self, response: GenerateResponse, tools: List[Dict[str, Any]]) -> GenerateResponse:
        """Handle tool execution for prompted models"""
        # Parse tool calls from response
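
The docstring's "3-10x faster for batch operations" claim rests on the requests now going through a shared `httpx.AsyncClient`, so independent prompts can be awaited concurrently instead of being serialized over the blocking client. A hedged usage sketch, assuming the provider exposes a public async entry point (named `agenerate` here for illustration; only the internal `_agenerate_internal` / `_async_single_generate` methods appear in this diff, and `create_llm` is likewise assumed):

```python
import asyncio

from abstractcore import create_llm  # assumed factory; adjust to the real entry point


async def summarize_all(texts):
    llm = create_llm("ollama", model="llama3.1:8b")  # model name is illustrative
    # Fan the prompts out; every call shares the provider's AsyncClient.
    tasks = [
        llm.agenerate(f"Summarize in one sentence:\n{text}")  # 'agenerate' is assumed
        for text in texts
    ]
    responses = await asyncio.gather(*tasks)
    return [r.content for r in responses]


if __name__ == "__main__":
    docs = ["First document ...", "Second document ...", "Third document ..."]
    print(asyncio.run(summarize_all(docs)))
```
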
@@ -446,8 +695,21 @@ class OllamaProvider(BaseProvider):
        self.client = httpx.Client(timeout=self._timeout)
 
    def list_available_models(self, **kwargs) -> List[str]:
-       """List available models from Ollama server."""
+       """
+       List available models from Ollama server.
+
+       Args:
+           **kwargs: Optional parameters including:
+               - base_url: Ollama server URL
+               - input_capabilities: List of ModelInputCapability enums to filter by input capability
+               - output_capabilities: List of ModelOutputCapability enums to filter by output capability
+
+       Returns:
+           List of model names, optionally filtered by capabilities
+       """
        try:
+           from .model_capabilities import filter_models_by_capabilities
+
            # Use provided base_url or fall back to instance base_url
            base_url = kwargs.get('base_url', self.base_url)
 
@@ -455,7 +717,21 @@ class OllamaProvider(BaseProvider):
            if response.status_code == 200:
                data = response.json()
                models = [model["name"] for model in data.get("models", [])]
-               return sorted(models)
+               models = sorted(models)
+
+               # Apply new capability filtering if provided
+               input_capabilities = kwargs.get('input_capabilities')
+               output_capabilities = kwargs.get('output_capabilities')
+
+               if input_capabilities or output_capabilities:
+                   models = filter_models_by_capabilities(
+                       models,
+                       input_capabilities=input_capabilities,
+                       output_capabilities=output_capabilities
+                   )
+
+
+               return models
            else:
                self.logger.warning(f"Ollama API returned status {response.status_code}")
                return []
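
With the new `filter_models_by_capabilities` helper (see `abstractcore/providers/model_capabilities.py` in the file list), `list_available_models` can narrow the sorted model list by input and output capability. A hedged sketch of the caller side; the `ModelInputCapability` / `ModelOutputCapability` names come from the docstring above, but the specific enum members (`IMAGE`, `TEXT`) and the constructor arguments are assumptions:

```python
# Illustrative only: enum members and constructor arguments are assumptions.
from abstractcore.providers.model_capabilities import (
    ModelInputCapability,
    ModelOutputCapability,
)
from abstractcore.providers.ollama_provider import OllamaProvider

provider = OllamaProvider(model="llama3.1:8b")  # model name is illustrative

# Unfiltered listing behaves as before: every model reported by /api/tags, sorted.
all_models = provider.list_available_models()

# With capability filters, the list is narrowed by filter_models_by_capabilities()
# before being returned.
vision_models = provider.list_available_models(
    input_capabilities=[ModelInputCapability.IMAGE],
    output_capabilities=[ModelOutputCapability.TEXT],
)
print(vision_models)
```
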