abstractcore 2.5.2__py3-none-any.whl → 2.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractcore/__init__.py +19 -1
- abstractcore/architectures/detection.py +252 -6
- abstractcore/assets/architecture_formats.json +14 -1
- abstractcore/assets/model_capabilities.json +533 -10
- abstractcore/compression/__init__.py +29 -0
- abstractcore/compression/analytics.py +420 -0
- abstractcore/compression/cache.py +250 -0
- abstractcore/compression/config.py +279 -0
- abstractcore/compression/exceptions.py +30 -0
- abstractcore/compression/glyph_processor.py +381 -0
- abstractcore/compression/optimizer.py +388 -0
- abstractcore/compression/orchestrator.py +380 -0
- abstractcore/compression/pil_text_renderer.py +818 -0
- abstractcore/compression/quality.py +226 -0
- abstractcore/compression/text_formatter.py +666 -0
- abstractcore/compression/vision_compressor.py +371 -0
- abstractcore/config/main.py +64 -0
- abstractcore/config/manager.py +100 -5
- abstractcore/core/retry.py +2 -2
- abstractcore/core/session.py +193 -7
- abstractcore/download.py +253 -0
- abstractcore/embeddings/manager.py +2 -2
- abstractcore/events/__init__.py +113 -2
- abstractcore/exceptions/__init__.py +49 -2
- abstractcore/media/auto_handler.py +312 -18
- abstractcore/media/handlers/local_handler.py +14 -2
- abstractcore/media/handlers/openai_handler.py +62 -3
- abstractcore/media/processors/__init__.py +11 -1
- abstractcore/media/processors/direct_pdf_processor.py +210 -0
- abstractcore/media/processors/glyph_pdf_processor.py +227 -0
- abstractcore/media/processors/image_processor.py +7 -1
- abstractcore/media/processors/office_processor.py +2 -2
- abstractcore/media/processors/text_processor.py +18 -3
- abstractcore/media/types.py +164 -7
- abstractcore/media/utils/image_scaler.py +2 -2
- abstractcore/media/vision_fallback.py +2 -2
- abstractcore/providers/__init__.py +18 -0
- abstractcore/providers/anthropic_provider.py +228 -8
- abstractcore/providers/base.py +378 -11
- abstractcore/providers/huggingface_provider.py +563 -23
- abstractcore/providers/lmstudio_provider.py +284 -4
- abstractcore/providers/mlx_provider.py +27 -2
- abstractcore/providers/model_capabilities.py +352 -0
- abstractcore/providers/ollama_provider.py +282 -6
- abstractcore/providers/openai_provider.py +286 -8
- abstractcore/providers/registry.py +85 -13
- abstractcore/providers/streaming.py +2 -2
- abstractcore/server/app.py +91 -81
- abstractcore/tools/common_tools.py +2 -2
- abstractcore/tools/handler.py +2 -2
- abstractcore/tools/parser.py +2 -2
- abstractcore/tools/registry.py +2 -2
- abstractcore/tools/syntax_rewriter.py +2 -2
- abstractcore/tools/tag_rewriter.py +3 -3
- abstractcore/utils/__init__.py +4 -1
- abstractcore/utils/self_fixes.py +2 -2
- abstractcore/utils/trace_export.py +287 -0
- abstractcore/utils/version.py +1 -1
- abstractcore/utils/vlm_token_calculator.py +655 -0
- {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/METADATA +207 -8
- abstractcore-2.6.0.dist-info/RECORD +108 -0
- abstractcore-2.5.2.dist-info/RECORD +0 -90
- {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/WHEEL +0 -0
- {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/entry_points.txt +0 -0
- {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/top_level.txt +0 -0
abstractcore/providers/ollama_provider.py +282 -6

@@ -5,7 +5,7 @@ Ollama provider implementation.
 import json
 import httpx
 import time
-from typing import List, Dict, Any, Optional, Union, Iterator, Type
+from typing import List, Dict, Any, Optional, Union, Iterator, AsyncIterator, Type
 
 try:
     from pydantic import BaseModel
@@ -15,7 +15,7 @@ except ImportError:
     BaseModel = None
 from .base import BaseProvider
 from ..core.types import GenerateResponse
-from ..exceptions import ProviderAPIError, ModelNotFoundError, format_model_error
+from ..exceptions import ProviderAPIError, ModelNotFoundError, format_model_error, format_provider_error
 from ..tools import UniversalToolHandler, ToolDefinition, execute_tools
 from ..events import EventType
 
@@ -29,10 +29,21 @@ class OllamaProvider(BaseProvider):
 
         self.base_url = base_url.rstrip('/')
         self.client = httpx.Client(timeout=self._timeout)
+        self._async_client = None  # Lazy-loaded async client
 
         # Initialize tool handler
         self.tool_handler = UniversalToolHandler(model)
 
+    @property
+    def async_client(self):
+        """Lazy-load async HTTP client for native async operations."""
+        if self._async_client is None:
+            self._async_client = httpx.AsyncClient(
+                base_url=self.base_url,
+                timeout=self._timeout
+            )
+        return self._async_client
+
     def unload(self) -> None:
         """
         Unload the model from Ollama server memory.
@@ -59,6 +70,17 @@ class OllamaProvider(BaseProvider):
             if hasattr(self, 'client') and self.client is not None:
                 self.client.close()
 
+            # Close async client if it was created
+            if self._async_client is not None:
+                import asyncio
+                try:
+                    loop = asyncio.get_running_loop()
+                    loop.create_task(self._async_client.aclose())
+                except RuntimeError:
+                    # No running loop, close synchronously
+                    import asyncio
+                    asyncio.run(self._async_client.aclose())
+
         except Exception as e:
             # Log but don't raise - unload should be best-effort
             if hasattr(self, 'logger'):
@@ -114,6 +136,7 @@ class OllamaProvider(BaseProvider):
                            media: Optional[List['MediaContent']] = None,
                            stream: bool = False,
                            response_model: Optional[Type[BaseModel]] = None,
+                           media_metadata: Optional[List[Dict[str, Any]]] = None,
                            **kwargs) -> Union[GenerateResponse, Iterator[GenerateResponse]]:
         """Internal generation with Ollama"""
 
@@ -224,9 +247,9 @@ class OllamaProvider(BaseProvider):
         if stream:
             return self._stream_generate(endpoint, payload, tools, kwargs.get('tool_call_tags'))
         else:
-            return self._single_generate(endpoint, payload, tools)
+            return self._single_generate(endpoint, payload, tools, media_metadata)
 
-    def _single_generate(self, endpoint: str, payload: Dict[str, Any], tools: Optional[List[Dict[str, Any]]] = None) -> GenerateResponse:
+    def _single_generate(self, endpoint: str, payload: Dict[str, Any], tools: Optional[List[Dict[str, Any]]] = None, media_metadata: Optional[List[Dict[str, Any]]] = None) -> GenerateResponse:
         """Generate single response"""
         try:
             # Track generation time
@@ -262,6 +285,12 @@ class OllamaProvider(BaseProvider):
                 },
                 gen_time=gen_time
             )
+
+            # Attach media metadata if available
+            if media_metadata:
+                if not generate_response.metadata:
+                    generate_response.metadata = {}
+                generate_response.metadata['media_metadata'] = media_metadata
 
             # Execute tools if enabled and tools are present
             if self.execute_tools and tools and self.tool_handler.supports_prompted and content:
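The hunk above threads per-item media metadata through to the returned `GenerateResponse`. A minimal usage sketch of reading it back follows; the public `generate(...)` call shape and the `media=[...]` argument are assumptions not shown in this diff, only the `metadata['media_metadata']` key is.

```python
# Hypothetical caller-side sketch (assumed public API surface).
response = provider.generate(
    "Summarize this document",
    media=["report.pdf"],  # assumed input shape for attached files
)

# Only the 'media_metadata' key is confirmed by the diff above.
for item in (response.metadata or {}).get("media_metadata", []):
    print(item)  # per-file processing details attached by the provider
```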
@@ -370,6 +399,226 @@ class OllamaProvider(BaseProvider):
                 finish_reason="error"
             )
 
+    async def _agenerate_internal(self,
+                                  prompt: str,
+                                  messages: Optional[List[Dict]],
+                                  system_prompt: Optional[str],
+                                  tools: Optional[List],
+                                  media: Optional[List],
+                                  stream: bool,
+                                  **kwargs):
+        """Native async implementation using httpx.AsyncClient - 3-10x faster for batch operations."""
+        # Handle tools for prompted models
+        effective_system_prompt = system_prompt
+        if tools and self.tool_handler.supports_prompted:
+            tool_prompt = self.tool_handler.format_tools_prompt(tools)
+            if effective_system_prompt:
+                effective_system_prompt = f"{effective_system_prompt}\n\n{tool_prompt}"
+            else:
+                effective_system_prompt = tool_prompt
+
+        # Build request payload (same logic as sync)
+        generation_kwargs = self._prepare_generation_kwargs(**kwargs)
+        max_output_tokens = self._get_provider_max_tokens_param(generation_kwargs)
+        response_model = kwargs.get('response_model')
+
+        payload = {
+            "model": self.model,
+            "stream": stream,
+            "options": {
+                "temperature": kwargs.get("temperature", self.temperature),
+                "num_predict": max_output_tokens,
+            }
+        }
+
+        seed_value = kwargs.get("seed", self.seed)
+        if seed_value is not None:
+            payload["options"]["seed"] = seed_value
+
+        # Add structured output support
+        if response_model and PYDANTIC_AVAILABLE:
+            json_schema = response_model.model_json_schema()
+            payload["format"] = json_schema
+
+        # Use chat format
+        use_chat_format = tools is not None or messages is not None or True
+
+        if use_chat_format:
+            payload["messages"] = []
+
+            if effective_system_prompt:
+                payload["messages"].append({
+                    "role": "system",
+                    "content": effective_system_prompt
+                })
+
+            if messages:
+                converted_messages = self._convert_messages_for_ollama(messages)
+                payload["messages"].extend(converted_messages)
+
+            if media:
+                user_message_text = prompt.strip() if prompt else ""
+                try:
+                    from ..media.handlers import LocalMediaHandler
+                    media_handler = LocalMediaHandler("ollama", self.model_capabilities, model_name=self.model)
+                    multimodal_message = media_handler.create_multimodal_message(user_message_text, media)
+
+                    if isinstance(multimodal_message, str):
+                        payload["messages"].append({"role": "user", "content": multimodal_message})
+                    else:
+                        payload["messages"].append(multimodal_message)
+                except Exception as e:
+                    if hasattr(self, 'logger'):
+                        self.logger.warning(f"Failed to process media: {e}")
+                    if user_message_text:
+                        payload["messages"].append({"role": "user", "content": user_message_text})
+
+            elif prompt and prompt.strip():
+                payload["messages"].append({"role": "user", "content": prompt})
+
+            endpoint = "/api/chat"
+        else:
+            full_prompt = prompt
+            if effective_system_prompt:
+                full_prompt = f"{effective_system_prompt}\n\n{prompt}"
+            payload["prompt"] = full_prompt
+            endpoint = "/api/generate"
+
+        if stream:
+            return self._async_stream_generate(endpoint, payload, tools, kwargs.get('tool_call_tags'))
+        else:
+            return await self._async_single_generate(endpoint, payload, tools, kwargs.get('media_metadata'))
+
+    async def _async_single_generate(self, endpoint: str, payload: Dict[str, Any],
+                                     tools: Optional[List[Dict[str, Any]]] = None,
+                                     media_metadata: Optional[List[Dict[str, Any]]] = None) -> GenerateResponse:
+        """Native async single response generation."""
+        try:
+            start_time = time.time()
+            response = await self.async_client.post(endpoint, json=payload)
+            response.raise_for_status()
+            gen_time = round((time.time() - start_time) * 1000, 1)
+
+            result = response.json()
+
+            if endpoint == "/api/chat":
+                content = result.get("message", {}).get("content", "")
+            else:
+                content = result.get("response", "")
+
+            generate_response = GenerateResponse(
+                content=content,
+                model=self.model,
+                finish_reason="stop",
+                raw_response=result,
+                usage={
+                    "input_tokens": result.get("prompt_eval_count", 0),
+                    "output_tokens": result.get("eval_count", 0),
+                    "total_tokens": result.get("prompt_eval_count", 0) + result.get("eval_count", 0),
+                    "prompt_tokens": result.get("prompt_eval_count", 0),
+                    "completion_tokens": result.get("eval_count", 0)
+                },
+                gen_time=gen_time
+            )
+
+            if media_metadata:
+                if not generate_response.metadata:
+                    generate_response.metadata = {}
+                generate_response.metadata['media_metadata'] = media_metadata
+
+            if self.execute_tools and tools and self.tool_handler.supports_prompted and content:
+                return self._handle_tool_execution(generate_response, tools)
+
+            return generate_response
+
+        except Exception as e:
+            error_str = str(e).lower()
+            if ('404' in error_str or 'not found' in error_str):
+                available_models = self.list_available_models(base_url=self.base_url)
+                error_message = format_model_error("Ollama", self.model, available_models)
+                raise ModelNotFoundError(error_message)
+            else:
+                return GenerateResponse(
+                    content=f"Error: {str(e)}",
+                    model=self.model,
+                    finish_reason="error"
+                )
+
+    async def _async_stream_generate(self, endpoint: str, payload: Dict[str, Any],
+                                     tools: Optional[List[Dict[str, Any]]] = None,
+                                     tool_call_tags: Optional[str] = None):
+        """Native async streaming response generation."""
+        try:
+            async with self.async_client.stream("POST", endpoint, json=payload) as response:
+                response.raise_for_status()
+
+                full_content = ""
+                rewriter = None
+                buffer = ""
+                if tool_call_tags:
+                    try:
+                        from ..tools.tag_rewriter import create_tag_rewriter
+                        rewriter = create_tag_rewriter(tool_call_tags)
+                    except ImportError:
+                        pass
+
+                async for line in response.aiter_lines():
+                    if line:
+                        try:
+                            chunk = json.loads(line)
+
+                            if endpoint == "/api/chat":
+                                content = chunk.get("message", {}).get("content", "")
+                            else:
+                                content = chunk.get("response", "")
+
+                            done = chunk.get("done", False)
+                            full_content += content
+
+                            if rewriter and content:
+                                rewritten_content, buffer = rewriter.rewrite_streaming_chunk(content, buffer)
+                                content = rewritten_content
+
+                            chunk_response = GenerateResponse(
+                                content=content,
+                                model=self.model,
+                                finish_reason="stop" if done else None,
+                                raw_response=chunk
+                            )
+
+                            yield chunk_response
+
+                            if done:
+                                break
+
+                        except json.JSONDecodeError:
+                            continue
+
+                # Execute tools if enabled
+                if self.execute_tools and tools and self.tool_handler.supports_prompted and full_content:
+                    complete_response = GenerateResponse(
+                        content=full_content,
+                        model=self.model,
+                        finish_reason="stop"
+                    )
+
+                    final_response = self._handle_tool_execution(complete_response, tools)
+
+                    if final_response.content != full_content:
+                        tool_results_content = final_response.content[len(full_content):]
+                        yield GenerateResponse(
+                            content=tool_results_content,
+                            model=self.model,
+                            finish_reason="stop"
+                        )
+
+        except Exception as e:
+            yield GenerateResponse(
+                content=f"Error: {str(e)}",
+                model=self.model,
+                finish_reason="error"
+            )
+
     def _handle_tool_execution(self, response: GenerateResponse, tools: List[Dict[str, Any]]) -> GenerateResponse:
         """Handle tool execution for prompted models"""
         # Parse tool calls from response
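The new `_agenerate_internal` / `_async_single_generate` / `_async_stream_generate` path gives the Ollama provider a native async route over the shared lazy `httpx.AsyncClient`; the "3-10x faster for batch operations" note in the docstring refers to issuing many requests concurrently instead of serially. A minimal sketch of such batching is below. It calls the internal method whose signature is shown in the hunk; how applications reach it through the public async surface of `BaseProvider`, and the provider's constructor arguments, are assumptions not confirmed by this diff.

```python
import asyncio

async def generate_batch(provider, prompts):
    # Fan several generations out over the provider's lazy AsyncClient.
    # Signature per the hunk: _agenerate_internal(prompt, messages, system_prompt,
    # tools, media, stream, **kwargs). Applications would normally go through
    # whatever public async wrapper BaseProvider exposes (assumed, not shown here).
    tasks = [
        provider._agenerate_internal(p, None, None, None, None, False)
        for p in prompts
    ]
    return await asyncio.gather(*tasks)

# responses = asyncio.run(generate_batch(provider, ["hello", "list three colors"]))
```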
@@ -446,8 +695,21 @@ class OllamaProvider(BaseProvider):
         self.client = httpx.Client(timeout=self._timeout)
 
     def list_available_models(self, **kwargs) -> List[str]:
-        """
+        """
+        List available models from Ollama server.
+
+        Args:
+            **kwargs: Optional parameters including:
+                - base_url: Ollama server URL
+                - input_capabilities: List of ModelInputCapability enums to filter by input capability
+                - output_capabilities: List of ModelOutputCapability enums to filter by output capability
+
+        Returns:
+            List of model names, optionally filtered by capabilities
+        """
         try:
+            from .model_capabilities import filter_models_by_capabilities
+
             # Use provided base_url or fall back to instance base_url
             base_url = kwargs.get('base_url', self.base_url)
 
@@ -455,7 +717,21 @@ class OllamaProvider(BaseProvider):
             if response.status_code == 200:
                 data = response.json()
                 models = [model["name"] for model in data.get("models", [])]
-
+                models = sorted(models)
+
+                # Apply new capability filtering if provided
+                input_capabilities = kwargs.get('input_capabilities')
+                output_capabilities = kwargs.get('output_capabilities')
+
+                if input_capabilities or output_capabilities:
+                    models = filter_models_by_capabilities(
+                        models,
+                        input_capabilities=input_capabilities,
+                        output_capabilities=output_capabilities
+                    )
+
+
+                return models
             else:
                 self.logger.warning(f"Ollama API returned status {response.status_code}")
                 return []