abstractcore-2.4.5-py3-none-any.whl → abstractcore-2.4.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -70,6 +70,8 @@ class AbstractCoreInterface(ABC):
  max_tokens: Optional[int] = None,
  max_input_tokens: Optional[int] = None,
  max_output_tokens: int = 2048,
+ temperature: float = 0.7,
+ seed: Optional[int] = None,
  debug: bool = False,
  **kwargs):
  self.model = model
@@ -79,6 +81,11 @@ class AbstractCoreInterface(ABC):
  self.max_tokens = max_tokens
  self.max_input_tokens = max_input_tokens
  self.max_output_tokens = max_output_tokens
+
+ # Unified generation parameters
+ self.temperature = temperature
+ self.seed = seed
+
  self.debug = debug

  # Validate token parameters
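Both parameters now live on the shared interface, so every provider inherits the same defaults. A minimal sketch of the resulting surface (assuming `create_llm` forwards keyword arguments to the provider constructor, as the README example later in this diff shows):

```python
from abstractcore import create_llm

# temperature and seed are accepted uniformly and stored on the instance
# by AbstractCoreInterface.__init__ above
llm = create_llm("openai", model="gpt-3.5-turbo", temperature=0.2, seed=123)

assert llm.temperature == 0.2
assert llm.seed == 123
```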
@@ -32,8 +32,23 @@ class BasicSession:
  tool_timeout: Optional[float] = None,
  recovery_timeout: Optional[float] = None,
  auto_compact: bool = False,
- auto_compact_threshold: int = 6000):
- """Initialize basic session"""
+ auto_compact_threshold: int = 6000,
+ temperature: Optional[float] = None,
+ seed: Optional[int] = None):
+ """Initialize basic session
+
+ Args:
+     provider: LLM provider instance
+     system_prompt: System prompt for the session
+     tools: List of available tools
+     timeout: HTTP request timeout
+     tool_timeout: Tool execution timeout
+     recovery_timeout: Circuit breaker recovery timeout
+     auto_compact: Enable automatic conversation compaction
+     auto_compact_threshold: Token threshold for auto-compaction
+     temperature: Default temperature for generation (0.0-1.0)
+     seed: Default seed for deterministic generation
+ """

  self.provider = provider
  self.id = str(uuid.uuid4())
@@ -45,6 +60,10 @@ class BasicSession:
  self.auto_compact_threshold = auto_compact_threshold
  self._original_session = None # Track if this is a compacted session

+ # Store session-level generation parameters
+ self.temperature = temperature
+ self.seed = seed
+
  # Optional analytics fields
  self.summary = None
  self.assessment = None
@@ -189,6 +208,12 @@ class BasicSession:
  # Extract media parameter explicitly (fix for media parameter passing)
  media = kwargs.pop('media', None)

+ # Add session-level parameters if not overridden in kwargs
+ if 'temperature' not in kwargs and self.temperature is not None:
+     kwargs['temperature'] = self.temperature
+ if 'seed' not in kwargs and self.seed is not None:
+     kwargs['seed'] = self.seed
+
  # Call provider
  response = self.provider.generate(
      prompt=prompt,
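The precedence is per-call kwargs first, then session values, then provider defaults; a session value of `None` simply defers to the provider. A sketch of the intended usage (import path inferred from the RECORD section below; `generate` is the session method patched above; model names illustrative):

```python
from abstractcore import create_llm
from abstractcore.core.session import BasicSession  # path per RECORD below

llm = create_llm("ollama", model="llama3")
session = BasicSession(provider=llm, temperature=0.1, seed=7)

# Inherits the session defaults: temperature=0.1, seed=7
session.generate("Summarize our discussion so far.")

# Explicit kwargs win over the session-level values
session.generate("Now be creative.", temperature=0.9)
```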
@@ -47,8 +47,7 @@ class AnthropicProvider(BaseProvider):
  # Initialize tool handler
  self.tool_handler = UniversalToolHandler(model)

- # Store configuration (remove duplicate max_tokens)
- self.temperature = kwargs.get("temperature", 0.7)
+ # Store provider-specific configuration
  self.top_p = kwargs.get("top_p", 1.0)
  self.top_k = kwargs.get("top_k", None)

@@ -132,6 +131,19 @@ class AnthropicProvider(BaseProvider):
  if kwargs.get("top_k") or self.top_k:
      call_params["top_k"] = kwargs.get("top_k", self.top_k)

+ # Handle seed parameter (Anthropic doesn't support seed natively)
+ seed_value = kwargs.get("seed", self.seed)
+ if seed_value is not None:
+     import warnings
+     warnings.warn(
+         f"Seed parameter ({seed_value}) is not supported by Anthropic Claude API. "
+         f"For deterministic outputs, use temperature=0.0 which may provide more consistent results, "
+         f"though true determinism is not guaranteed.",
+         UserWarning,
+         stacklevel=3
+     )
+     self.logger.warning(f"Seed {seed_value} requested but not supported by Anthropic API")
+
  # Handle structured output using the "tool trick"
  structured_tool_name = None
  if response_model and PYDANTIC_AVAILABLE:
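Because the fallback is an ordinary `UserWarning`, callers can assert on or silence it with the standard `warnings` machinery. A sketch (model name taken from the README section later in this diff; actually running it requires a valid Anthropic API key):

```python
import warnings

from abstractcore import create_llm

llm = create_llm("anthropic", model="claude-3-5-haiku-latest", seed=42)

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    llm.generate("Hello")  # seed is dropped; the UserWarning above fires
    assert any("not supported by Anthropic" in str(w.message) for w in caught)
```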
@@ -570,8 +570,32 @@ class BaseProvider(AbstractCoreInterface, ABC):
  result_kwargs = kwargs.copy()
  result_kwargs["max_output_tokens"] = effective_max_output

+ # Add unified generation parameters with fallback hierarchy: kwargs → instance → defaults
+ result_kwargs["temperature"] = result_kwargs.get("temperature", self.temperature)
+ if self.seed is not None:
+     result_kwargs["seed"] = result_kwargs.get("seed", self.seed)
+
  return result_kwargs

+ def _extract_generation_params(self, **kwargs) -> Dict[str, Any]:
+     """
+     Extract generation parameters with consistent fallback hierarchy.
+
+     Returns:
+         Dict containing temperature, seed, and other generation parameters
+     """
+     params = {}
+
+     # Temperature (always present)
+     params["temperature"] = kwargs.get("temperature", self.temperature)
+
+     # Seed (only if not None)
+     seed_value = kwargs.get("seed", self.seed)
+     if seed_value is not None:
+         params["seed"] = seed_value
+
+     return params
+
  def _get_provider_max_tokens_param(self, kwargs: Dict[str, Any]) -> int:
      """
      Extract the appropriate max tokens parameter for this provider.
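Both helpers encode the same precedence rule: an explicit kwarg beats the instance attribute, and `seed` is forwarded only when one of the two is set. A standalone restatement of that hierarchy, runnable without any provider:

```python
def resolve(kwargs: dict, instance_temperature: float, instance_seed):
    """kwargs → instance attribute → default, as in the methods above."""
    params = {"temperature": kwargs.get("temperature", instance_temperature)}
    seed = kwargs.get("seed", instance_seed)
    if seed is not None:  # seed is omitted entirely when unset everywhere
        params["seed"] = seed
    return params

assert resolve({}, 0.7, None) == {"temperature": 0.7}
assert resolve({"temperature": 0.0}, 0.7, 42) == {"temperature": 0.0, "seed": 42}
assert resolve({"seed": 1}, 0.7, None) == {"temperature": 0.7, "seed": 1}
```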
@@ -68,6 +68,7 @@ class HuggingFaceProvider(BaseProvider):
  # Initialize tool handler
  self.tool_handler = UniversalToolHandler(model)

+ # Store provider-specific configuration
  self.n_gpu_layers = n_gpu_layers
  self.model_type = None # Will be "transformers" or "gguf"
  self.device = device
@@ -537,14 +538,15 @@ class HuggingFaceProvider(BaseProvider):
  # Generation parameters using unified system
  generation_kwargs = self._prepare_generation_kwargs(**kwargs)
  max_new_tokens = self._get_provider_max_tokens_param(generation_kwargs)
- temperature = kwargs.get("temperature", 0.7)
+ temperature = kwargs.get("temperature", self.temperature)
  top_p = kwargs.get("top_p", 0.9)
+ seed_value = kwargs.get("seed", self.seed)

  try:
      if stream:
-         return self._stream_generate_transformers_with_tools(input_text, max_new_tokens, temperature, top_p, tools, kwargs.get('tool_call_tags'))
+         return self._stream_generate_transformers_with_tools(input_text, max_new_tokens, temperature, top_p, tools, kwargs.get('tool_call_tags'), seed_value)
      else:
-         response = self._single_generate_transformers(input_text, max_new_tokens, temperature, top_p)
+         response = self._single_generate_transformers(input_text, max_new_tokens, temperature, top_p, seed_value)

  # Handle tool execution for prompted models
  if tools and self.tool_handler.supports_prompted and response.content:
@@ -651,11 +653,16 @@ class HuggingFaceProvider(BaseProvider):
  generation_kwargs = {
      "messages": chat_messages,
      "max_tokens": max_output_tokens, # This is max_output_tokens for llama-cpp
-     "temperature": kwargs.get("temperature", 0.7),
+     "temperature": kwargs.get("temperature", self.temperature),
      "top_p": kwargs.get("top_p", 0.9),
      "stream": stream
  }

+ # Add seed if provided (GGUF/llama-cpp supports seed)
+ seed_value = kwargs.get("seed", self.seed)
+ if seed_value is not None:
+     generation_kwargs["seed"] = seed_value
+
  # Handle tools - both native and prompted support
  has_native_tools = False
  if tools:
@@ -846,9 +853,16 @@ class HuggingFaceProvider(BaseProvider):
  )

  def _single_generate_transformers(self, input_text: str, max_new_tokens: int,
-                                   temperature: float, top_p: float) -> GenerateResponse:
+                                   temperature: float, top_p: float, seed: Optional[int] = None) -> GenerateResponse:
      """Generate single response using transformers (original implementation)"""
      try:
+         # Set seed for deterministic generation if provided
+         if seed is not None:
+             import torch
+             torch.manual_seed(seed)
+             if torch.cuda.is_available():
+                 torch.cuda.manual_seed_all(seed)
+
          outputs = self.pipeline(
              input_text,
              max_new_tokens=max_new_tokens,
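`torch.manual_seed` reseeds the process-wide RNG, which is what makes the sampling in `self.pipeline(...)` repeatable; note that this is global state shared with any other torch code in the process. The effect in isolation:

```python
import torch

torch.manual_seed(42)
a = torch.rand(3)

torch.manual_seed(42)
b = torch.rand(3)

assert torch.equal(a, b)  # same seed, same draws
```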
@@ -902,11 +916,11 @@ class HuggingFaceProvider(BaseProvider):
  }

  def _stream_generate_transformers(self, input_text: str, max_new_tokens: int,
-                                   temperature: float, top_p: float, tool_call_tags: Optional[str] = None) -> Iterator[GenerateResponse]:
+                                   temperature: float, top_p: float, tool_call_tags: Optional[str] = None, seed: Optional[int] = None) -> Iterator[GenerateResponse]:
      """Stream response using transformers (simulated, original implementation) with tool tag rewriting support"""
      try:
          # HuggingFace doesn't have native streaming, so we simulate it
-         full_response = self._single_generate_transformers(input_text, max_new_tokens, temperature, top_p)
+         full_response = self._single_generate_transformers(input_text, max_new_tokens, temperature, top_p, seed)

          if full_response.content:
              # Apply tool tag rewriting if enabled
@@ -1039,12 +1053,12 @@ class HuggingFaceProvider(BaseProvider):
  def _stream_generate_transformers_with_tools(self, input_text: str, max_new_tokens: int,
                                               temperature: float, top_p: float,
                                               tools: Optional[List[Dict[str, Any]]] = None,
-                                              tool_call_tags: Optional[str] = None) -> Iterator[GenerateResponse]:
+                                              tool_call_tags: Optional[str] = None, seed: Optional[int] = None) -> Iterator[GenerateResponse]:
      """Stream generate with tool execution at the end"""
      collected_content = ""

      # Stream the response content
-     for chunk in self._stream_generate_transformers(input_text, max_new_tokens, temperature, top_p, tool_call_tags):
+     for chunk in self._stream_generate_transformers(input_text, max_new_tokens, temperature, top_p, tool_call_tags, seed):
          collected_content += chunk.content
          yield chunk

@@ -196,11 +196,16 @@ class LMStudioProvider(BaseProvider):
  "model": self.model,
  "messages": chat_messages,
  "stream": stream,
- "temperature": kwargs.get("temperature", 0.7),
+ "temperature": kwargs.get("temperature", self.temperature),
  "max_tokens": max_output_tokens, # LMStudio uses max_tokens for output tokens
  "top_p": kwargs.get("top_p", 0.9),
  }

+ # Add seed if provided (LMStudio supports seed via OpenAI-compatible API)
+ seed_value = kwargs.get("seed", self.seed)
+ if seed_value is not None:
+     payload["seed"] = seed_value
+
  if stream:
      # Return streaming response - BaseProvider will handle tag rewriting via UnifiedStreamProcessor
      return self._stream_generate(payload)
@@ -189,14 +189,15 @@ class MLXProvider(BaseProvider):
  # MLX generation parameters using unified system
  generation_kwargs = self._prepare_generation_kwargs(**kwargs)
  max_tokens = self._get_provider_max_tokens_param(generation_kwargs)
- temperature = kwargs.get("temperature", 0.7)
+ temperature = kwargs.get("temperature", self.temperature)
  top_p = kwargs.get("top_p", 0.9)
+ seed_value = kwargs.get("seed", self.seed)

  try:
      if stream:
-         return self._stream_generate_with_tools(full_prompt, max_tokens, temperature, top_p, tools, kwargs.get('tool_call_tags'))
+         return self._stream_generate_with_tools(full_prompt, max_tokens, temperature, top_p, tools, kwargs.get('tool_call_tags'), seed_value)
      else:
-         response = self._single_generate(full_prompt, max_tokens, temperature, top_p)
+         response = self._single_generate(full_prompt, max_tokens, temperature, top_p, seed_value)

  # Handle tool execution for prompted models
  if tools and self.tool_handler.supports_prompted and response.content:
@@ -256,9 +257,15 @@ class MLXProvider(BaseProvider):

  return full_prompt

- def _single_generate(self, prompt: str, max_tokens: int, temperature: float, top_p: float) -> GenerateResponse:
+ def _single_generate(self, prompt: str, max_tokens: int, temperature: float, top_p: float, seed: Optional[int] = None) -> GenerateResponse:
      """Generate single response"""

+     # Handle seed parameter (MLX supports seed via mx.random.seed)
+     if seed is not None:
+         import mlx.core as mx
+         mx.random.seed(seed)
+         self.logger.debug(f"Set MLX random seed to {seed} for deterministic generation")
+
      # Try different MLX API signatures
      try:
          # Try new mlx-lm API
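MLX likewise keeps one global RNG, reseeded via `mx.random.seed`; the same caveat about shared process-wide state applies. A sketch of the reproducibility this buys (Apple-silicon only):

```python
import mlx.core as mx  # requires Apple silicon

mx.random.seed(7)
a = mx.random.uniform(shape=(3,))

mx.random.seed(7)
b = mx.random.uniform(shape=(3,))

assert mx.array_equal(a, b)  # identical draws after reseeding
```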
@@ -305,9 +312,15 @@ class MLXProvider(BaseProvider):
  "total_tokens": total_tokens
  }

- def _stream_generate(self, prompt: str, max_tokens: int, temperature: float, top_p: float, tool_call_tags: Optional[str] = None) -> Iterator[GenerateResponse]:
+ def _stream_generate(self, prompt: str, max_tokens: int, temperature: float, top_p: float, tool_call_tags: Optional[str] = None, seed: Optional[int] = None) -> Iterator[GenerateResponse]:
      """Generate real streaming response using MLX stream_generate with tool tag rewriting support"""
      try:
+         # Handle seed parameter (MLX supports seed via mx.random.seed)
+         if seed is not None:
+             import mlx.core as mx
+             mx.random.seed(seed)
+             self.logger.debug(f"Set MLX random seed to {seed} for deterministic streaming generation")
+
          # Initialize tool tag rewriter if needed
          rewriter = None
          buffer = ""
@@ -366,12 +379,12 @@ class MLXProvider(BaseProvider):
  def _stream_generate_with_tools(self, full_prompt: str, max_tokens: int,
                                  temperature: float, top_p: float,
                                  tools: Optional[List[Dict[str, Any]]] = None,
-                                 tool_call_tags: Optional[str] = None) -> Iterator[GenerateResponse]:
+                                 tool_call_tags: Optional[str] = None, seed: Optional[int] = None) -> Iterator[GenerateResponse]:
      """Stream generate with tool execution at the end"""
      collected_content = ""

      # Stream the response content
-     for chunk in self._stream_generate(full_prompt, max_tokens, temperature, top_p, tool_call_tags):
+     for chunk in self._stream_generate(full_prompt, max_tokens, temperature, top_p, tool_call_tags, seed):
          collected_content += chunk.content
          yield chunk

@@ -132,11 +132,16 @@ class OllamaProvider(BaseProvider):
  "model": self.model,
  "stream": stream,
  "options": {
-     "temperature": kwargs.get("temperature", 0.7),
+     "temperature": kwargs.get("temperature", self.temperature),
      "num_predict": max_output_tokens, # Ollama uses num_predict for max output tokens
  }
  }

+ # Add seed if provided (Ollama supports seed for deterministic outputs)
+ seed_value = kwargs.get("seed", self.seed)
+ if seed_value is not None:
+     payload["options"]["seed"] = seed_value
+
  # Add structured output support (Ollama native JSON schema)
  if response_model and PYDANTIC_AVAILABLE:
      json_schema = response_model.model_json_schema()
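The resulting request body matches Ollama's native options block, where `seed` sits next to `temperature` and `num_predict`. A sketch of what goes over the wire (endpoint and message shape assumed to be Ollama's standard `/api/chat`; model name illustrative):

```python
import requests

payload = {
    "model": "llama3",
    "stream": False,
    "messages": [{"role": "user", "content": "Say hi"}],
    "options": {
        "temperature": 0.0,
        "num_predict": 256,  # Ollama's max-output-tokens knob
        "seed": 42,          # present only when a seed was resolved
    },
}
resp = requests.post("http://localhost:11434/api/chat", json=payload, timeout=30)
print(resp.json()["message"]["content"])
```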
@@ -50,8 +50,7 @@ class OpenAIProvider(BaseProvider):
  # Preflight check: validate model exists
  self._validate_model_exists()

- # Store configuration (remove duplicate max_tokens)
- self.temperature = kwargs.get("temperature", 0.7)
+ # Store provider-specific configuration
  self.top_p = kwargs.get("top_p", 1.0)
  self.frequency_penalty = kwargs.get("frequency_penalty", 0.0)
  self.presence_penalty = kwargs.get("presence_penalty", 0.0)
@@ -125,6 +124,11 @@ class OpenAIProvider(BaseProvider):
  call_params["top_p"] = kwargs.get("top_p", self.top_p)
  call_params["frequency_penalty"] = kwargs.get("frequency_penalty", self.frequency_penalty)
  call_params["presence_penalty"] = kwargs.get("presence_penalty", self.presence_penalty)
+
+ # Add seed if provided (OpenAI supports seed for deterministic outputs)
+ seed_value = kwargs.get("seed", self.seed)
+ if seed_value is not None:
+     call_params["seed"] = seed_value

  # Handle different token parameter names for different model families
  if self._uses_max_completion_tokens():
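For comparison, this is the shape OpenAI's own client expects; `seed` is documented as best-effort, so pairing it with `temperature=0.0` and checking `system_fingerprint` across calls is the usual way to verify reproducibility:

```python
from openai import OpenAI  # upstream client, shown for comparison only

client = OpenAI()
resp = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Write exactly 3 words about coding"}],
    seed=42,
    temperature=0.0,
)
# Same seed + unchanged system_fingerprint => outputs should match across calls
print(resp.system_fingerprint, resp.choices[0].message.content)
```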
@@ -11,17 +11,29 @@ import os
  import subprocess
  import requests
  from pathlib import Path
- from typing import Optional
+ from typing import Optional, Dict, Any, Union
  import logging
  import platform
  import re
  import time
+ import json
+ import base64
+ from datetime import datetime
+ from urllib.parse import urlparse, urljoin
+ import mimetypes

  try:
      from bs4 import BeautifulSoup
      BS4_AVAILABLE = True
+     # Try to use lxml parser for better performance
+     try:
+         import lxml
+         BS4_PARSER = 'lxml'
+     except ImportError:
+         BS4_PARSER = 'html.parser'
  except ImportError:
      BS4_AVAILABLE = False
+     BS4_PARSER = None

  try:
      import psutil
@@ -995,6 +1007,643 @@ def web_search(query: str, num_results: int = 5, safe_search: str = "moderate",
  return f"Error searching internet: {str(e)}"


+ @tool(
+     description="Fetch and intelligently parse content from URLs with automatic content type detection and metadata extraction",
+     tags=["web", "fetch", "url", "http", "content", "parse", "scraping"],
+     when_to_use="When you need to retrieve and analyze content from specific URLs, including web pages, APIs, documents, or media files",
+     examples=[
+         {
+             "description": "Fetch and parse HTML webpage",
+             "arguments": {
+                 "url": "https://example.com/article.html"
+             }
+         },
+         {
+             "description": "Fetch JSON API response",
+             "arguments": {
+                 "url": "https://api.github.com/repos/python/cpython",
+                 "headers": {"Accept": "application/json"}
+             }
+         },
+         {
+             "description": "POST data to API endpoint",
+             "arguments": {
+                 "url": "https://httpbin.org/post",
+                 "method": "POST",
+                 "data": {"key": "value", "test": "data"}
+             }
+         },
+         {
+             "description": "Fetch binary content with metadata",
+             "arguments": {
+                 "url": "https://example.com/document.pdf",
+                 "include_binary_preview": True
+             }
+         }
+     ]
+ )
+ def fetch_url(
+     url: str,
+     method: str = "GET",
+     headers: Optional[Dict[str, str]] = None,
+     data: Optional[Union[Dict[str, Any], str]] = None,
+     timeout: int = 30,
+     max_content_length: int = 10485760,  # 10MB default
+     follow_redirects: bool = True,
+     include_binary_preview: bool = False,
+     extract_links: bool = True,
+     user_agent: str = "AbstractCore-FetchTool/1.0"
+ ) -> str:
+     """
+     Fetch and intelligently parse content from URLs with comprehensive content type detection.
+
+     This tool automatically detects content types (HTML, JSON, XML, images, etc.) and provides
+     appropriate parsing with metadata extraction including timestamps and response headers.
+
+     Args:
+         url: The URL to fetch content from
+         method: HTTP method to use (default: "GET")
+         headers: Optional custom headers to send with the request
+         data: Optional data to send with POST/PUT requests (dict or string)
+         timeout: Request timeout in seconds (default: 30)
+         max_content_length: Maximum content length to fetch in bytes (default: 10MB)
+         follow_redirects: Whether to follow HTTP redirects (default: True)
+         include_binary_preview: Whether to include base64 preview for binary content (default: False)
+         extract_links: Whether to extract links from HTML content (default: True)
+         user_agent: User-Agent header to use (default: "AbstractCore-FetchTool/1.0")
+
+     Returns:
+         Formatted string with parsed content, metadata, and analysis or error message
+
+     Examples:
+         fetch_url("https://api.github.com/repos/python/cpython")  # Fetch and parse JSON API
+         fetch_url("https://example.com", headers={"Accept": "text/html"})  # Fetch HTML with custom headers
+         fetch_url("https://httpbin.org/post", method="POST", data={"test": "value"})  # POST request
+         fetch_url("https://example.com/image.jpg", include_binary_preview=True)  # Fetch image with preview
+     """
+     try:
+         # Validate URL
+         parsed_url = urlparse(url)
+         if not parsed_url.scheme or not parsed_url.netloc:
+             return f"❌ Invalid URL format: {url}"
+
+         if parsed_url.scheme not in ['http', 'https']:
+             return f"❌ Unsupported URL scheme: {parsed_url.scheme}. Only HTTP and HTTPS are supported."
+
+         # Prepare request headers
+         request_headers = {
+             'User-Agent': user_agent,
+             'Accept': '*/*',
+             'Accept-Encoding': 'gzip, deflate',
+             'Connection': 'keep-alive'
+         }
+
+         if headers:
+             request_headers.update(headers)
+
+         # Prepare request parameters
+         request_params = {
+             'url': url,
+             'method': method.upper(),
+             'headers': request_headers,
+             'timeout': timeout,
+             'allow_redirects': follow_redirects,
+             'stream': True  # Stream to check content length
+         }
+
+         # Add data for POST/PUT requests
+         if data and method.upper() in ['POST', 'PUT', 'PATCH']:
+             if isinstance(data, dict):
+                 # Try JSON first, fallback to form data
+                 if request_headers.get('Content-Type', '').startswith('application/json'):
+                     request_params['json'] = data
+                 else:
+                     request_params['data'] = data
+             else:
+                 request_params['data'] = data
+
+         # Record fetch timestamp
+         fetch_timestamp = datetime.now().isoformat()
+
+         # Make the request with session for connection reuse
+         with requests.Session() as session:
+             session.headers.update(request_headers)
+             response = session.request(
+                 method=method.upper(),
+                 url=url,
+                 timeout=timeout,
+                 allow_redirects=follow_redirects,
+                 stream=True,
+                 json=request_params.get('json'),
+                 data=request_params.get('data')
+             )
+
+             # Check response status
+             if not response.ok:
+                 return f"❌ HTTP Error {response.status_code}: {response.reason}\n" \
+                        f"URL: {url}\n" \
+                        f"Timestamp: {fetch_timestamp}\n" \
+                        f"Response headers: {dict(response.headers)}"
+
+             # Get content info
+             content_type = response.headers.get('content-type', '').lower()
+             content_length = response.headers.get('content-length')
+             if content_length:
+                 content_length = int(content_length)
+
+             # Check content length before downloading
+             if content_length and content_length > max_content_length:
+                 return f"⚠️ Content too large: {content_length:,} bytes (max: {max_content_length:,})\n" \
+                        f"URL: {url}\n" \
+                        f"Content-Type: {content_type}\n" \
+                        f"Timestamp: {fetch_timestamp}\n" \
+                        f"Use max_content_length parameter to increase limit if needed"
+
+             # Download content with optimized chunking
+             content_chunks = []
+             downloaded_size = 0
+
+             # Use larger chunks for better performance
+             chunk_size = 32768 if 'image/' in content_type or 'video/' in content_type else 16384
+
+             for chunk in response.iter_content(chunk_size=chunk_size):
+                 if chunk:
+                     downloaded_size += len(chunk)
+                     if downloaded_size > max_content_length:
+                         return f"⚠️ Content exceeded size limit during download: {downloaded_size:,} bytes (max: {max_content_length:,})\n" \
+                                f"URL: {url}\n" \
+                                f"Content-Type: {content_type}\n" \
+                                f"Timestamp: {fetch_timestamp}"
+                     content_chunks.append(chunk)
+
+             content_bytes = b''.join(content_chunks)
+             actual_size = len(content_bytes)
+
+             # Detect content type and parse accordingly
+             parsed_content = _parse_content_by_type(content_bytes, content_type, url, extract_links, include_binary_preview)
+
+             # Build comprehensive response
+             result_parts = []
+             result_parts.append(f"🌐 URL Fetch Results")
+             result_parts.append(f"📍 URL: {response.url}")  # Final URL after redirects
+             if response.url != url:
+                 result_parts.append(f"🔄 Original URL: {url}")
+             result_parts.append(f"⏰ Timestamp: {fetch_timestamp}")
+             result_parts.append(f"✅ Status: {response.status_code} {response.reason}")
+             result_parts.append(f"📊 Content-Type: {content_type}")
+             result_parts.append(f"📏 Size: {actual_size:,} bytes")
+
+             # Add important response headers
+             important_headers = ['server', 'last-modified', 'etag', 'cache-control', 'expires', 'location']
+             response_metadata = []
+             for header in important_headers:
+                 value = response.headers.get(header)
+                 if value:
+                     response_metadata.append(f"  {header.title()}: {value}")
+
+             if response_metadata:
+                 result_parts.append(f"📋 Response Headers:")
+                 result_parts.extend(response_metadata)
+
+             # Add parsed content
+             result_parts.append(f"\n📄 Content Analysis:")
+             result_parts.append(parsed_content)
+
+             return "\n".join(result_parts)
+
+     except requests.exceptions.Timeout:
+         return f"⏰ Request timeout after {timeout} seconds\n" \
+                f"URL: {url}\n" \
+                f"Consider increasing timeout parameter"
+
+     except requests.exceptions.ConnectionError as e:
+         return f"🔌 Connection error: {str(e)}\n" \
+                f"URL: {url}\n" \
+                f"Check network connectivity and URL validity"
+
+     except requests.exceptions.TooManyRedirects:
+         return f"🔄 Too many redirects\n" \
+                f"URL: {url}\n" \
+                f"Try setting follow_redirects=False to see redirect chain"
+
+     except requests.exceptions.RequestException as e:
+         return f"❌ Request error: {str(e)}\n" \
+                f"URL: {url}"
+
+     except Exception as e:
+         return f"❌ Unexpected error fetching URL: {str(e)}\n" \
+                f"URL: {url}"
+
+
+ def _parse_content_by_type(content_bytes: bytes, content_type: str, url: str, extract_links: bool = True, include_binary_preview: bool = False) -> str:
+     """
+     Parse content based on detected content type with intelligent fallbacks.
+
+     This function provides robust content type detection and parsing for various formats
+     including HTML, JSON, XML, plain text, images, and other binary formats.
+     """
+     try:
+         # Normalize content type
+         main_type = content_type.split(';')[0].strip().lower()
+
+         # Try to decode as text first for text-based formats
+         text_content = None
+         encoding = 'utf-8'
+
+         # Detect encoding from content-type header
+         if 'charset=' in content_type:
+             try:
+                 encoding = content_type.split('charset=')[1].split(';')[0].strip()
+             except:
+                 encoding = 'utf-8'
+
+         # Attempt text decoding for text-based content types with better encoding detection
+         text_based_types = [
+             'text/', 'application/json', 'application/xml', 'application/javascript',
+             'application/rss+xml', 'application/atom+xml', 'application/xhtml+xml'
+         ]
+
+         is_text_based = any(main_type.startswith(t) for t in text_based_types)
+
+         if is_text_based:
+             # Try multiple encoding strategies
+             for enc in [encoding, 'utf-8', 'iso-8859-1', 'windows-1252']:
+                 try:
+                     text_content = content_bytes.decode(enc)
+                     break
+                 except (UnicodeDecodeError, LookupError):
+                     continue
+             else:
+                 # Final fallback with error replacement
+                 text_content = content_bytes.decode('utf-8', errors='replace')
+
+         # Parse based on content type
+         if main_type.startswith('text/html') or main_type.startswith('application/xhtml'):
+             return _parse_html_content(text_content, url, extract_links)
+
+         elif main_type == 'application/json':
+             return _parse_json_content(text_content)
+
+         elif main_type in ['application/xml', 'text/xml', 'application/rss+xml', 'application/atom+xml']:
+             return _parse_xml_content(text_content)
+
+         elif main_type.startswith('text/'):
+             return _parse_text_content(text_content, main_type)
+
+         elif main_type.startswith('image/'):
+             return _parse_image_content(content_bytes, main_type, include_binary_preview)
+
+         elif main_type == 'application/pdf':
+             return _parse_pdf_content(content_bytes, include_binary_preview)
+
+         else:
+             return _parse_binary_content(content_bytes, main_type, include_binary_preview)
+
+     except Exception as e:
+         return f"❌ Error parsing content: {str(e)}\n" \
+                f"Content-Type: {content_type}\n" \
+                f"Content size: {len(content_bytes):,} bytes"
+
+
+ def _parse_html_content(html_content: str, url: str, extract_links: bool = True) -> str:
+     """Parse HTML content and extract meaningful information."""
+     if not html_content:
+         return "❌ No HTML content to parse"
+
+     result_parts = []
+     result_parts.append("🌐 HTML Document Analysis")
+
+     # Use BeautifulSoup if available for better parsing
+     if BS4_AVAILABLE:
+         try:
+             soup = BeautifulSoup(html_content, BS4_PARSER)
+
+             # Extract title
+             title = soup.find('title')
+             if title:
+                 result_parts.append(f"📰 Title: {title.get_text().strip()}")
+
+             # Extract meta description
+             meta_desc = soup.find('meta', attrs={'name': 'description'})
+             if meta_desc and meta_desc.get('content'):
+                 result_parts.append(f"📝 Description: {meta_desc['content'][:200]}...")
+
+             # Extract headings
+             headings = []
+             for i in range(1, 7):
+                 h_tags = soup.find_all(f'h{i}')
+                 for h in h_tags[:5]:  # Limit to first 5 of each level
+                     headings.append(f"H{i}: {h.get_text().strip()[:100]}")
+
+             if headings:
+                 result_parts.append(f"📋 Headings (first 5 per level):")
+                 for heading in headings[:10]:  # Limit total headings
+                     result_parts.append(f"  • {heading}")
+
+             # Extract links if requested
+             if extract_links:
+                 links = []
+                 for a in soup.find_all('a', href=True)[:20]:  # Limit to first 20 links
+                     href = a['href']
+                     text = a.get_text().strip()[:50]
+                     # Convert relative URLs to absolute
+                     if href.startswith('/'):
+                         href = urljoin(url, href)
+                     elif not href.startswith(('http://', 'https://')):
+                         href = urljoin(url, href)
+                     links.append(f"{text} → {href}")
+
+                 if links:
+                     result_parts.append(f"🔗 Links (first 20):")
+                     for link in links:
+                         result_parts.append(f"  • {link}")
+
+             # Extract main text content with better cleaning
+             # Remove script, style, nav, footer, header elements for cleaner content
+             for element in soup(["script", "style", "nav", "footer", "header", "aside"]):
+                 element.decompose()
+
+             # Try to find main content area first
+             main_content = soup.find(['main', 'article']) or soup.find('div', class_=lambda x: x and any(word in x.lower() for word in ['content', 'article', 'post', 'main']))
+             content_soup = main_content if main_content else soup
+
+             text = content_soup.get_text()
+             # Clean up text more efficiently
+             lines = (line.strip() for line in text.splitlines() if line.strip())
+             text = ' '.join(lines)
+             # Remove excessive whitespace
+             text = ' '.join(text.split())
+
+             if text:
+                 preview_length = 500
+                 text_preview = text[:preview_length]
+                 if len(text) > preview_length:
+                     text_preview += "..."
+                 result_parts.append(f"📄 Text Content Preview:")
+                 result_parts.append(f"{text_preview}")
+                 result_parts.append(f"📊 Total text length: {len(text):,} characters")
+
+         except Exception as e:
+             result_parts.append(f"⚠️ BeautifulSoup parsing error: {str(e)}")
+             result_parts.append(f"📄 Raw HTML Preview (first 1000 chars):")
+             result_parts.append(html_content[:1000] + ("..." if len(html_content) > 1000 else ""))
+
+     else:
+         # Fallback parsing without BeautifulSoup
+         result_parts.append("⚠️ BeautifulSoup not available - using basic parsing")
+
+         # Extract title with regex
+         import re
+         title_match = re.search(r'<title[^>]*>(.*?)</title>', html_content, re.IGNORECASE | re.DOTALL)
+         if title_match:
+             result_parts.append(f"📰 Title: {title_match.group(1).strip()}")
+
+         # Show HTML preview
+         result_parts.append(f"📄 HTML Preview (first 1000 chars):")
+         result_parts.append(html_content[:1000] + ("..." if len(html_content) > 1000 else ""))
+
+     return "\n".join(result_parts)
+
+
+ def _parse_json_content(json_content: str) -> str:
+     """Parse JSON content and provide structured analysis."""
+     if not json_content:
+         return "❌ No JSON content to parse"
+
+     result_parts = []
+     result_parts.append("📊 JSON Data Analysis")
+
+     try:
+         data = json.loads(json_content)
+
+         # Analyze JSON structure
+         result_parts.append(f"📋 Structure: {type(data).__name__}")
+
+         if isinstance(data, dict):
+             result_parts.append(f"🔑 Keys ({len(data)}): {', '.join(list(data.keys())[:10])}")
+             if len(data) > 10:
+                 result_parts.append(f"  ... and {len(data) - 10} more keys")
+         elif isinstance(data, list):
+             result_parts.append(f"📝 Array length: {len(data)}")
+             if data and isinstance(data[0], dict):
+                 result_parts.append(f"🔑 First item keys: {', '.join(list(data[0].keys())[:10])}")
+
+         # Pretty print JSON with smart truncation
+         json_str = json.dumps(data, indent=2, ensure_ascii=False, separators=(',', ': '))
+         preview_length = 1500  # Reduced for better readability
+         if len(json_str) > preview_length:
+             # Try to truncate at a logical point (end of object/array)
+             truncate_pos = json_str.rfind('\n', 0, preview_length)
+             if truncate_pos > preview_length - 200:  # If close to limit, use it
+                 json_preview = json_str[:truncate_pos] + "\n... (truncated)"
+             else:
+                 json_preview = json_str[:preview_length] + "\n... (truncated)"
+         else:
+             json_preview = json_str
+
+         result_parts.append(f"📄 JSON Content:")
+         result_parts.append(json_preview)
+         result_parts.append(f"📊 Total size: {len(json_content):,} characters")
+
+     except json.JSONDecodeError as e:
+         result_parts.append(f"❌ JSON parsing error: {str(e)}")
+         result_parts.append(f"📄 Raw content preview (first 1000 chars):")
+         result_parts.append(json_content[:1000] + ("..." if len(json_content) > 1000 else ""))
+
+     return "\n".join(result_parts)
+
+
+ def _parse_xml_content(xml_content: str) -> str:
+     """Parse XML content including RSS/Atom feeds."""
+     if not xml_content:
+         return "❌ No XML content to parse"
+
+     result_parts = []
+     result_parts.append("📄 XML/RSS/Atom Analysis")
+
+     try:
+         # Try to detect if it's RSS/Atom
+         if '<rss' in xml_content.lower() or '<feed' in xml_content.lower():
+             result_parts.append("📡 Detected: RSS/Atom Feed")
+
+         # Basic XML structure analysis
+         import re
+
+         # Find root element
+         root_match = re.search(r'<([^?\s/>]+)', xml_content)
+         if root_match:
+             result_parts.append(f"🏷️ Root element: <{root_match.group(1)}>")
+
+         # Count elements (basic)
+         elements = re.findall(r'<([^/\s>]+)', xml_content)
+         if elements:
+             from collections import Counter
+             element_counts = Counter(elements[:50])  # Limit analysis
+             result_parts.append(f"📊 Top elements: {dict(list(element_counts.most_common(10)))}")
+
+         # Show XML preview
+         preview_length = 1500
+         xml_preview = xml_content[:preview_length]
+         if len(xml_content) > preview_length:
+             xml_preview += "\n... (truncated)"
+
+         result_parts.append(f"📄 XML Content Preview:")
+         result_parts.append(xml_preview)
+         result_parts.append(f"📊 Total size: {len(xml_content):,} characters")
+
+     except Exception as e:
+         result_parts.append(f"❌ XML parsing error: {str(e)}")
+         result_parts.append(f"📄 Raw content preview (first 1000 chars):")
+         result_parts.append(xml_content[:1000] + ("..." if len(xml_content) > 1000 else ""))
+
+     return "\n".join(result_parts)
+
+
+ def _parse_text_content(text_content: str, content_type: str) -> str:
+     """Parse plain text content."""
+     if not text_content:
+         return "❌ No text content to parse"
+
+     result_parts = []
+     result_parts.append(f"📝 Text Content Analysis ({content_type})")
+
+     # Basic text statistics
+     lines = text_content.splitlines()
+     words = text_content.split()
+
+     result_parts.append(f"📊 Statistics:")
+     result_parts.append(f"  • Lines: {len(lines):,}")
+     result_parts.append(f"  • Words: {len(words):,}")
+     result_parts.append(f"  • Characters: {len(text_content):,}")
+
+     # Show text preview
+     preview_length = 2000
+     text_preview = text_content[:preview_length]
+     if len(text_content) > preview_length:
+         text_preview += "\n... (truncated)"
+
+     result_parts.append(f"📄 Content Preview:")
+     result_parts.append(text_preview)
+
+     return "\n".join(result_parts)
+
+
+ def _parse_image_content(image_bytes: bytes, content_type: str, include_preview: bool = False) -> str:
+     """Parse image content and extract metadata."""
+     result_parts = []
+     result_parts.append(f"🖼️ Image Analysis ({content_type})")
+
+     result_parts.append(f"📊 Size: {len(image_bytes):,} bytes")
+
+     # Try to get image dimensions (basic approach)
+     try:
+         if content_type.startswith('image/jpeg') or content_type.startswith('image/jpg'):
+             # Basic JPEG header parsing for dimensions
+             if image_bytes.startswith(b'\xff\xd8\xff'):
+                 result_parts.append("✅ Valid JPEG format detected")
+         elif content_type.startswith('image/png'):
+             # Basic PNG header parsing
+             if image_bytes.startswith(b'\x89PNG\r\n\x1a\n'):
+                 result_parts.append("✅ Valid PNG format detected")
+         elif content_type.startswith('image/gif'):
+             if image_bytes.startswith(b'GIF87a') or image_bytes.startswith(b'GIF89a'):
+                 result_parts.append("✅ Valid GIF format detected")
+     except Exception:
+         pass
+
+     if include_preview:
+         # Provide base64 preview for small images
+         if len(image_bytes) <= 1048576:  # 1MB limit for preview
+             b64_preview = base64.b64encode(image_bytes[:1024]).decode('ascii')  # First 1KB
+             result_parts.append(f"🔍 Base64 Preview (first 1KB):")
+             result_parts.append(f"{b64_preview}...")
+         else:
+             result_parts.append("⚠️ Image too large for base64 preview")
+
+     result_parts.append("💡 Use image processing tools for detailed analysis")
+
+     return "\n".join(result_parts)
+
+
+ def _parse_pdf_content(pdf_bytes: bytes, include_preview: bool = False) -> str:
+     """Parse PDF content and extract basic metadata."""
+     result_parts = []
+     result_parts.append("📄 PDF Document Analysis")
+
+     result_parts.append(f"📊 Size: {len(pdf_bytes):,} bytes")
+
+     # Check PDF header
+     if pdf_bytes.startswith(b'%PDF-'):
+         try:
+             version_line = pdf_bytes[:20].decode('ascii', errors='ignore')
+             result_parts.append(f"✅ Valid PDF format: {version_line.strip()}")
+         except:
+             result_parts.append("✅ Valid PDF format detected")
+     else:
+         result_parts.append("⚠️ Invalid PDF format - missing PDF header")
+
+     if include_preview:
+         # Show hex preview of first few bytes
+         hex_preview = ' '.join(f'{b:02x}' for b in pdf_bytes[:64])
+         result_parts.append(f"🔍 Hex Preview (first 64 bytes):")
+         result_parts.append(hex_preview)
+
+     result_parts.append("💡 Use PDF processing tools for text extraction and detailed analysis")
+
+     return "\n".join(result_parts)
+
+
+ def _parse_binary_content(binary_bytes: bytes, content_type: str, include_preview: bool = False) -> str:
+     """Parse generic binary content."""
+     result_parts = []
+     result_parts.append(f"📦 Binary Content Analysis ({content_type})")
+
+     result_parts.append(f"📊 Size: {len(binary_bytes):,} bytes")
+
+     # Detect file type by magic bytes
+     magic_signatures = {
+         b'\x50\x4b\x03\x04': 'ZIP archive',
+         b'\x50\x4b\x05\x06': 'ZIP archive (empty)',
+         b'\x50\x4b\x07\x08': 'ZIP archive (spanned)',
+         b'\x1f\x8b\x08': 'GZIP compressed',
+         b'\x42\x5a\x68': 'BZIP2 compressed',
+         b'\x37\x7a\xbc\xaf\x27\x1c': '7-Zip archive',
+         b'\x52\x61\x72\x21\x1a\x07': 'RAR archive',
+         b'\x89\x50\x4e\x47\x0d\x0a\x1a\x0a': 'PNG image',
+         b'\xff\xd8\xff': 'JPEG image',
+         b'\x47\x49\x46\x38': 'GIF image',
+         b'\x25\x50\x44\x46': 'PDF document',
+         b'\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1': 'Microsoft Office document',
+         b'\x4d\x5a': 'Windows executable'
+     }
+
+     detected_type = None
+     for signature, file_type in magic_signatures.items():
+         if binary_bytes.startswith(signature):
+             detected_type = file_type
+             break
+
+     if detected_type:
+         result_parts.append(f"🔍 Detected format: {detected_type}")
+
+     if include_preview:
+         # Show hex preview
+         hex_preview = ' '.join(f'{b:02x}' for b in binary_bytes[:64])
+         result_parts.append(f"🔍 Hex Preview (first 64 bytes):")
+         result_parts.append(hex_preview)
+
+         # Try to show any readable ASCII strings
+         try:
+             ascii_preview = ''.join(chr(b) if 32 <= b <= 126 else '.' for b in binary_bytes[:200])
+             if ascii_preview.strip():
+                 result_parts.append(f"📝 ASCII Preview (first 200 bytes):")
+                 result_parts.append(ascii_preview)
+         except:
+             pass
+
+     result_parts.append("💡 Use specialized tools for detailed binary analysis")
+
+     return "\n".join(result_parts)


  @tool(
@@ -1524,5 +2173,6 @@ __all__ = [
  'write_file',
  'edit_file',
  'web_search',
+ 'fetch_url',
  'execute_command'
  ]
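Since `fetch_url` is exported, it can also be called directly as a plain function; it returns a formatted report string rather than structured data. A sketch (import path as in the README section later in this diff):

```python
from abstractcore.tools.common_tools import fetch_url

# JSON endpoint: fetch_url detects application/json and pretty-prints a preview
print(fetch_url("https://api.github.com/repos/python/cpython",
                headers={"Accept": "application/json"}))

# Binary content: opt in to a base64/hex preview of the payload
print(fetch_url("https://example.com/document.pdf", include_binary_preview=True))
```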
@@ -11,4 +11,4 @@ including when the package is installed from PyPI where pyproject.toml is not av

  # Package version - update this when releasing new versions
  # This must be manually synchronized with the version in pyproject.toml
- __version__ = "2.4.5"
+ __version__ = "2.4.6"
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: abstractcore
- Version: 2.4.5
+ Version: 2.4.6
  Summary: Unified interface to all LLM providers with essential infrastructure for tool calling, streaming, and model management
  Author-email: Laurent-Philippe Albou <contact@abstractcore.ai>
  Maintainer-email: Laurent-Philippe Albou <contact@abstractcore.ai>
@@ -122,6 +122,21 @@ response = llm.generate("What is the capital of France?")
  print(response.content)
  ```

+ ### Deterministic Generation
+
+ ```python
+ from abstractcore import create_llm
+
+ # Deterministic outputs with seed + temperature=0
+ llm = create_llm("openai", model="gpt-3.5-turbo", seed=42, temperature=0.0)
+
+ # These will produce identical outputs
+ response1 = llm.generate("Write exactly 3 words about coding")
+ response2 = llm.generate("Write exactly 3 words about coding")
+ print(f"Response 1: {response1.content}") # "Innovative, challenging, rewarding."
+ print(f"Response 2: {response2.content}") # "Innovative, challenging, rewarding."
+ ```
+
  ### Tool Calling

  ```python
@@ -140,6 +155,39 @@ response = llm.generate(
  print(response.content)
  ```

+ ### Built-in Tools
+
+ AbstractCore includes a comprehensive set of ready-to-use tools for common tasks:
+
+ ```python
+ from abstractcore.tools.common_tools import fetch_url, search_files, read_file
+
+ # Intelligent web content fetching with automatic parsing
+ result = fetch_url("https://api.github.com/repos/python/cpython")
+ # Automatically detects JSON, HTML, images, PDFs, etc. and provides structured analysis
+
+ # File system operations
+ files = search_files("def.*fetch", ".", file_pattern="*.py")  # Find function definitions
+ content = read_file("config.json")  # Read file contents
+
+ # Use with any LLM
+ llm = create_llm("anthropic", model="claude-3-5-haiku-latest")
+ response = llm.generate(
+     "Analyze this API response and summarize the key information",
+     tools=[fetch_url]
+ )
+ ```
+
+ **Available Tools:**
+ - `fetch_url` - Intelligent web content fetching with automatic content type detection and parsing
+ - `search_files` - Search for text patterns inside files using regex
+ - `list_files` - Find and list files by names/paths using glob patterns
+ - `read_file` - Read file contents with optional line range selection
+ - `write_file` - Write content to files with directory creation
+ - `edit_file` - Edit files using pattern matching and replacement
+ - `web_search` - Search the web using DuckDuckGo
+ - `execute_command` - Execute shell commands safely with security controls
+
  ### Session Management

  ```python
@@ -228,14 +276,16 @@ response = llm.generate(

  ## Supported Providers

- | Provider | Status | Setup |
- |----------|--------|-------|
- | **OpenAI** | Full | [Get API key](docs/prerequisites.md#openai-setup) |
- | **Anthropic** | Full | [Get API key](docs/prerequisites.md#anthropic-setup) |
- | **Ollama** | Full | [Install guide](docs/prerequisites.md#ollama-setup) |
- | **LMStudio** | Full | [Install guide](docs/prerequisites.md#lmstudio-setup) |
- | **MLX** | Full | [Setup guide](docs/prerequisites.md#mlx-setup) |
- | **HuggingFace** | Full | [Setup guide](docs/prerequisites.md#huggingface-setup) |
+ | Provider | Status | SEED Support | Setup |
+ |----------|--------|-------------|-------|
+ | **OpenAI** | Full | ✅ Native | [Get API key](docs/prerequisites.md#openai-setup) |
+ | **Anthropic** | Full | ⚠️ Warning* | [Get API key](docs/prerequisites.md#anthropic-setup) |
+ | **Ollama** | Full | ✅ Native | [Install guide](docs/prerequisites.md#ollama-setup) |
+ | **LMStudio** | Full | ✅ Native | [Install guide](docs/prerequisites.md#lmstudio-setup) |
+ | **MLX** | Full | ✅ Native | [Setup guide](docs/prerequisites.md#mlx-setup) |
+ | **HuggingFace** | Full | ✅ Native | [Setup guide](docs/prerequisites.md#huggingface-setup) |
+
+ *Anthropic doesn't support seed parameters but issues a warning when provided. Use `temperature=0.0` for more consistent outputs.

  ## Server Mode (Optional HTTP REST API)

@@ -17,9 +17,9 @@ abstractcore/cli/vision_config.py,sha256=jJzO4zBexh8SqSKp6YKOXdMDSv4AL4Ztl5Xi-5c
  abstractcore/core/__init__.py,sha256=2h-86U4QkCQ4gzZ4iRusSTMlkODiUS6tKjZHiEXz6rM,684
  abstractcore/core/enums.py,sha256=BhkVnHC-X1_377JDmqd-2mnem9GdBLqixWlYzlP_FJU,695
  abstractcore/core/factory.py,sha256=UdrNwQAvifvFS3LMjF5KO87m-2n1bJBryTs9pvesYcI,2804
- abstractcore/core/interface.py,sha256=XTvtP1YY_2dSlSdKWkkDK54VtEeUt97zIDR1tXTtn8Q,13876
+ abstractcore/core/interface.py,sha256=-VAY0nlsTnWN_WghiuMC7iE7xUdZfYOg6KlgrAPi14Y,14086
  abstractcore/core/retry.py,sha256=wNlUAxfmvdO_uVWb4iqkhTqd7O1oRwXxqvVQaLXQOw0,14538
- abstractcore/core/session.py,sha256=iU_fU7f7E6HxwkV97QnSMJSF6rOIuJyT6sQGf547S6s,35347
+ abstractcore/core/session.py,sha256=cYmQv9m69ivpZXfR-a2xasbBRiP4IZt-9QDuuT6eHKw,36462
  abstractcore/core/types.py,sha256=KT9Gf9ei4t0QnWBH72fFa8vR7UZSKI-CJyQjU9ynE8g,3642
  abstractcore/embeddings/__init__.py,sha256=hR3xZyqcRm4c2pq1dIa5lxj_-Bk70Zad802JQN4joWo,637
  abstractcore/embeddings/manager.py,sha256=uFVbRPHx_R-kVMVA7N7_7EzeUmCJCeN9Dv0EV8Jko24,52245
@@ -48,14 +48,14 @@ abstractcore/processing/basic_extractor.py,sha256=3x-3BdIHgLvqLnLF6K1-P4qVaLIpAn
  abstractcore/processing/basic_judge.py,sha256=tKWJrg_tY4vCHzWgXxz0ZjgLXBYYfpMcpG7vl03hJcM,32218
  abstractcore/processing/basic_summarizer.py,sha256=XHNxMQ_8aLStTeUo6_2JaThlct12Htpz7ORmm0iuJsg,25495
  abstractcore/providers/__init__.py,sha256=t8Kp4flH5GvZEC6dx-iYJSPeSxMODa2spXb8MqtlPy4,1282
- abstractcore/providers/anthropic_provider.py,sha256=tcOrARLd1kA4vRkH7MCgy99YIGVaegdCd3-Z8UaKP3Q,20705
- abstractcore/providers/base.py,sha256=5YR64kqTYiCvWtIUBul5QfO0XRZr6_Aiho4atpDOh0o,50579
- abstractcore/providers/huggingface_provider.py,sha256=mJGfi1lgsvjV3Lj4q7KCQZhQqw_o23af40i5WLg150o,47789
- abstractcore/providers/lmstudio_provider.py,sha256=oPL_Y4gkJMAniecdWQVaDi7WozCZumSRs0lE7uFgvQk,20406
- abstractcore/providers/mlx_provider.py,sha256=61i5VhpNw_QlhOwPcEcryaGbI45aYyL9q15TrpinIgs,17427
+ abstractcore/providers/anthropic_provider.py,sha256=4DsHpfJ5iVnIB6gOL4iZGRjoR0R5kJhKdO5jG06iUmo,21287
+ abstractcore/providers/base.py,sha256=YfrqM3c7wLT19vspL7goUO6Bv-z1691ZkCM2wxvQX4s,51501
+ abstractcore/providers/huggingface_provider.py,sha256=v12JzpZ0Ra6OGD2aWcNdBMLxWytrW3gsSnzrr7F-rnA,48500
+ abstractcore/providers/lmstudio_provider.py,sha256=NbhJMd3RjZ9nSIfy9lVmGAnxH8eGPz5ogRsN8YQfsl0,20629
+ abstractcore/providers/mlx_provider.py,sha256=vbuv6lEfAURb6Dvcx7tpjV5woi5oZuZGsqwPBqiZ2EQ,18157
  abstractcore/providers/mock_provider.py,sha256=tIjA57Hlwu3vNODOZShNn0tY9HWvz1p4z-HyD_bsvbo,5741
- abstractcore/providers/ollama_provider.py,sha256=O77Nzx0erQw8D5TlyVaunIOjluaRwi8bgYVO95qK0L4,21129
- abstractcore/providers/openai_provider.py,sha256=gHurjXwwKvKQtkK5cqwokW_DUTY9_bsfNm06RPvQ39g,22683
+ abstractcore/providers/ollama_provider.py,sha256=7p4BcCZ0UJabjw_lHzBqjQvtoEJYOj_NI511QjjWaSc,21361
+ abstractcore/providers/openai_provider.py,sha256=3cm0TG2gbuoBDkoQmsliH9SBZCwL7hnKuzzDmwU3K4E,22853
  abstractcore/providers/registry.py,sha256=c0hxp9RRa-uipGotaAu48fHXc_HGlLcOxC1k763mzhU,16596
  abstractcore/providers/streaming.py,sha256=VnffBV_CU9SAKzghL154OoFyEdDsiLwUNXPahyU41Bw,31342
  abstractcore/server/__init__.py,sha256=1DSAz_YhQtnKv7sNi5TMQV8GFujctDOabgvAdilQE0o,249
@@ -64,7 +64,7 @@ abstractcore/structured/__init__.py,sha256=VXRQHGcm-iaYnLOBPin2kyhvhhQA0kaGt_pcN
  abstractcore/structured/handler.py,sha256=Vb15smiR81JGDXX2RLkY2Exuj67J7a6C-xwVrZoXp0I,17134
  abstractcore/structured/retry.py,sha256=BN_PvrWybyU1clMy2cult1-TVxFSMaVqiCPmmXvA5aI,3805
  abstractcore/tools/__init__.py,sha256=oh6vG0RdM1lqUtOp95mLrTsWLh9VmhJf5_FVjGIP5_M,2259
- abstractcore/tools/common_tools.py,sha256=jRVvu-TQbmXBOZHn00zEZb44nenaXVtdemhKmRmt1YY,64496
+ abstractcore/tools/common_tools.py,sha256=GkUSnBum3zMm9M-Zd9LlJQmlDp1XDssC7z8ItUcbloc,91692
  abstractcore/tools/core.py,sha256=lUUGihyceiRYlKUFvEMig9jWFF563d574mSDbYYD3fM,4777
  abstractcore/tools/handler.py,sha256=GmDenXAJkhceWSGlhvuF90aMb2301tRTh6WxGwBQifc,12022
  abstractcore/tools/parser.py,sha256=1r5nmEEp1Rid3JU6ct-s3lP-eCln67fvXG5HCjqiRew,27740
@@ -77,10 +77,10 @@ abstractcore/utils/message_preprocessor.py,sha256=GdHkm6tmrgjm3PwHRSCjIsq1XLkbhy
  abstractcore/utils/self_fixes.py,sha256=QEDwNTW80iQM4ftfEY3Ghz69F018oKwLM9yeRCYZOvw,5886
  abstractcore/utils/structured_logging.py,sha256=Vm-HviSa42G9DJCWmaEv4a0QG3NMsADD3ictLOs4En0,19952
  abstractcore/utils/token_utils.py,sha256=eLwFmJ68p9WMFD_MHLMmeJRW6Oqx_4hKELB8FNQ2Mnk,21097
- abstractcore/utils/version.py,sha256=WKRMOS_TRIxsNu-vJqfCYHA335-rya9xBBMzhx_S-Z8,605
- abstractcore-2.4.5.dist-info/licenses/LICENSE,sha256=PI2v_4HMvd6050uDD_4AY_8PzBnu2asa3RKbdDjowTA,1078
- abstractcore-2.4.5.dist-info/METADATA,sha256=9w6Q7NZXePxhRYnmloyJ6Nd2DFQ7YY4294OMF-6QYuY,27596
- abstractcore-2.4.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- abstractcore-2.4.5.dist-info/entry_points.txt,sha256=UdVmchBC_Lt3H4Vlkt5js-QDAkVlBbkCu1yCsswk-KE,454
- abstractcore-2.4.5.dist-info/top_level.txt,sha256=DiNHBI35SIawW3N9Z-z0y6cQYNbXd32pvBkW0RLfScs,13
- abstractcore-2.4.5.dist-info/RECORD,,
+ abstractcore/utils/version.py,sha256=r8Rzbfb6YwcUi9ek9BN0m6rVNtA2oImg_hOzRFnS2CE,605
+ abstractcore-2.4.6.dist-info/licenses/LICENSE,sha256=PI2v_4HMvd6050uDD_4AY_8PzBnu2asa3RKbdDjowTA,1078
+ abstractcore-2.4.6.dist-info/METADATA,sha256=agFFlRyASBw8uDKCAmchMdvggXjPydMHTfARd4GQqcU,29749
+ abstractcore-2.4.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ abstractcore-2.4.6.dist-info/entry_points.txt,sha256=UdVmchBC_Lt3H4Vlkt5js-QDAkVlBbkCu1yCsswk-KE,454
+ abstractcore-2.4.6.dist-info/top_level.txt,sha256=DiNHBI35SIawW3N9Z-z0y6cQYNbXd32pvBkW0RLfScs,13
+ abstractcore-2.4.6.dist-info/RECORD,,