aiecs 1.2.1__py3-none-any.whl → 1.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of aiecs might be problematic.
- aiecs/__init__.py +1 -1
- aiecs/config/config.py +2 -1
- aiecs/llm/clients/vertex_client.py +5 -0
- aiecs/main.py +2 -2
- aiecs/scripts/tools_develop/README.md +111 -2
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +80 -21
- aiecs/scripts/tools_develop/verify_tools.py +347 -0
- aiecs/tools/__init__.py +94 -30
- aiecs/tools/apisource/__init__.py +106 -0
- aiecs/tools/apisource/intelligence/__init__.py +20 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +378 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +387 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +384 -0
- aiecs/tools/apisource/monitoring/__init__.py +12 -0
- aiecs/tools/apisource/monitoring/metrics.py +308 -0
- aiecs/tools/apisource/providers/__init__.py +114 -0
- aiecs/tools/apisource/providers/base.py +684 -0
- aiecs/tools/apisource/providers/census.py +412 -0
- aiecs/tools/apisource/providers/fred.py +575 -0
- aiecs/tools/apisource/providers/newsapi.py +402 -0
- aiecs/tools/apisource/providers/worldbank.py +346 -0
- aiecs/tools/apisource/reliability/__init__.py +14 -0
- aiecs/tools/apisource/reliability/error_handler.py +362 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +420 -0
- aiecs/tools/apisource/tool.py +814 -0
- aiecs/tools/apisource/utils/__init__.py +12 -0
- aiecs/tools/apisource/utils/validators.py +343 -0
- aiecs/tools/langchain_adapter.py +95 -17
- aiecs/tools/search_tool/__init__.py +102 -0
- aiecs/tools/search_tool/analyzers.py +583 -0
- aiecs/tools/search_tool/cache.py +280 -0
- aiecs/tools/search_tool/constants.py +127 -0
- aiecs/tools/search_tool/context.py +219 -0
- aiecs/tools/search_tool/core.py +773 -0
- aiecs/tools/search_tool/deduplicator.py +123 -0
- aiecs/tools/search_tool/error_handler.py +257 -0
- aiecs/tools/search_tool/metrics.py +375 -0
- aiecs/tools/search_tool/rate_limiter.py +177 -0
- aiecs/tools/search_tool/schemas.py +297 -0
- aiecs/tools/statistics/data_loader_tool.py +2 -2
- aiecs/tools/statistics/data_transformer_tool.py +1 -1
- aiecs/tools/task_tools/__init__.py +8 -8
- aiecs/tools/task_tools/report_tool.py +1 -1
- aiecs/tools/tool_executor/__init__.py +2 -0
- aiecs/tools/tool_executor/tool_executor.py +284 -14
- aiecs/utils/__init__.py +11 -0
- aiecs/utils/cache_provider.py +698 -0
- aiecs/utils/execution_utils.py +5 -5
- {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/METADATA +1 -1
- {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/RECORD +55 -23
- aiecs/tools/task_tools/search_tool.py +0 -1123
- {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/WHEEL +0 -0
- {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/entry_points.txt +0 -0
- {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/licenses/LICENSE +0 -0
- {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/top_level.txt +0 -0
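
The headline change in this release is the removal of the monolithic aiecs/tools/task_tools/search_tool.py (-1123 lines) in favor of the new aiecs/tools/search_tool/ package, alongside the brand-new aiecs/tools/apisource/ package. For callers this is mostly an import-path change; a minimal sketch, assuming code previously imported SearchTool directly from the old module (that old import path is inferred from the removed file, not shown in this diff):

# aiecs 1.2.1 (old module, removed in 1.3.1) -- inferred import path
# from aiecs.tools.task_tools.search_tool import SearchTool

# aiecs 1.3.1 (new package; its __init__ re-exports SearchTool from .core)
from aiecs.tools.search_tool import SearchTool

search_tool = SearchTool()

Code that resolves tools through the registry (e.g. get_tool("search") as used in langchain_adapter.py) should be unaffected, since the new package registers under the name "search" via register_tool("search")(SearchTool); whether 1.2.1 used the same registry name is not visible in this diff.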
aiecs/tools/apisource/utils/validators.py
ADDED
@@ -0,0 +1,343 @@
+"""
+Shared Validation Utilities for API Providers
+
+Common validation functions for data quality assessment:
+- Detect outliers in numeric data
+- Find gaps in time series
+- Check data completeness
+- Validate data types and ranges
+"""
+
+import logging
+from datetime import datetime, timedelta
+from typing import Any, Dict, List, Optional, Tuple
+
+logger = logging.getLogger(__name__)
+
+
+class DataValidator:
+    """
+    Provides common data validation methods for API providers.
+    """
+
+    @staticmethod
+    def detect_outliers(
+        values: List[float],
+        method: str = 'iqr',
+        threshold: float = 1.5
+    ) -> List[int]:
+        """
+        Detect outliers in numeric data.
+
+        Args:
+            values: List of numeric values
+            method: Detection method ('iqr' or 'zscore')
+            threshold: Threshold for outlier detection
+                - For IQR: typically 1.5 or 3.0
+                - For Z-score: typically 2.0 or 3.0
+
+        Returns:
+            List of indices where outliers were detected
+        """
+        if not values or len(values) < 4:
+            return []
+
+        outlier_indices = []
+
+        if method == 'iqr':
+            # Interquartile Range method
+            sorted_values = sorted(values)
+            n = len(sorted_values)
+
+            q1_idx = n // 4
+            q3_idx = 3 * n // 4
+
+            q1 = sorted_values[q1_idx]
+            q3 = sorted_values[q3_idx]
+            iqr = q3 - q1
+
+            lower_bound = q1 - threshold * iqr
+            upper_bound = q3 + threshold * iqr
+
+            for i, value in enumerate(values):
+                if value < lower_bound or value > upper_bound:
+                    outlier_indices.append(i)
+
+        elif method == 'zscore':
+            # Z-score method
+            mean = sum(values) / len(values)
+            variance = sum((x - mean) ** 2 for x in values) / len(values)
+            std_dev = variance ** 0.5
+
+            if std_dev == 0:
+                return []
+
+            for i, value in enumerate(values):
+                z_score = abs((value - mean) / std_dev)
+                if z_score > threshold:
+                    outlier_indices.append(i)
+
+        return outlier_indices
+
+    @staticmethod
+    def detect_time_gaps(
+        data: List[Dict[str, Any]],
+        date_field: str = 'date',
+        expected_frequency: Optional[str] = None
+    ) -> List[Dict[str, Any]]:
+        """
+        Detect gaps in time series data.
+
+        Args:
+            data: List of data items with date fields
+            date_field: Name of the date field
+            expected_frequency: Expected frequency ('daily', 'weekly', 'monthly', 'quarterly', 'annual')
+
+        Returns:
+            List of gap information dictionaries
+        """
+        if len(data) < 2:
+            return []
+
+        gaps = []
+
+        # Parse dates
+        dates = []
+        for i, item in enumerate(data):
+            if date_field in item:
+                try:
+                    date_str = str(item[date_field])
+                    if 'T' in date_str:
+                        date_obj = datetime.fromisoformat(date_str.replace('Z', '+00:00'))
+                    else:
+                        date_obj = datetime.strptime(date_str[:10], '%Y-%m-%d')
+                    dates.append((i, date_obj))
+                except (ValueError, TypeError):
+                    continue
+
+        if len(dates) < 2:
+            return []
+
+        # Sort by date
+        dates.sort(key=lambda x: x[1])
+
+        # Determine expected gap if not specified
+        if expected_frequency is None:
+            # Estimate from first few intervals
+            if len(dates) >= 3:
+                intervals = [
+                    (dates[i+1][1] - dates[i][1]).days
+                    for i in range(min(3, len(dates) - 1))
+                ]
+                avg_interval = sum(intervals) / len(intervals)
+
+                if avg_interval <= 2:
+                    expected_frequency = 'daily'
+                elif avg_interval <= 10:
+                    expected_frequency = 'weekly'
+                elif avg_interval <= 40:
+                    expected_frequency = 'monthly'
+                elif avg_interval <= 120:
+                    expected_frequency = 'quarterly'
+                else:
+                    expected_frequency = 'annual'
+
+        # Define expected gaps in days
+        frequency_gaps = {
+            'daily': 1,
+            'weekly': 7,
+            'monthly': 31,
+            'quarterly': 92,
+            'annual': 365
+        }
+
+        expected_gap_days = frequency_gaps.get(expected_frequency, 31)
+        tolerance = expected_gap_days * 0.5  # 50% tolerance
+
+        # Check for gaps
+        for i in range(len(dates) - 1):
+            idx1, date1 = dates[i]
+            idx2, date2 = dates[i + 1]
+
+            gap_days = (date2 - date1).days
+
+            if gap_days > expected_gap_days + tolerance:
+                gaps.append({
+                    'start_index': idx1,
+                    'end_index': idx2,
+                    'start_date': date1.isoformat(),
+                    'end_date': date2.isoformat(),
+                    'gap_days': gap_days,
+                    'expected_days': expected_gap_days
+                })
+
+        return gaps
+
+    @staticmethod
+    def check_data_completeness(
+        data: List[Dict[str, Any]],
+        value_field: str = 'value',
+        missing_indicators: Optional[List[Any]] = None
+    ) -> Dict[str, Any]:
+        """
+        Check completeness of data.
+
+        Args:
+            data: List of data items
+            value_field: Name of the value field to check
+            missing_indicators: Values that indicate missing data (e.g., ['.', None, 'NA'])
+
+        Returns:
+            Completeness statistics dictionary
+        """
+        if missing_indicators is None:
+            missing_indicators = ['.', None, 'NA', 'N/A', '', 'null']
+
+        total_records = len(data)
+        if total_records == 0:
+            return {
+                'total_records': 0,
+                'missing_count': 0,
+                'completeness': 1.0,
+                'missing_indices': []
+            }
+
+        missing_count = 0
+        missing_indices = []
+
+        for i, item in enumerate(data):
+            if value_field in item:
+                value = item[value_field]
+                # Check if value is missing
+                if value in missing_indicators:
+                    missing_count += 1
+                    missing_indices.append(i)
+                elif isinstance(value, str) and value.strip() in missing_indicators:
+                    missing_count += 1
+                    missing_indices.append(i)
+            else:
+                # Field doesn't exist
+                missing_count += 1
+                missing_indices.append(i)
+
+        completeness = (total_records - missing_count) / total_records
+
+        return {
+            'total_records': total_records,
+            'missing_count': missing_count,
+            'present_count': total_records - missing_count,
+            'completeness': round(completeness, 4),
+            'missing_indices': missing_indices[:10]  # Limit to first 10
+        }
+
+    @staticmethod
+    def calculate_value_range(
+        data: List[Dict[str, Any]],
+        value_field: str = 'value',
+        missing_indicators: Optional[List[Any]] = None
+    ) -> Optional[Dict[str, float]]:
+        """
+        Calculate min, max, mean of numeric values.
+
+        Args:
+            data: List of data items
+            value_field: Name of the value field
+            missing_indicators: Values to skip
+
+        Returns:
+            Dictionary with min, max, mean, or None if no valid data
+        """
+        if missing_indicators is None:
+            missing_indicators = ['.', None, 'NA', 'N/A', '', 'null']
+
+        numeric_values = []
+
+        for item in data:
+            if value_field in item:
+                value = item[value_field]
+
+                # Skip missing indicators
+                if value in missing_indicators:
+                    continue
+
+                # Try to convert to float
+                try:
+                    if isinstance(value, (int, float)):
+                        numeric_values.append(float(value))
+                    elif isinstance(value, str):
+                        # Clean string (remove commas, etc.)
+                        cleaned = value.strip().replace(',', '')
+                        if cleaned and cleaned not in missing_indicators:
+                            numeric_values.append(float(cleaned))
+                except (ValueError, TypeError):
+                    continue
+
+        if not numeric_values:
+            return None
+
+        return {
+            'min': min(numeric_values),
+            'max': max(numeric_values),
+            'mean': sum(numeric_values) / len(numeric_values),
+            'count': len(numeric_values)
+        }
+
+    @staticmethod
+    def infer_data_frequency(
+        data: List[Dict[str, Any]],
+        date_field: str = 'date'
+    ) -> Optional[str]:
+        """
+        Infer the frequency of time series data.
+
+        Args:
+            data: List of data items with dates
+            date_field: Name of the date field
+
+        Returns:
+            Frequency string or None
+        """
+        if len(data) < 3:
+            return None
+
+        # Parse dates
+        dates = []
+        for item in data:
+            if date_field in item:
+                try:
+                    date_str = str(item[date_field])
+                    if 'T' in date_str:
+                        date_obj = datetime.fromisoformat(date_str.replace('Z', '+00:00'))
+                    else:
+                        date_obj = datetime.strptime(date_str[:10], '%Y-%m-%d')
+                    dates.append(date_obj)
+                except (ValueError, TypeError):
+                    continue
+
+        if len(dates) < 3:
+            return None
+
+        # Sort dates
+        dates.sort()
+
+        # Calculate intervals
+        intervals = [(dates[i+1] - dates[i]).days for i in range(len(dates) - 1)]
+
+        # Calculate median interval
+        intervals.sort()
+        median_interval = intervals[len(intervals) // 2]
+
+        # Classify frequency
+        if median_interval <= 2:
+            return 'daily'
+        elif median_interval <= 10:
+            return 'weekly'
+        elif median_interval <= 40:
+            return 'monthly'
+        elif median_interval <= 120:
+            return 'quarterly'
+        elif median_interval <= 400:
+            return 'annual'
+        else:
+            return 'irregular'
+
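DataValidator is deliberately dependency-free (plain datetime and arithmetic, no numpy/pandas), so any provider can run these checks on raw API payloads. A minimal usage sketch against the methods above; the sample records are invented for illustration:

from aiecs.tools.apisource.utils.validators import DataValidator

# Invented monthly series: one missing value ('.'), one comma-formatted
# outlier, and a three-month gap before the last observation
series = [
    {'date': '2024-01-01', 'value': '10'},
    {'date': '2024-02-01', 'value': '11'},
    {'date': '2024-03-01', 'value': '.'},
    {'date': '2024-04-01', 'value': '1,000'},
    {'date': '2024-07-01', 'value': '12'},
]

DataValidator.infer_data_frequency(series)     # 'monthly' (median interval: 31 days)
DataValidator.check_data_completeness(series)  # completeness 0.8, missing_indices [2]
DataValidator.detect_time_gaps(series)         # one gap: indices 3 -> 4, gap_days 91
DataValidator.calculate_value_range(series)    # min 10.0, max 1000.0, count 4
DataValidator.detect_outliers([10.0, 11.0, 12.0, 11.5, 1000.0])  # [4] via IQR

Note that the quartile computation in detect_outliers uses simple index positions (n // 4 and 3 * n // 4) rather than interpolation, so results on small samples will differ slightly from numpy.percentile.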
aiecs/tools/langchain_adapter.py
CHANGED
@@ -37,7 +37,8 @@ class LangchainToolAdapter(LangchainBaseTool):
     """
     Langchain tool adapter for single operation
 
-    Wraps one operation method of BaseTool as an independent Langchain tool
+    Wraps one operation method of BaseTool as an independent Langchain tool.
+    Supports both tool-level operations and provider-level operations.
     """
 
     # Define class attributes
@@ -46,13 +47,19 @@ class LangchainToolAdapter(LangchainBaseTool):
     base_tool_name: str = ""
     operation_name: str = ""
     operation_schema: Optional[Type[BaseModel]] = None
+    is_provider_operation: bool = False
+    provider_name: Optional[str] = None
+    method_name: Optional[str] = None
 
     def __init__(
         self,
         base_tool_name: str,
         operation_name: str,
         operation_schema: Optional[Type[BaseModel]] = None,
-        description: Optional[str] = None
+        description: Optional[str] = None,
+        is_provider_operation: bool = False,
+        provider_name: Optional[str] = None,
+        method_name: Optional[str] = None
     ):
         """
         Initialize adapter
@@ -62,6 +69,9 @@ class LangchainToolAdapter(LangchainBaseTool):
            operation_name: Operation name
            operation_schema: Pydantic Schema for the operation
            description: Tool description
+           is_provider_operation: Whether this is a provider-level operation
+           provider_name: Provider name (for provider operations)
+           method_name: Original method name (for provider operations)
        """
        # Construct tool name and description
        tool_name = f"{base_tool_name}_{operation_name}"
@@ -74,7 +84,10 @@ class LangchainToolAdapter(LangchainBaseTool):
             base_tool_name=base_tool_name,
             operation_name=operation_name,
             operation_schema=operation_schema,
-            args_schema=operation_schema
+            args_schema=operation_schema,
+            is_provider_operation=is_provider_operation,
+            provider_name=provider_name,
+            method_name=method_name
         )
 
     def _run(
@@ -87,8 +100,18 @@ class LangchainToolAdapter(LangchainBaseTool):
         # Get original tool instance
         base_tool = get_tool(self.base_tool_name)
 
-        #
-
+        # Handle provider operations differently
+        if self.is_provider_operation:
+            # For provider operations, call the query method with provider and operation
+            result = base_tool.run(
+                'query',
+                provider=self.provider_name,
+                operation=self.method_name,
+                params=kwargs
+            )
+        else:
+            # For tool-level operations, call directly
+            result = base_tool.run(self.operation_name, **kwargs)
 
         logger.info(f"Successfully executed {self.name} with result type: {type(result)}")
         return result
@@ -125,7 +148,10 @@ class ToolRegistry:
 
     def discover_operations(self, base_tool_class: Type[BaseTool]) -> List[Dict[str, Any]]:
         """
-        Discover all operation methods and Schemas of BaseTool class
+        Discover all operation methods and Schemas of BaseTool class.
+
+        Enhanced to support provider-level operations for tools like APISourceTool
+        that expose fine-grained operations from underlying providers.
 
         Args:
             base_tool_class: BaseTool subclass
@@ -135,6 +161,49 @@ class ToolRegistry:
         """
         operations = []
 
+        # 1. Discover tool-level operations (existing logic)
+        tool_operations = self._discover_tool_operations(base_tool_class)
+        operations.extend(tool_operations)
+
+        # 2. Discover provider-level operations (new logic)
+        if hasattr(base_tool_class, '_discover_provider_operations'):
+            try:
+                provider_operations = base_tool_class._discover_provider_operations()
+
+                # Convert provider operations to the expected format
+                for provider_op in provider_operations:
+                    operation_info = {
+                        'name': provider_op['name'],
+                        'method': None,  # Will be handled specially in create_langchain_tools
+                        'schema': provider_op['schema'],
+                        'description': provider_op['description'],
+                        'is_async': False,
+                        'is_provider_operation': True,  # Mark as provider operation
+                        'provider_name': provider_op.get('provider_name'),
+                        'method_name': provider_op.get('method_name')
+                    }
+                    operations.append(operation_info)
+                    logger.debug(f"Added provider operation: {provider_op['name']}")
+
+                logger.info(f"Discovered {len(provider_operations)} provider operations for {base_tool_class.__name__}")
+
+            except Exception as e:
+                logger.warning(f"Error discovering provider operations for {base_tool_class.__name__}: {e}")
+
+        return operations
+
+    def _discover_tool_operations(self, base_tool_class: Type[BaseTool]) -> List[Dict[str, Any]]:
+        """
+        Discover tool-level operations (original logic extracted to separate method).
+
+        Args:
+            base_tool_class: BaseTool subclass
+
+        Returns:
+            List of tool-level operation information
+        """
+        operations = []
+
         # Get all Schema classes
         # Build a mapping from normalized names to Schema classes
         # Check both class-level and module-level schemas
@@ -205,7 +274,8 @@ class ToolRegistry:
                 'method': method,
                 'schema': matching_schema,
                 'description': inspect.getdoc(method) or f"Execute {method_name} operation",
-                'is_async': inspect.iscoroutinefunction(method)
+                'is_async': inspect.iscoroutinefunction(method),
+                'is_provider_operation': False  # Mark as tool-level operation
             }
 
             operations.append(operation_info)
@@ -280,23 +350,31 @@ class ToolRegistry:
         langchain_tools = []
         for op_info in operations:
             # Generate enhanced description
-
-
-
-
-
-
-
+            # For provider operations, use the description directly
+            if op_info.get('is_provider_operation', False):
+                enhanced_description = op_info['description']
+            else:
+                enhanced_description = self._extract_description(
+                    op_info['method'],
+                    tool_name,
+                    op_info['name'],
+                    op_info['schema']
+                )
+
+            # Create adapter with provider operation support
             adapter = LangchainToolAdapter(
                 base_tool_name=tool_name,
                 operation_name=op_info['name'],
                 operation_schema=op_info['schema'],
-                description=enhanced_description
+                description=enhanced_description,
+                is_provider_operation=op_info.get('is_provider_operation', False),
+                provider_name=op_info.get('provider_name'),
+                method_name=op_info.get('method_name')
             )
-
+
             langchain_tools.append(adapter)
             self._langchain_tools[adapter.name] = adapter
-
+
         logger.info(f"Created {len(langchain_tools)} Langchain tools for {tool_name}")
         return langchain_tools
 
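The net effect of the adapter changes: a provider-level operation needs no dedicated method on the wrapping tool, since LangchainToolAdapter routes it through the tool's generic 'query' operation with provider, operation, and params, while tool-level operations dispatch exactly as before. A sketch of the two paths, assuming an APISourceTool registered as "api_source" with a FRED provider exposing get_series (these names are illustrative; only the dispatch shape comes from the diff):

from aiecs.tools.langchain_adapter import LangchainToolAdapter

# Tool-level operation: _run calls base_tool.run('search_web', **kwargs)
web_search = LangchainToolAdapter(
    base_tool_name="search",
    operation_name="search_web",
)

# Provider-level operation: _run calls
#   base_tool.run('query', provider='fred', operation='get_series', params=kwargs)
fred_series = LangchainToolAdapter(
    base_tool_name="api_source",       # illustrative registry name
    operation_name="fred_get_series",  # exposed as "api_source_fred_get_series"
    is_provider_operation=True,
    provider_name="fred",
    method_name="get_series",          # illustrative provider method
)

In practice these adapters are built by ToolRegistry.create_langchain_tools from the metadata returned by _discover_provider_operations, not constructed by hand.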
aiecs/tools/search_tool/__init__.py
ADDED
@@ -0,0 +1,102 @@
+"""
+Enhanced Search Tool Package
+
+A comprehensive, production-ready web search tool that integrates Google Custom Search API
+with advanced features including:
+
+- Result quality scoring and ranking
+- Query intent analysis and optimization
+- Result deduplication
+- Context-aware search with history tracking
+- Intelligent Redis caching with intent-aware TTL
+- Comprehensive metrics and monitoring
+- Agent-friendly error handling
+
+Features:
+- Multiple search types: web, image, news, video
+- Dual authentication: API key and service account
+- Rate limiting with token bucket algorithm
+- Circuit breaker pattern for API resilience
+- Intelligent caching with Redis backend
+- Quality analysis with authority, relevance, and freshness scoring
+- Query enhancement based on detected intent
+- Structured result summaries
+- Search context tracking and preference learning
+- Enhanced metrics and health scoring
+- Agent-optimized error messages with actionable suggestions
+
+Usage:
+    from aiecs.tools.search_tool import SearchTool
+
+    # Create search tool instance
+    search_tool = SearchTool()
+
+    # Perform enhanced web search
+    results = search_tool.search_web(
+        query="machine learning tutorial",
+        auto_enhance=True,
+        return_summary=True
+    )
+
+    # Access results and quality analysis
+    for result in results['results']:
+        print(f"Title: {result['title']}")
+        print(f"Quality: {result['_quality_summary']['score']:.2f}")
+        print(f"Credibility: {result['_quality_summary']['level']}")
+
+    # Check metrics
+    print(search_tool.get_metrics_report())
+"""
+
+from aiecs.tools import register_tool
+from .core import SearchTool
+from .constants import (
+    SearchType,
+    SafeSearch,
+    ImageSize,
+    ImageType,
+    ImageColorType,
+    QueryIntentType,
+    CredibilityLevel,
+    CircuitState,
+    # Exceptions
+    SearchToolError,
+    AuthenticationError,
+    QuotaExceededError,
+    RateLimitError,
+    CircuitBreakerOpenError,
+    SearchAPIError,
+    ValidationError,
+    CacheError
+)
+
+# Register the tool with the AIECS tool registry
+register_tool("search")(SearchTool)
+
+__all__ = [
+    # Main class
+    'SearchTool',
+
+    # Enums
+    'SearchType',
+    'SafeSearch',
+    'ImageSize',
+    'ImageType',
+    'ImageColorType',
+    'QueryIntentType',
+    'CredibilityLevel',
+    'CircuitState',
+
+    # Exceptions
+    'SearchToolError',
+    'AuthenticationError',
+    'QuotaExceededError',
+    'RateLimitError',
+    'CircuitBreakerOpenError',
+    'SearchAPIError',
+    'ValidationError',
+    'CacheError',
+]
+
+__version__ = '2.0.0'
+
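Because the package re-exports its full exception hierarchy, callers can distinguish transient failures from hard ones without reaching into internal modules. A minimal sketch using only the names exported above; the backoff policy is the caller's choice, not something the package prescribes:

import time

from aiecs.tools.search_tool import (
    SearchTool,
    RateLimitError,
    QuotaExceededError,
)

search_tool = SearchTool()

def search_with_backoff(query: str, attempts: int = 3):
    """Retry rate-limited searches; give up immediately on exhausted quota."""
    for attempt in range(attempts):
        try:
            return search_tool.search_web(query=query)
        except RateLimitError:
            time.sleep(2 ** attempt)  # 1s, 2s, 4s between tries
        except QuotaExceededError:
            raise  # daily quota exhausted: retrying will not help
    raise RuntimeError(f"search gave up on {query!r} after {attempts} attempts")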