aiecs 1.2.2__py3-none-any.whl → 1.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic. Click here for more details.

Files changed (55) hide show
  1. aiecs/__init__.py +1 -1
  2. aiecs/llm/clients/vertex_client.py +22 -2
  3. aiecs/main.py +2 -2
  4. aiecs/scripts/tools_develop/README.md +111 -2
  5. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  6. aiecs/scripts/tools_develop/validate_tool_schemas.py +80 -21
  7. aiecs/scripts/tools_develop/verify_tools.py +347 -0
  8. aiecs/tools/__init__.py +94 -30
  9. aiecs/tools/apisource/__init__.py +106 -0
  10. aiecs/tools/apisource/intelligence/__init__.py +20 -0
  11. aiecs/tools/apisource/intelligence/data_fusion.py +378 -0
  12. aiecs/tools/apisource/intelligence/query_analyzer.py +387 -0
  13. aiecs/tools/apisource/intelligence/search_enhancer.py +384 -0
  14. aiecs/tools/apisource/monitoring/__init__.py +12 -0
  15. aiecs/tools/apisource/monitoring/metrics.py +308 -0
  16. aiecs/tools/apisource/providers/__init__.py +114 -0
  17. aiecs/tools/apisource/providers/base.py +684 -0
  18. aiecs/tools/apisource/providers/census.py +412 -0
  19. aiecs/tools/apisource/providers/fred.py +575 -0
  20. aiecs/tools/apisource/providers/newsapi.py +402 -0
  21. aiecs/tools/apisource/providers/worldbank.py +346 -0
  22. aiecs/tools/apisource/reliability/__init__.py +14 -0
  23. aiecs/tools/apisource/reliability/error_handler.py +362 -0
  24. aiecs/tools/apisource/reliability/fallback_strategy.py +420 -0
  25. aiecs/tools/apisource/tool.py +814 -0
  26. aiecs/tools/apisource/utils/__init__.py +12 -0
  27. aiecs/tools/apisource/utils/validators.py +343 -0
  28. aiecs/tools/langchain_adapter.py +95 -17
  29. aiecs/tools/search_tool/__init__.py +102 -0
  30. aiecs/tools/search_tool/analyzers.py +583 -0
  31. aiecs/tools/search_tool/cache.py +280 -0
  32. aiecs/tools/search_tool/constants.py +127 -0
  33. aiecs/tools/search_tool/context.py +219 -0
  34. aiecs/tools/search_tool/core.py +773 -0
  35. aiecs/tools/search_tool/deduplicator.py +123 -0
  36. aiecs/tools/search_tool/error_handler.py +257 -0
  37. aiecs/tools/search_tool/metrics.py +375 -0
  38. aiecs/tools/search_tool/rate_limiter.py +177 -0
  39. aiecs/tools/search_tool/schemas.py +297 -0
  40. aiecs/tools/statistics/data_loader_tool.py +2 -2
  41. aiecs/tools/statistics/data_transformer_tool.py +1 -1
  42. aiecs/tools/task_tools/__init__.py +8 -8
  43. aiecs/tools/task_tools/report_tool.py +1 -1
  44. aiecs/tools/tool_executor/__init__.py +2 -0
  45. aiecs/tools/tool_executor/tool_executor.py +284 -14
  46. aiecs/utils/__init__.py +11 -0
  47. aiecs/utils/cache_provider.py +698 -0
  48. aiecs/utils/execution_utils.py +5 -5
  49. {aiecs-1.2.2.dist-info → aiecs-1.3.3.dist-info}/METADATA +1 -1
  50. {aiecs-1.2.2.dist-info → aiecs-1.3.3.dist-info}/RECORD +54 -22
  51. aiecs/tools/task_tools/search_tool.py +0 -1123
  52. {aiecs-1.2.2.dist-info → aiecs-1.3.3.dist-info}/WHEEL +0 -0
  53. {aiecs-1.2.2.dist-info → aiecs-1.3.3.dist-info}/entry_points.txt +0 -0
  54. {aiecs-1.2.2.dist-info → aiecs-1.3.3.dist-info}/licenses/LICENSE +0 -0
  55. {aiecs-1.2.2.dist-info → aiecs-1.3.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,387 @@
1
+ """
2
+ Query Intent Analysis and Enhancement
3
+
4
+ Provides intelligent query understanding and parameter auto-completion:
5
+ - Detect query intent (time_series, comparison, search, metadata)
6
+ - Extract entities (economic indicators, countries, etc.)
7
+ - Parse time ranges and geographic scope
8
+ - Suggest appropriate providers and operations
9
+ - Auto-complete missing parameters based on intent
10
+ """
11
+
12
+ import logging
13
+ import re
14
+ from datetime import datetime
15
+ from typing import Any, Dict, List, Optional, Set
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class QueryIntentAnalyzer:
    """
    Analyzes query intent to help route requests and optimize parameters.

    The analysis is purely keyword based. Intent and indicator keywords are
    matched as substrings (so inflected forms such as "trends" or "exports"
    still match), while country aliases are matched as whole words because
    many of them are two-letter codes that occur inside ordinary words.
    """

    # Intent keywords
    INTENT_KEYWORDS = {
        'time_series': ['trend', 'over time', 'historical', 'series', 'change', 'growth', 'history'],
        'comparison': ['compare', 'versus', 'vs', 'difference', 'between', 'against', 'relative to'],
        'search': ['search', 'find', 'look for', 'list', 'show me', 'what are'],
        'metadata': ['info', 'information', 'about', 'describe', 'details', 'metadata'],
        'recent': ['recent', 'latest', 'current', 'now', 'today', 'this week', 'this month'],
        'forecast': ['forecast', 'predict', 'future', 'project', 'estimate']
    }

    # Economic indicators mapping
    ECONOMIC_INDICATORS = {
        'gdp': {
            'keywords': ['gdp', 'gross domestic product', 'economic output'],
            'providers': ['fred', 'worldbank'],
            'fred_series': ['GDP', 'GDPC1'],
            'wb_indicator': 'NY.GDP.MKTP.CD'
        },
        'unemployment': {
            'keywords': ['unemployment', 'jobless', 'labor force'],
            'providers': ['fred'],
            'fred_series': ['UNRATE', 'UNEMPLOY']
        },
        'inflation': {
            'keywords': ['inflation', 'cpi', 'consumer price', 'price index'],
            'providers': ['fred', 'worldbank'],
            'fred_series': ['CPIAUCSL', 'CPILFESL'],
            'wb_indicator': 'FP.CPI.TOTL'
        },
        'interest_rate': {
            'keywords': ['interest rate', 'fed rate', 'federal funds', 'treasury'],
            'providers': ['fred'],
            'fred_series': ['DFF', 'DGS10', 'DGS30']
        },
        'population': {
            'keywords': ['population', 'demographic', 'people count'],
            'providers': ['census', 'worldbank'],
            'wb_indicator': 'SP.POP.TOTL'
        },
        'trade': {
            'keywords': ['trade', 'export', 'import', 'trade balance'],
            'providers': ['fred', 'worldbank'],
            'fred_series': ['BOPGSTB'],
            'wb_indicator': 'NE.EXP.GNFS.CD'
        }
    }

    # Country codes and names
    COUNTRIES = {
        'us': ['us', 'usa', 'united states', 'america'],
        'uk': ['uk', 'united kingdom', 'britain'],
        'china': ['china', 'cn'],
        'japan': ['japan', 'jp'],
        'germany': ['germany', 'de'],
        'france': ['france', 'fr'],
        'india': ['india', 'in'],
        'canada': ['canada', 'ca']
    }

    # Aliases that are also everyday English words. They are only accepted
    # when written in upper case in the original query (e.g. "IN"), otherwise
    # "gdp in 2020" would be attributed to India.
    _AMBIGUOUS_COUNTRY_ALIASES = frozenset({'in'})

    # Four-digit years 1900-2099. The century alternation is non-capturing so
    # that re.findall() returns the whole year rather than just "19"/"20".
    _YEAR_RE = re.compile(r'\b(?:19|20)\d{2}\b')

    # Relative expressions such as "last 5 years" or "past 3 months".
    _RELATIVE_TIME_RE = re.compile(r'\b(last|past)\s+(\d+)\s+(year|month|day|week)')

    # intent type -> provider -> operation suggested for that combination.
    # Intents that are absent here yield no suggestions.
    _OPERATIONS_BY_INTENT = {
        'time_series': {
            'fred': 'get_series_observations',
            'worldbank': 'get_indicator',
        },
        'search': {
            'fred': 'search_series',
            'worldbank': 'search_indicators',
            'newsapi': 'search_everything',
        },
        'metadata': {
            'fred': 'get_series_info',
        },
    }

    def analyze_intent(self, query_text: str) -> Dict[str, Any]:
        """
        Analyze query intent and extract key information.

        Args:
            query_text: Natural language query string. ``None`` or an empty
                string is treated as an empty query.

        Returns:
            Dictionary with:
            - intent_type: Primary intent (time_series, comparison, search, etc.);
              defaults to 'search' when no intent keyword matches
            - entities: Extracted indicator entities, one per matched indicator
            - time_range: Extracted time information, or None
            - geographic_scope: Upper-cased COUNTRIES key, or None
            - suggested_providers: Recommended providers, de-duplicated in
              first-seen order
            - suggested_operations: Recommended operations
            - confidence: Confidence score (0-1)
            - keywords_matched: Indicator keywords found in the query
        """
        query_text = query_text or ''
        query_lower = query_text.lower()

        intent_result = {
            'intent_type': 'search',  # Default
            'entities': [],
            'time_range': None,
            'geographic_scope': None,
            'suggested_providers': [],
            'suggested_operations': [],
            'confidence': 0.0,
            'keywords_matched': []
        }

        # Detect intent type: score each intent by how many of its keywords occur
        intent_scores = {}
        for intent_type, keywords in self.INTENT_KEYWORDS.items():
            score = sum(1 for kw in keywords if kw in query_lower)
            if score > 0:
                intent_scores[intent_type] = score

        if intent_scores:
            # Primary intent is the one with highest score (first one wins ties)
            primary_intent = max(intent_scores.items(), key=lambda x: x[1])
            intent_result['intent_type'] = primary_intent[0]
            intent_result['confidence'] += 0.3

        # Extract economic indicators (at most one entity per indicator)
        for indicator_name, indicator_info in self.ECONOMIC_INDICATORS.items():
            for keyword in indicator_info['keywords']:
                if keyword in query_lower:
                    intent_result['entities'].append({
                        'type': 'indicator',
                        'name': indicator_name,
                        'matched_keyword': keyword
                    })
                    intent_result['suggested_providers'].extend(
                        indicator_info['providers']
                    )
                    intent_result['confidence'] += 0.2
                    intent_result['keywords_matched'].append(keyword)
                    break

        # Extract country; confidence is raised once, however many aliases match
        country = self._detect_country(query_text)
        if country is not None:
            intent_result['geographic_scope'] = country
            intent_result['confidence'] += 0.2

        # Extract time range
        time_info = self._extract_time_range(query_lower)
        if time_info:
            intent_result['time_range'] = time_info
            intent_result['confidence'] += 0.2

        # Remove duplicate providers *before* suggesting operations so that no
        # operation is suggested twice; dict.fromkeys keeps first-seen order,
        # which makes the result deterministic.
        intent_result['suggested_providers'] = list(dict.fromkeys(
            intent_result['suggested_providers']
        ))

        # Suggest operations based on intent
        intent_result['suggested_operations'] = self._suggest_operations(
            intent_result['intent_type'],
            intent_result['suggested_providers']
        )

        # Cap confidence at 1.0
        intent_result['confidence'] = min(1.0, intent_result['confidence'])

        return intent_result

    def _detect_country(self, query_text: str) -> Optional[str]:
        """
        Find the country a query refers to.

        Aliases are matched as whole words. When several aliases match, the
        longest one wins (so "china" beats the code "us"); ties go to the
        country listed first in COUNTRIES. Only one country is reported even
        if the query names several.

        Args:
            query_text: Query string in its original casing

        Returns:
            Upper-cased COUNTRIES key (e.g. 'US', 'CHINA'), or None
        """
        query_lower = query_text.lower()
        best_code = None
        best_length = 0

        for country_code, aliases in self.COUNTRIES.items():
            for alias in aliases:
                if len(alias) <= best_length:
                    continue
                if alias in self._AMBIGUOUS_COUNTRY_ALIASES:
                    # Must appear as an upper-case token in the original text
                    haystack, needle = query_text, alias.upper()
                else:
                    haystack, needle = query_lower, alias
                if re.search(rf'\b{re.escape(needle)}\b', haystack):
                    best_code, best_length = country_code, len(alias)

        return best_code.upper() if best_code is not None else None

    def _extract_time_range(self, query_lower: str) -> Optional[Dict[str, Any]]:
        """
        Extract time range information from query.

        Args:
            query_lower: Lowercase query string

        Returns:
            Dictionary with start_date/end_date and type ('explicit_range' or
            'single_year') when years are mentioned; type/quantity/unit when a
            relative expression such as "last 5 years" is found (the relative
            type takes precedence if both are present); None when nothing
            time-related is found.
        """
        time_range = {}

        # Look for year patterns (4 digits)
        years_int = sorted(int(y) for y in self._YEAR_RE.findall(query_lower))

        if len(years_int) >= 2:
            # Found multiple years: span from the earliest to the latest
            time_range['start_date'] = f"{years_int[0]}-01-01"
            time_range['end_date'] = f"{years_int[-1]}-12-31"
            time_range['type'] = 'explicit_range'
        elif len(years_int) == 1:
            # Single year mentioned
            year = years_int[0]
            time_range['start_date'] = f"{year}-01-01"
            time_range['end_date'] = f"{year}-12-31"
            time_range['type'] = 'single_year'

        # Look for relative time expressions
        match = self._RELATIVE_TIME_RE.search(query_lower)
        if match:
            time_range['type'] = 'relative'
            time_range['quantity'] = int(match.group(2))
            time_range['unit'] = match.group(3)

        return time_range if time_range else None

    def _suggest_operations(
        self,
        intent_type: str,
        providers: List[str]
    ) -> List[Dict[str, str]]:
        """
        Suggest appropriate operations based on intent and providers.

        Args:
            intent_type: Detected intent type
            providers: List of suggested providers

        Returns:
            List of {provider, operation} dictionaries, in provider order.
            Empty for intents or providers without a known operation.
        """
        operations = self._OPERATIONS_BY_INTENT.get(intent_type, {})
        return [
            {'provider': provider, 'operation': operations[provider]}
            for provider in providers
            if provider in operations
        ]
271
+
272
+
273
class QueryEnhancer:
    """
    Enhances queries by auto-completing parameters based on intent.

    Values supplied by the caller are never overwritten; the enhancer only
    fills in parameters that are missing.
    """

    def __init__(self, intent_analyzer: Optional["QueryIntentAnalyzer"] = None):
        """
        Initialize query enhancer.

        Args:
            intent_analyzer: Intent analyzer instance (creates new if not provided)
        """
        # Explicit None check so that an injected analyzer is always honoured,
        # even if it happens to evaluate as falsy.
        if intent_analyzer is None:
            intent_analyzer = QueryIntentAnalyzer()
        self.intent_analyzer = intent_analyzer

    def auto_complete_params(
        self,
        provider: str,
        operation: str,
        params: Dict[str, Any],
        query_text: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Auto-complete missing parameters based on query intent.

        Args:
            provider: Provider name
            operation: Operation name (currently not used for completion)
            params: Current parameters; not modified. ``None`` is treated as
                an empty dictionary.
            query_text: Optional natural language query for intent analysis

        Returns:
            Enhanced parameters dictionary (a shallow copy of ``params`` plus
            any defaults that were filled in)
        """
        completed_params = dict(params) if params else {}

        # Analyze intent if query text provided
        intent = None
        if query_text:
            intent = self.intent_analyzer.analyze_intent(query_text)

        intent_type = intent.get('intent_type') if intent else None

        # Add time range parameters if detected and not present
        if intent and intent.get('time_range') and provider == 'fred':
            time_range = intent['time_range']
            if time_range.get('type') in ['explicit_range', 'single_year']:
                if 'start_date' in time_range:
                    completed_params.setdefault('observation_start', time_range['start_date'])
                if 'end_date' in time_range:
                    completed_params.setdefault('observation_end', time_range['end_date'])

        # Add reasonable limits if not specified. setdefault() guarantees that
        # a caller-supplied value (e.g. an explicit per_page) is never replaced.
        if 'limit' not in completed_params and 'page_size' not in completed_params:
            if intent_type == 'time_series':
                # Time series typically need more data
                if provider == 'fred':
                    # FRED API max is 100000, but 1000 is reasonable default
                    completed_params.setdefault('limit', 1000)
                elif provider == 'worldbank':
                    completed_params.setdefault('per_page', 1000)
            else:
                # Search results typically need fewer
                # NOTE(review): worldbank gets 'limit' here but 'per_page' in
                # the time-series branch - confirm which one the provider reads.
                if provider == 'fred':
                    completed_params.setdefault('limit', 20)
                elif provider == 'worldbank':
                    completed_params.setdefault('limit', 20)
                elif provider == 'newsapi':
                    completed_params.setdefault('page_size', 10)

        # Add sort order for time series
        if intent_type == 'time_series' and provider == 'fred':
            completed_params.setdefault('sort_order', 'desc')  # Most recent first

        # Add country code if detected and needed
        # NOTE(review): geographic_scope is whatever the analyzer reports
        # (e.g. 'US', 'CHINA'); confirm the worldbank provider accepts or maps
        # these values before relying on them as country codes.
        if intent and intent.get('geographic_scope') and provider == 'worldbank':
            completed_params.setdefault('country_code', intent['geographic_scope'])

        return completed_params

    def enhance_query_text(
        self,
        query_text: str,
        provider: str
    ) -> str:
        """
        Enhance query text for better search results.

        Only FRED queries are enhanced: the series IDs of the first recognised
        indicator that has a 'fred_series' entry are appended to the query.

        Args:
            query_text: Original query text
            provider: Target provider

        Returns:
            Enhanced query text (the original text when nothing applies)
        """
        if provider != 'fred':
            return query_text

        # Analyze intent
        intent = self.intent_analyzer.analyze_intent(query_text)

        # Look indicators up on the analyzer that produced the entities, so a
        # customised analyzer and its indicator table stay in sync.
        indicators = getattr(self.intent_analyzer, 'ECONOMIC_INDICATORS', {})

        for entity in intent.get('entities') or []:
            if entity['type'] != 'indicator':
                continue
            indicator_info = indicators.get(entity['name'], {})
            if 'fred_series' in indicator_info:
                # Add common series IDs to improve search
                series_ids = ' '.join(indicator_info['fred_series'])
                return f"{query_text} {series_ids}"

        return query_text
387
+