aiecs 1.2.1__py3-none-any.whl → 1.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs has been flagged as potentially problematic; see the registry's advisory page for details.

Files changed (56) hide show
  1. aiecs/__init__.py +1 -1
  2. aiecs/config/config.py +2 -1
  3. aiecs/llm/clients/vertex_client.py +5 -0
  4. aiecs/main.py +2 -2
  5. aiecs/scripts/tools_develop/README.md +111 -2
  6. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  7. aiecs/scripts/tools_develop/validate_tool_schemas.py +80 -21
  8. aiecs/scripts/tools_develop/verify_tools.py +347 -0
  9. aiecs/tools/__init__.py +94 -30
  10. aiecs/tools/apisource/__init__.py +106 -0
  11. aiecs/tools/apisource/intelligence/__init__.py +20 -0
  12. aiecs/tools/apisource/intelligence/data_fusion.py +378 -0
  13. aiecs/tools/apisource/intelligence/query_analyzer.py +387 -0
  14. aiecs/tools/apisource/intelligence/search_enhancer.py +384 -0
  15. aiecs/tools/apisource/monitoring/__init__.py +12 -0
  16. aiecs/tools/apisource/monitoring/metrics.py +308 -0
  17. aiecs/tools/apisource/providers/__init__.py +114 -0
  18. aiecs/tools/apisource/providers/base.py +684 -0
  19. aiecs/tools/apisource/providers/census.py +412 -0
  20. aiecs/tools/apisource/providers/fred.py +575 -0
  21. aiecs/tools/apisource/providers/newsapi.py +402 -0
  22. aiecs/tools/apisource/providers/worldbank.py +346 -0
  23. aiecs/tools/apisource/reliability/__init__.py +14 -0
  24. aiecs/tools/apisource/reliability/error_handler.py +362 -0
  25. aiecs/tools/apisource/reliability/fallback_strategy.py +420 -0
  26. aiecs/tools/apisource/tool.py +814 -0
  27. aiecs/tools/apisource/utils/__init__.py +12 -0
  28. aiecs/tools/apisource/utils/validators.py +343 -0
  29. aiecs/tools/langchain_adapter.py +95 -17
  30. aiecs/tools/search_tool/__init__.py +102 -0
  31. aiecs/tools/search_tool/analyzers.py +583 -0
  32. aiecs/tools/search_tool/cache.py +280 -0
  33. aiecs/tools/search_tool/constants.py +127 -0
  34. aiecs/tools/search_tool/context.py +219 -0
  35. aiecs/tools/search_tool/core.py +773 -0
  36. aiecs/tools/search_tool/deduplicator.py +123 -0
  37. aiecs/tools/search_tool/error_handler.py +257 -0
  38. aiecs/tools/search_tool/metrics.py +375 -0
  39. aiecs/tools/search_tool/rate_limiter.py +177 -0
  40. aiecs/tools/search_tool/schemas.py +297 -0
  41. aiecs/tools/statistics/data_loader_tool.py +2 -2
  42. aiecs/tools/statistics/data_transformer_tool.py +1 -1
  43. aiecs/tools/task_tools/__init__.py +8 -8
  44. aiecs/tools/task_tools/report_tool.py +1 -1
  45. aiecs/tools/tool_executor/__init__.py +2 -0
  46. aiecs/tools/tool_executor/tool_executor.py +284 -14
  47. aiecs/utils/__init__.py +11 -0
  48. aiecs/utils/cache_provider.py +698 -0
  49. aiecs/utils/execution_utils.py +5 -5
  50. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/METADATA +1 -1
  51. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/RECORD +55 -23
  52. aiecs/tools/task_tools/search_tool.py +0 -1123
  53. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/WHEEL +0 -0
  54. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/entry_points.txt +0 -0
  55. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/licenses/LICENSE +0 -0
  56. {aiecs-1.2.1.dist-info → aiecs-1.3.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,814 @@
1
+ """
2
+ API Source Tool
3
+
4
+ Unified interface for querying various external real-time API data sources including
5
+ economic indicators, news, public databases, and custom APIs with plugin architecture.
6
+
7
+ Enhanced Features:
8
+ - Auto-discovery of API providers
9
+ - Unified query interface with intelligent parameter enhancement
10
+ - Intelligent caching with TTL strategies
11
+ - Cross-provider data fusion
12
+ - Automatic fallback to alternative providers
13
+ - Advanced search with relevance scoring
14
+ - Comprehensive error handling with recovery suggestions
15
+ - Detailed metrics and health monitoring
16
+ """
17
+
18
+ import logging
19
+ from datetime import datetime, timedelta
20
+ from typing import Any, Dict, List, Optional
21
+
22
+ from pydantic import BaseModel, Field, ConfigDict
23
+
24
+ from aiecs.tools import register_tool
25
+ from aiecs.tools.base_tool import BaseTool
26
+ from aiecs.tools.tool_executor import cache_result_with_strategy, cache_result, measure_execution_time
27
+ from aiecs.tools.apisource.providers import get_provider, list_providers, PROVIDER_REGISTRY
28
+ from aiecs.tools.apisource.intelligence import (
29
+ QueryIntentAnalyzer,
30
+ QueryEnhancer,
31
+ DataFusionEngine,
32
+ SearchEnhancer
33
+ )
34
+ from aiecs.tools.apisource.reliability import FallbackStrategy
35
+
36
+ logger = logging.getLogger(__name__)
37
+
38
+
39
# Custom exceptions
class APISourceError(Exception):
    """Root of the API Source Tool exception hierarchy.

    Catching this type covers every error raised by the tool itself.
    """
43
+
44
+
45
class ProviderNotFoundError(APISourceError):
    """Raised when a caller asks for a provider name that is not registered."""
48
+
49
+
50
class APIRateLimitError(APISourceError):
    """Raised when an upstream API reports that its rate limit was exceeded."""
53
+
54
+
55
class APIAuthenticationError(APISourceError):
    """Raised when an upstream API rejects the configured credentials."""
58
+
59
+
60
@register_tool("apisource")
class APISourceTool(BaseTool):
    """
    Query external real-time API data sources including economic indicators, news, public databases, and custom APIs.

    Supports multiple data providers through a plugin architecture:
    - FRED: Federal Reserve Economic Data (US economic indicators)
    - World Bank: Global development indicators
    - News API: News articles and headlines
    - Census: US Census Bureau demographic and economic data

    Provides unified interface with automatic rate limiting, caching, and error handling.
    """

    # Configuration schema
    class Config(BaseModel):
        """Configuration for the API Source Tool.

        Validated from the dict passed to ``__init__``; every field has a
        default, so an empty config is valid.
        """
        # NOTE(review): ``env_prefix`` is honored by pydantic-settings'
        # BaseSettings, not by plain BaseModel — confirm that the
        # APISOURCE_TOOL_* environment variables are actually read somewhere
        # (e.g. by BaseTool) before relying on them.
        model_config = ConfigDict(env_prefix="APISOURCE_TOOL_")

        # NOTE(review): cache_ttl, max_retries, and enable_rate_limiting are
        # not referenced anywhere in this module — presumably consumed by the
        # providers or the tool executor; verify.
        cache_ttl: int = Field(
            default=300,
            description="Cache time-to-live in seconds for API responses"
        )
        # Passed to every provider as its 'timeout' by _load_providers().
        default_timeout: int = Field(
            default=30,
            description="Default timeout in seconds for API requests"
        )
        max_retries: int = Field(
            default=3,
            description="Maximum number of retries for failed requests"
        )
        enable_rate_limiting: bool = Field(
            default=True,
            description="Whether to enable rate limiting for API requests"
        )
        # Default for query(): retry against alternative providers on failure.
        enable_fallback: bool = Field(
            default=True,
            description="Enable automatic fallback to alternative providers"
        )
        # Default for search(): merge results from multiple providers.
        enable_data_fusion: bool = Field(
            default=True,
            description="Enable cross-provider data fusion in search"
        )
        # When True, natural-language query text may auto-complete parameters.
        enable_query_enhancement: bool = Field(
            default=True,
            description="Enable intelligent query parameter enhancement"
        )
        # Provider credentials; _load_providers() discovers them via the
        # '<provider_name>_api_key' attribute naming convention.
        fred_api_key: Optional[str] = Field(
            default=None,
            description="API key for Federal Reserve Economic Data (FRED)"
        )
        newsapi_api_key: Optional[str] = Field(
            default=None,
            description="API key for News API"
        )
        census_api_key: Optional[str] = Field(
            default=None,
            description="API key for US Census Bureau"
        )
119
+
120
+ def __init__(self, config: Optional[Dict[str, Any]] = None):
121
+ """
122
+ Initialize API Source Tool with enhanced intelligence features.
123
+
124
+ Args:
125
+ config: Configuration dictionary with API keys and settings
126
+ """
127
+ super().__init__(config)
128
+
129
+ # Parse configuration
130
+ self.config = self.Config(**(config or {}))
131
+
132
+ self.logger = logging.getLogger(__name__)
133
+ if not self.logger.handlers:
134
+ handler = logging.StreamHandler()
135
+ handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
136
+ self.logger.addHandler(handler)
137
+
138
+ # Initialize intelligence components
139
+ self.query_analyzer = QueryIntentAnalyzer()
140
+ self.query_enhancer = QueryEnhancer(self.query_analyzer)
141
+ self.data_fusion = DataFusionEngine()
142
+ self.fallback_strategy = FallbackStrategy()
143
+ self.search_enhancer = SearchEnhancer(
144
+ relevance_weight=0.5,
145
+ popularity_weight=0.3,
146
+ recency_weight=0.2
147
+ )
148
+
149
+ # Load providers (they auto-discover on import)
150
+ self._providers = {}
151
+ self._load_providers()
152
+
153
+ def _load_providers(self):
154
+ """Load and cache provider instances"""
155
+ for provider_name in PROVIDER_REGISTRY.keys():
156
+ try:
157
+ # Create provider config from tool config
158
+ provider_config = {
159
+ 'timeout': self.config.default_timeout,
160
+ }
161
+
162
+ # Add provider-specific API key if available
163
+ api_key_attr = f'{provider_name}_api_key'
164
+ if hasattr(self.config, api_key_attr):
165
+ api_key = getattr(self.config, api_key_attr)
166
+ if api_key:
167
+ provider_config['api_key'] = api_key
168
+
169
+ provider = get_provider(provider_name, provider_config)
170
+ self._providers[provider_name] = provider
171
+ self.logger.debug(f"Loaded provider: {provider_name}")
172
+ except Exception as e:
173
+ self.logger.warning(f"Failed to load provider {provider_name}: {e}")
174
+
175
+ @classmethod
176
+ def _discover_provider_operations(cls) -> List[Dict[str, Any]]:
177
+ """
178
+ Discover all exposed operations from all registered providers.
179
+
180
+ This method enables the LangChain adapter to automatically create individual
181
+ tools for each provider operation, giving AI agents fine-grained visibility
182
+ into provider capabilities.
183
+
184
+ Returns:
185
+ List of operation dictionaries, each containing:
186
+ - name: Full operation name (e.g., 'fred_get_series_observations')
187
+ - schema: Pydantic schema for the operation
188
+ - description: Operation description
189
+ - method: Callable method to execute the operation
190
+ """
191
+ operations = []
192
+
193
+ for provider_name, provider_class in PROVIDER_REGISTRY.items():
194
+ try:
195
+ # Get exposed operations from provider
196
+ exposed_ops = provider_class.get_exposed_operations()
197
+
198
+ for op in exposed_ops:
199
+ # Convert Dict-based schema to Pydantic schema
200
+ pydantic_schema = cls._convert_dict_schema_to_pydantic(
201
+ op['schema'],
202
+ f"{provider_name}_{op['name']}"
203
+ ) if op['schema'] else None
204
+
205
+ # Create operation info
206
+ operation_info = {
207
+ 'name': f"{provider_name}_{op['name']}",
208
+ 'schema': pydantic_schema,
209
+ 'description': op['description'],
210
+ 'method_name': op['name'], # Store original operation name
211
+ 'provider_name': provider_name # Store provider name
212
+ }
213
+
214
+ operations.append(operation_info)
215
+ logger.debug(f"Discovered provider operation: {operation_info['name']}")
216
+
217
+ except Exception as e:
218
+ logger.warning(f"Error discovering operations for provider {provider_name}: {e}")
219
+
220
+ logger.info(f"Discovered {len(operations)} provider operations across {len(PROVIDER_REGISTRY)} providers")
221
+ return operations
222
+
223
    @staticmethod
    def _convert_dict_schema_to_pydantic(
        dict_schema: Optional[Dict[str, Any]],
        schema_name: str
    ) -> Optional[type[BaseModel]]:
        """
        Convert Dict-based provider schema to Pydantic BaseModel schema.

        This enables provider operation schemas to be used by the LangChain adapter
        and exposed to AI agents with full type information.

        Args:
            dict_schema: Dictionary schema from provider.get_operation_schema()
            schema_name: Name for the generated Pydantic schema class

        Returns:
            Pydantic BaseModel class or None if schema is invalid
        """
        # A schema without a 'parameters' section cannot produce any fields.
        if not dict_schema or 'parameters' not in dict_schema:
            return None

        try:
            from pydantic import create_model

            fields = {}
            parameters = dict_schema.get('parameters', {})

            for param_name, param_info in parameters.items():
                # Determine field type from schema
                param_type_str = param_info.get('type', 'string')

                # Map JSON-schema-style type names to Python types; unknown
                # type names fall back to str.
                type_mapping = {
                    'string': str,
                    'integer': int,
                    'number': float,
                    'boolean': bool,
                    'array': List[Any],
                    'object': Dict[str, Any]
                }

                field_type = type_mapping.get(param_type_str, str)

                # Make optional if not required
                is_required = param_info.get('required', False)
                if not is_required:
                    field_type = Optional[field_type]

                # Build field description
                description_parts = [param_info.get('description', '')]

                # Append up to three examples if available
                if 'examples' in param_info and param_info['examples']:
                    examples_str = ', '.join(str(ex) for ex in param_info['examples'][:3])
                    description_parts.append(f"Examples: {examples_str}")

                # Append validation info (regex pattern, numeric range) if available
                if 'validation' in param_info:
                    validation = param_info['validation']
                    if 'pattern' in validation:
                        description_parts.append(f"Pattern: {validation['pattern']}")
                    if 'min' in validation or 'max' in validation:
                        range_str = f"Range: {validation.get('min', 'any')}-{validation.get('max', 'any')}"
                        description_parts.append(range_str)

                # filter(None, ...) drops empty parts so there is no leading '. '.
                full_description = '. '.join(filter(None, description_parts))

                # Create field with default value if not required
                if is_required:
                    fields[param_name] = (field_type, Field(description=full_description))
                else:
                    fields[param_name] = (field_type, Field(default=None, description=full_description))

            # Create the Pydantic model.
            # NOTE(review): replace('_', '').title() strips underscores before
            # title-casing, so 'fred_search' yields 'FredsearchSchema' rather
            # than 'FredSearchSchema' — confirm this naming is intended.
            schema_class = create_model(
                f"{schema_name.replace('_', '').title()}Schema",
                __doc__=dict_schema.get('description', ''),
                **fields
            )

            logger.debug(f"Created Pydantic schema: {schema_class.__name__} with {len(fields)} fields")
            return schema_class

        except Exception as e:
            # Conversion is best-effort: log and return None so the caller can
            # still register the operation without a schema.
            logger.error(f"Error converting schema {schema_name}: {e}")
            return None
309
+
310
+ def _create_query_ttl_strategy(self):
311
+ """
312
+ Create intelligent TTL strategy for API query results.
313
+
314
+ This strategy calculates TTL based on:
315
+ 1. Data type (historical vs real-time)
316
+ 2. Provider characteristics
317
+ 3. Operation type
318
+ 4. Data quality and freshness
319
+
320
+ Returns:
321
+ Callable: TTL strategy function compatible with cache_result_with_strategy
322
+ """
323
+ def calculate_query_ttl(result: Any, args: tuple, kwargs: dict) -> int:
324
+ """
325
+ Calculate intelligent TTL for API query results.
326
+
327
+ Args:
328
+ result: The query result dictionary
329
+ args: Positional arguments (not used)
330
+ kwargs: Keyword arguments containing provider, operation, params
331
+
332
+ Returns:
333
+ int: TTL in seconds
334
+ """
335
+ provider = kwargs.get('provider', '')
336
+ operation = kwargs.get('operation', '')
337
+
338
+ # Default TTL
339
+ default_ttl = 600 # 10 minutes
340
+
341
+ # Extract metadata if available
342
+ metadata = result.get('metadata', {}) if isinstance(result, dict) else {}
343
+ quality = metadata.get('quality', {})
344
+ freshness_hours = quality.get('freshness_hours', 24)
345
+
346
+ # Historical time series data - cache longer
347
+ if operation in ['get_series_observations', 'get_indicator', 'get_series']:
348
+ # Check if data is historical (older than 24 hours)
349
+ if freshness_hours > 24:
350
+ # Historical data: cache for 7 days
351
+ ttl = 86400 * 7
352
+ self.logger.debug(f"Historical data detected, TTL: {ttl}s (7 days)")
353
+ return ttl
354
+ else:
355
+ # Recent data: cache for 1 hour
356
+ ttl = 3600
357
+ self.logger.debug(f"Recent time series data, TTL: {ttl}s (1 hour)")
358
+ return ttl
359
+
360
+ # News data - cache very short time
361
+ elif operation in ['get_top_headlines', 'search_everything', 'get_everything']:
362
+ ttl = 300 # 5 minutes
363
+ self.logger.debug(f"News data, TTL: {ttl}s (5 minutes)")
364
+ return ttl
365
+
366
+ # Metadata operations - cache longer
367
+ elif operation in ['list_countries', 'list_indicators', 'get_sources',
368
+ 'get_categories', 'get_releases', 'list_sources']:
369
+ ttl = 86400 # 1 day
370
+ self.logger.debug(f"Metadata operation, TTL: {ttl}s (1 day)")
371
+ return ttl
372
+
373
+ # Search operations - moderate cache time
374
+ elif operation in ['search_series', 'search_indicators', 'search']:
375
+ ttl = 600 # 10 minutes
376
+ self.logger.debug(f"Search operation, TTL: {ttl}s (10 minutes)")
377
+ return ttl
378
+
379
+ # Info operations - cache longer
380
+ elif operation in ['get_series_info', 'get_indicator_info']:
381
+ ttl = 3600 # 1 hour
382
+ self.logger.debug(f"Info operation, TTL: {ttl}s (1 hour)")
383
+ return ttl
384
+
385
+ # Default
386
+ self.logger.debug(f"Default TTL: {default_ttl}s (10 minutes)")
387
+ return default_ttl
388
+
389
+ return calculate_query_ttl
390
+
391
+ def _create_search_ttl_strategy(self):
392
+ """
393
+ Create intelligent TTL strategy for multi-provider search results.
394
+
395
+ This strategy calculates TTL based on:
396
+ 1. Query intent type
397
+ 2. Number of providers queried
398
+ 3. Whether data fusion was applied
399
+
400
+ Returns:
401
+ Callable: TTL strategy function compatible with cache_result_with_strategy
402
+ """
403
+ def calculate_search_ttl(result: Any, args: tuple, kwargs: dict) -> int:
404
+ """
405
+ Calculate intelligent TTL for search results.
406
+
407
+ Args:
408
+ result: The search result dictionary
409
+ args: Positional arguments (not used)
410
+ kwargs: Keyword arguments containing query, providers, etc.
411
+
412
+ Returns:
413
+ int: TTL in seconds
414
+ """
415
+ # Default TTL for search results
416
+ default_ttl = 300 # 5 minutes
417
+
418
+ if not isinstance(result, dict):
419
+ return default_ttl
420
+
421
+ # Extract metadata
422
+ metadata = result.get('metadata', {})
423
+ intent_analysis = metadata.get('intent_analysis', {})
424
+ intent_type = intent_analysis.get('intent_type', 'general')
425
+
426
+ # Adjust TTL based on intent type
427
+ if intent_type in ['metadata', 'definition']:
428
+ # Metadata and definitions change rarely
429
+ ttl = 3600 # 1 hour
430
+ self.logger.debug(f"Search intent: {intent_type}, TTL: {ttl}s (1 hour)")
431
+ return ttl
432
+
433
+ elif intent_type in ['time_series', 'comparison']:
434
+ # Time series and comparisons - moderate cache
435
+ ttl = 600 # 10 minutes
436
+ self.logger.debug(f"Search intent: {intent_type}, TTL: {ttl}s (10 minutes)")
437
+ return ttl
438
+
439
+ elif intent_type == 'search':
440
+ # General search - short cache
441
+ ttl = 300 # 5 minutes
442
+ self.logger.debug(f"Search intent: {intent_type}, TTL: {ttl}s (5 minutes)")
443
+ return ttl
444
+
445
+ # Default
446
+ self.logger.debug(f"Search default TTL: {default_ttl}s (5 minutes)")
447
+ return default_ttl
448
+
449
+ return calculate_search_ttl
450
+
451
    # Schema definitions — one per public operation (query, list_providers,
    # get_provider_info, search); these mirror the corresponding method
    # signatures for schema-driven callers such as the LangChain adapter.
    class QuerySchema(BaseModel):
        """Schema for query operation"""
        provider: str = Field(description="API provider name (e.g., 'fred', 'worldbank', 'newsapi', 'census')")
        operation: str = Field(description="Provider-specific operation to perform (e.g., 'get_series', 'search_indicators')")
        params: Dict[str, Any] = Field(description="Operation-specific parameters as key-value pairs")

    class ListProvidersSchema(BaseModel):
        """Schema for list_providers operation (no parameters required)"""
        pass

    class GetProviderInfoSchema(BaseModel):
        """Schema for get_provider_info operation"""
        provider: str = Field(description="API provider name to get information about")

    class SearchSchema(BaseModel):
        """Schema for search operation"""
        query: str = Field(description="Search query text to find across providers")
        providers: Optional[List[str]] = Field(
            default=None,
            description="List of provider names to search (searches all if not specified)"
        )
        limit: int = Field(
            default=10,
            description="Maximum number of results to return per provider"
        )
477
+
478
    # NOTE(review): the ttl_strategy lambda assumes cache_result_with_strategy
    # invokes it with the bound instance as the first argument — confirm
    # against the tool_executor implementation.
    @cache_result_with_strategy(ttl_strategy=lambda self, result, args, kwargs:
        self._create_query_ttl_strategy()(result, args, kwargs))
    @measure_execution_time
    def query(self, provider: str, operation: str, params: Dict[str, Any],
              query_text: Optional[str] = None, enable_fallback: Optional[bool] = None) -> Dict[str, Any]:
        """
        Query a specific API provider with intelligent parameter enhancement and automatic fallback.

        Args:
            provider: API provider name (e.g., 'fred', 'worldbank', 'newsapi', 'census')
            operation: Provider-specific operation (e.g., 'get_series', 'search_indicators')
            params: Operation-specific parameters as dictionary
            query_text: Optional natural language query for intelligent parameter enhancement
            enable_fallback: Override config setting for fallback (defaults to config value)

        Returns:
            Dictionary containing response data with enhanced metadata

        Raises:
            ProviderNotFoundError: If the specified provider is not available
            ValueError: If operation or parameters are invalid
            APISourceError: If the API request fails after all retries and fallbacks
        """
        # Fail fast, naming the providers that did load successfully.
        if provider not in self._providers:
            available = ', '.join(self._providers.keys())
            raise ProviderNotFoundError(
                f"Provider '{provider}' not found. Available providers: {available}"
            )

        # Parameter enhancement is best-effort: on any failure we fall back
        # to the caller's raw params rather than aborting the query.
        enhanced_params = params
        if self.config.enable_query_enhancement and query_text:
            try:
                enhanced_params = self.query_enhancer.auto_complete_params(
                    provider, operation, params, query_text
                )
                if enhanced_params != params:
                    self.logger.debug(
                        f"Enhanced parameters from {params} to {enhanced_params}"
                    )
            except Exception as e:
                self.logger.warning(f"Parameter enhancement failed: {e}")
                enhanced_params = params

        # Per-call override wins; otherwise use the configured default.
        use_fallback = (
            enable_fallback if enable_fallback is not None
            else self.config.enable_fallback
        )

        if use_fallback:
            # Route through the fallback strategy, which may retry the same
            # operation against alternative providers.
            def provider_executor(prov: str, op: str, par: Dict[str, Any]) -> Dict[str, Any]:
                """Execute provider operation"""
                return self._providers[prov].execute(op, par)

            result = self.fallback_strategy.execute_with_fallback(
                primary_provider=provider,
                operation=operation,
                params=enhanced_params,
                provider_executor=provider_executor,
                providers_available=list(self._providers.keys())
            )

            if result['success']:
                return result['data']
            else:
                # Build comprehensive error message from the attempt history.
                error_msg = f"Failed to execute {provider}.{operation}"
                if result['attempts']:
                    error_msg += f" after {len(result['attempts'])} attempts"
                if result.get('fallback_used'):
                    error_msg += " (including fallback providers)"

                raise APISourceError(error_msg)
        else:
            # Direct execution without fallback. Any provider error is wrapped
            # in APISourceError; the original exception type is not preserved
            # (consider `raise ... from e` for chaining).
            try:
                provider_instance = self._providers[provider]
                result = provider_instance.execute(operation, enhanced_params)
                return result
            except Exception as e:
                self.logger.error(f"Error querying {provider}.{operation}: {e}")
                raise APISourceError(f"Failed to query {provider}: {str(e)}")
562
+
563
+ @cache_result(ttl=3600) # Cache provider list for 1 hour
564
+ @measure_execution_time
565
+ def list_providers(self) -> List[Dict[str, Any]]:
566
+ """
567
+ List all available API providers with their metadata.
568
+
569
+ Returns:
570
+ List of provider metadata dictionaries containing name, description, supported operations, and statistics
571
+ """
572
+ return list_providers()
573
+
574
+ @cache_result(ttl=1800) # Cache provider info for 30 minutes
575
+ @measure_execution_time
576
+ def get_provider_info(self, provider: str) -> Dict[str, Any]:
577
+ """
578
+ Get detailed information about a specific API provider.
579
+
580
+ Args:
581
+ provider: API provider name to get information about
582
+
583
+ Returns:
584
+ Dictionary with provider metadata including name, description, operations, and configuration
585
+
586
+ Raises:
587
+ ProviderNotFoundError: If the specified provider is not found
588
+ """
589
+ if provider not in self._providers:
590
+ available = ', '.join(self._providers.keys())
591
+ raise ProviderNotFoundError(
592
+ f"Provider '{provider}' not found. Available providers: {available}"
593
+ )
594
+
595
+ provider_instance = self._providers[provider]
596
+ return provider_instance.get_metadata()
597
+
598
    # NOTE(review): the ttl_strategy lambda assumes cache_result_with_strategy
    # invokes it with the bound instance as the first argument — confirm
    # against the tool_executor implementation.
    @cache_result_with_strategy(ttl_strategy=lambda self, result, args, kwargs:
        self._create_search_ttl_strategy()(result, args, kwargs))
    @measure_execution_time
    def search(
        self,
        query: str,
        providers: Optional[List[str]] = None,
        limit: int = 10,
        enable_fusion: Optional[bool] = None,
        enable_enhancement: Optional[bool] = None,
        fusion_strategy: str = 'best_quality',
        search_options: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Search across multiple API providers with intelligent fusion and enhancement.

        Args:
            query: Search query text to find relevant data
            providers: List of provider names to search (searches all if not specified)
            limit: Maximum number of results to return per provider
            enable_fusion: Override config for data fusion (defaults to config value)
            enable_enhancement: Override config for search enhancement (defaults to True)
            fusion_strategy: Strategy for data fusion ('best_quality', 'merge_all', 'consensus')
            search_options: Options for search enhancement:
                - relevance_threshold: Minimum relevance score (0-1)
                - sort_by: Sort method ('relevance', 'popularity', 'recency', 'composite')
                - max_results: Maximum results after enhancement

        Returns:
            Dictionary with:
            - results: Enhanced and potentially fused search results
            - metadata: Search metadata including fusion info and query analysis
            - providers_queried: List of providers that were queried
        """
        # Default to every loaded provider.
        if providers is None:
            providers = list(self._providers.keys())

        # Analyze query intent; the analysis also drives cache TTL (see the
        # search TTL strategy reading metadata['intent_analysis']).
        intent_analysis = self.query_analyzer.analyze_intent(query)
        self.logger.info(
            f"Query intent: {intent_analysis['intent_type']} "
            f"(confidence: {intent_analysis['confidence']:.2f})"
        )

        # Intent-suggested providers REPLACE the caller's provider list when
        # any of them are actually loaded.
        if intent_analysis.get('suggested_providers'):
            suggested = [
                p for p in intent_analysis['suggested_providers']
                if p in self._providers
            ]
            if suggested:
                providers = suggested
                self.logger.debug(f"Using suggested providers: {providers}")

        results = []
        providers_queried = []

        for provider_name in providers:
            if provider_name not in self._providers:
                self.logger.warning(f"Skipping unknown provider: {provider_name}")
                continue

            try:
                provider_instance = self._providers[provider_name]

                # Optionally rewrite the query text for this provider.
                enhanced_query = query
                if self.config.enable_query_enhancement:
                    enhanced_query = self.query_enhancer.enhance_query_text(
                        query, provider_name
                    )

                # Dispatch to the provider-specific search operation; the
                # parameter names differ per provider API.
                if provider_name == 'fred':
                    result = provider_instance.execute(
                        'search_series',
                        {'search_text': enhanced_query, 'limit': limit}
                    )
                elif provider_name == 'worldbank':
                    result = provider_instance.execute(
                        'search_indicators',
                        {'search_text': enhanced_query, 'limit': limit}
                    )
                elif provider_name == 'newsapi':
                    result = provider_instance.execute(
                        'search_everything',
                        {'q': enhanced_query, 'page_size': limit}
                    )
                else:
                    # Skip providers without search capability (e.g. census).
                    continue

                results.append(result)
                providers_queried.append(provider_name)

            except Exception as e:
                # One provider failing must not abort the whole search.
                self.logger.warning(f"Search failed for provider {provider_name}: {e}")

        # Nothing succeeded: return an empty, well-formed response.
        if not results:
            return {
                'results': [],
                'metadata': {
                    'query': query,
                    'intent_analysis': intent_analysis,
                    'providers_queried': providers_queried,
                    'total_results': 0
                },
                'providers_queried': providers_queried
            }

        # Apply data fusion if enabled (only meaningful with 2+ result sets).
        use_fusion = (
            enable_fusion if enable_fusion is not None
            else self.config.enable_data_fusion
        )

        if use_fusion and len(results) > 1:
            fused_result = self.data_fusion.fuse_multi_provider_results(
                results, fusion_strategy
            )
            final_data = fused_result.get('data', []) if fused_result else []
        else:
            # Use single result or merge without fusion logic
            if len(results) == 1:
                final_data = results[0].get('data', [])
            else:
                # Simple concatenation of per-provider lists.
                final_data = []
                for result in results:
                    data = result.get('data', [])
                    if isinstance(data, list):
                        final_data.extend(data)

        # NOTE(review): the enhancement default is hard-coded True rather than
        # read from config — there is no enable_search_enhancement setting in
        # Config; confirm this is intended.
        use_enhancement = (
            enable_enhancement if enable_enhancement is not None
            else True  # Always enhance search results
        )

        if use_enhancement and isinstance(final_data, list):
            search_opts = search_options or {}
            enhanced_results = self.search_enhancer.enhance_search_results(
                query, final_data, search_opts
            )
            final_data = enhanced_results

        # Build response with full provenance metadata.
        return {
            'results': final_data,
            'metadata': {
                'query': query,
                'intent_analysis': intent_analysis,
                'providers_queried': providers_queried,
                'total_results': len(final_data) if isinstance(final_data, list) else 1,
                'fusion_applied': use_fusion and len(results) > 1,
                'fusion_strategy': fusion_strategy if use_fusion else None,
                'enhancement_applied': use_enhancement
            },
            'providers_queried': providers_queried
        }
759
+
760
+ def get_metrics_report(self) -> Dict[str, Any]:
761
+ """
762
+ Get comprehensive metrics report from all providers.
763
+
764
+ Returns:
765
+ Dictionary with metrics from all providers and fallback statistics
766
+ """
767
+ report = {
768
+ 'providers': {},
769
+ 'fallback_stats': self.fallback_strategy.get_fallback_stats(),
770
+ 'total_providers': len(self._providers),
771
+ 'healthy_providers': 0,
772
+ 'degraded_providers': 0
773
+ }
774
+
775
+ for provider_name, provider_instance in self._providers.items():
776
+ try:
777
+ provider_metadata = provider_instance.get_metadata()
778
+ health_score = provider_metadata.get('health', {}).get('score', 0)
779
+
780
+ report['providers'][provider_name] = {
781
+ 'health': provider_metadata.get('health', {}),
782
+ 'stats': provider_metadata.get('stats', {}),
783
+ 'config': provider_metadata.get('config', {})
784
+ }
785
+
786
+ if health_score > 0.7:
787
+ report['healthy_providers'] += 1
788
+ else:
789
+ report['degraded_providers'] += 1
790
+
791
+ except Exception as e:
792
+ self.logger.warning(f"Failed to get metrics for {provider_name}: {e}")
793
+ report['providers'][provider_name] = {
794
+ 'error': str(e),
795
+ 'status': 'unavailable'
796
+ }
797
+
798
+ # Add overall health assessment
799
+ if report['total_providers'] > 0:
800
+ health_ratio = report['healthy_providers'] / report['total_providers']
801
+ if health_ratio >= 0.8:
802
+ report['overall_status'] = 'healthy'
803
+ elif health_ratio >= 0.5:
804
+ report['overall_status'] = 'degraded'
805
+ else:
806
+ report['overall_status'] = 'unhealthy'
807
+ else:
808
+ report['overall_status'] = 'no_providers'
809
+
810
+ return report
811
+
812
+
813
+ # Register the tool (done via decorator)
814
+