aiecs 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. aiecs/__init__.py +72 -0
  2. aiecs/__main__.py +41 -0
  3. aiecs/aiecs_client.py +469 -0
  4. aiecs/application/__init__.py +10 -0
  5. aiecs/application/executors/__init__.py +10 -0
  6. aiecs/application/executors/operation_executor.py +363 -0
  7. aiecs/application/knowledge_graph/__init__.py +7 -0
  8. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
  11. aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
  12. aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
  13. aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
  14. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  15. aiecs/application/knowledge_graph/extractors/base.py +100 -0
  16. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
  17. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
  18. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
  19. aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
  20. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
  21. aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
  22. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
  23. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
  24. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  25. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
  26. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
  27. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  28. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
  29. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  30. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  31. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
  32. aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
  33. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
  34. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  35. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  36. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
  37. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
  38. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
  39. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
  40. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
  41. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
  42. aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
  43. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
  44. aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
  45. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
  46. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  47. aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
  48. aiecs/application/knowledge_graph/search/reranker.py +295 -0
  49. aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
  50. aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
  51. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  52. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
  53. aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
  54. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  55. aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
  56. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  57. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
  58. aiecs/common/__init__.py +9 -0
  59. aiecs/common/knowledge_graph/__init__.py +17 -0
  60. aiecs/common/knowledge_graph/runnable.py +484 -0
  61. aiecs/config/__init__.py +16 -0
  62. aiecs/config/config.py +498 -0
  63. aiecs/config/graph_config.py +137 -0
  64. aiecs/config/registry.py +23 -0
  65. aiecs/core/__init__.py +46 -0
  66. aiecs/core/interface/__init__.py +34 -0
  67. aiecs/core/interface/execution_interface.py +152 -0
  68. aiecs/core/interface/storage_interface.py +171 -0
  69. aiecs/domain/__init__.py +289 -0
  70. aiecs/domain/agent/__init__.py +189 -0
  71. aiecs/domain/agent/base_agent.py +697 -0
  72. aiecs/domain/agent/exceptions.py +103 -0
  73. aiecs/domain/agent/graph_aware_mixin.py +559 -0
  74. aiecs/domain/agent/hybrid_agent.py +490 -0
  75. aiecs/domain/agent/integration/__init__.py +26 -0
  76. aiecs/domain/agent/integration/context_compressor.py +222 -0
  77. aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
  78. aiecs/domain/agent/integration/retry_policy.py +219 -0
  79. aiecs/domain/agent/integration/role_config.py +213 -0
  80. aiecs/domain/agent/knowledge_aware_agent.py +646 -0
  81. aiecs/domain/agent/lifecycle.py +296 -0
  82. aiecs/domain/agent/llm_agent.py +300 -0
  83. aiecs/domain/agent/memory/__init__.py +12 -0
  84. aiecs/domain/agent/memory/conversation.py +197 -0
  85. aiecs/domain/agent/migration/__init__.py +14 -0
  86. aiecs/domain/agent/migration/conversion.py +160 -0
  87. aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
  88. aiecs/domain/agent/models.py +317 -0
  89. aiecs/domain/agent/observability.py +407 -0
  90. aiecs/domain/agent/persistence.py +289 -0
  91. aiecs/domain/agent/prompts/__init__.py +29 -0
  92. aiecs/domain/agent/prompts/builder.py +161 -0
  93. aiecs/domain/agent/prompts/formatters.py +189 -0
  94. aiecs/domain/agent/prompts/template.py +255 -0
  95. aiecs/domain/agent/registry.py +260 -0
  96. aiecs/domain/agent/tool_agent.py +257 -0
  97. aiecs/domain/agent/tools/__init__.py +12 -0
  98. aiecs/domain/agent/tools/schema_generator.py +221 -0
  99. aiecs/domain/community/__init__.py +155 -0
  100. aiecs/domain/community/agent_adapter.py +477 -0
  101. aiecs/domain/community/analytics.py +481 -0
  102. aiecs/domain/community/collaborative_workflow.py +642 -0
  103. aiecs/domain/community/communication_hub.py +645 -0
  104. aiecs/domain/community/community_builder.py +320 -0
  105. aiecs/domain/community/community_integration.py +800 -0
  106. aiecs/domain/community/community_manager.py +813 -0
  107. aiecs/domain/community/decision_engine.py +879 -0
  108. aiecs/domain/community/exceptions.py +225 -0
  109. aiecs/domain/community/models/__init__.py +33 -0
  110. aiecs/domain/community/models/community_models.py +268 -0
  111. aiecs/domain/community/resource_manager.py +457 -0
  112. aiecs/domain/community/shared_context_manager.py +603 -0
  113. aiecs/domain/context/__init__.py +58 -0
  114. aiecs/domain/context/context_engine.py +989 -0
  115. aiecs/domain/context/conversation_models.py +354 -0
  116. aiecs/domain/context/graph_memory.py +467 -0
  117. aiecs/domain/execution/__init__.py +12 -0
  118. aiecs/domain/execution/model.py +57 -0
  119. aiecs/domain/knowledge_graph/__init__.py +19 -0
  120. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  121. aiecs/domain/knowledge_graph/models/entity.py +130 -0
  122. aiecs/domain/knowledge_graph/models/evidence.py +194 -0
  123. aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
  124. aiecs/domain/knowledge_graph/models/path.py +179 -0
  125. aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
  126. aiecs/domain/knowledge_graph/models/query.py +272 -0
  127. aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
  128. aiecs/domain/knowledge_graph/models/relation.py +136 -0
  129. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  130. aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
  131. aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
  132. aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
  133. aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
  134. aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
  135. aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
  136. aiecs/domain/task/__init__.py +13 -0
  137. aiecs/domain/task/dsl_processor.py +613 -0
  138. aiecs/domain/task/model.py +62 -0
  139. aiecs/domain/task/task_context.py +268 -0
  140. aiecs/infrastructure/__init__.py +24 -0
  141. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  142. aiecs/infrastructure/graph_storage/base.py +601 -0
  143. aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
  144. aiecs/infrastructure/graph_storage/cache.py +429 -0
  145. aiecs/infrastructure/graph_storage/distributed.py +226 -0
  146. aiecs/infrastructure/graph_storage/error_handling.py +390 -0
  147. aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
  148. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  149. aiecs/infrastructure/graph_storage/in_memory.py +514 -0
  150. aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
  151. aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
  152. aiecs/infrastructure/graph_storage/metrics.py +357 -0
  153. aiecs/infrastructure/graph_storage/migration.py +413 -0
  154. aiecs/infrastructure/graph_storage/pagination.py +471 -0
  155. aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
  156. aiecs/infrastructure/graph_storage/postgres.py +871 -0
  157. aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
  158. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  159. aiecs/infrastructure/graph_storage/sqlite.py +623 -0
  160. aiecs/infrastructure/graph_storage/streaming.py +495 -0
  161. aiecs/infrastructure/messaging/__init__.py +13 -0
  162. aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
  163. aiecs/infrastructure/messaging/websocket_manager.py +298 -0
  164. aiecs/infrastructure/monitoring/__init__.py +34 -0
  165. aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
  166. aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
  167. aiecs/infrastructure/monitoring/structured_logger.py +48 -0
  168. aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
  169. aiecs/infrastructure/persistence/__init__.py +24 -0
  170. aiecs/infrastructure/persistence/context_engine_client.py +187 -0
  171. aiecs/infrastructure/persistence/database_manager.py +333 -0
  172. aiecs/infrastructure/persistence/file_storage.py +754 -0
  173. aiecs/infrastructure/persistence/redis_client.py +220 -0
  174. aiecs/llm/__init__.py +86 -0
  175. aiecs/llm/callbacks/__init__.py +11 -0
  176. aiecs/llm/callbacks/custom_callbacks.py +264 -0
  177. aiecs/llm/client_factory.py +420 -0
  178. aiecs/llm/clients/__init__.py +33 -0
  179. aiecs/llm/clients/base_client.py +193 -0
  180. aiecs/llm/clients/googleai_client.py +181 -0
  181. aiecs/llm/clients/openai_client.py +131 -0
  182. aiecs/llm/clients/vertex_client.py +437 -0
  183. aiecs/llm/clients/xai_client.py +184 -0
  184. aiecs/llm/config/__init__.py +51 -0
  185. aiecs/llm/config/config_loader.py +275 -0
  186. aiecs/llm/config/config_validator.py +236 -0
  187. aiecs/llm/config/model_config.py +151 -0
  188. aiecs/llm/utils/__init__.py +10 -0
  189. aiecs/llm/utils/validate_config.py +91 -0
  190. aiecs/main.py +363 -0
  191. aiecs/scripts/__init__.py +3 -0
  192. aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
  193. aiecs/scripts/aid/__init__.py +19 -0
  194. aiecs/scripts/aid/version_manager.py +215 -0
  195. aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
  196. aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
  197. aiecs/scripts/dependance_check/__init__.py +17 -0
  198. aiecs/scripts/dependance_check/dependency_checker.py +938 -0
  199. aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
  200. aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
  201. aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
  202. aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
  203. aiecs/scripts/dependance_patch/__init__.py +7 -0
  204. aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
  205. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  206. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
  207. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
  208. aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
  209. aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
  210. aiecs/scripts/tools_develop/README.md +449 -0
  211. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  212. aiecs/scripts/tools_develop/__init__.py +21 -0
  213. aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
  214. aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
  215. aiecs/scripts/tools_develop/verify_tools.py +356 -0
  216. aiecs/tasks/__init__.py +1 -0
  217. aiecs/tasks/worker.py +172 -0
  218. aiecs/tools/__init__.py +299 -0
  219. aiecs/tools/apisource/__init__.py +99 -0
  220. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  221. aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
  222. aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
  223. aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
  224. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  225. aiecs/tools/apisource/monitoring/metrics.py +303 -0
  226. aiecs/tools/apisource/providers/__init__.py +115 -0
  227. aiecs/tools/apisource/providers/base.py +664 -0
  228. aiecs/tools/apisource/providers/census.py +401 -0
  229. aiecs/tools/apisource/providers/fred.py +564 -0
  230. aiecs/tools/apisource/providers/newsapi.py +412 -0
  231. aiecs/tools/apisource/providers/worldbank.py +357 -0
  232. aiecs/tools/apisource/reliability/__init__.py +12 -0
  233. aiecs/tools/apisource/reliability/error_handler.py +375 -0
  234. aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
  235. aiecs/tools/apisource/tool.py +850 -0
  236. aiecs/tools/apisource/utils/__init__.py +9 -0
  237. aiecs/tools/apisource/utils/validators.py +338 -0
  238. aiecs/tools/base_tool.py +201 -0
  239. aiecs/tools/docs/__init__.py +121 -0
  240. aiecs/tools/docs/ai_document_orchestrator.py +599 -0
  241. aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
  242. aiecs/tools/docs/content_insertion_tool.py +1333 -0
  243. aiecs/tools/docs/document_creator_tool.py +1317 -0
  244. aiecs/tools/docs/document_layout_tool.py +1166 -0
  245. aiecs/tools/docs/document_parser_tool.py +994 -0
  246. aiecs/tools/docs/document_writer_tool.py +1818 -0
  247. aiecs/tools/knowledge_graph/__init__.py +17 -0
  248. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
  249. aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
  250. aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
  251. aiecs/tools/langchain_adapter.py +542 -0
  252. aiecs/tools/schema_generator.py +275 -0
  253. aiecs/tools/search_tool/__init__.py +100 -0
  254. aiecs/tools/search_tool/analyzers.py +589 -0
  255. aiecs/tools/search_tool/cache.py +260 -0
  256. aiecs/tools/search_tool/constants.py +128 -0
  257. aiecs/tools/search_tool/context.py +216 -0
  258. aiecs/tools/search_tool/core.py +749 -0
  259. aiecs/tools/search_tool/deduplicator.py +123 -0
  260. aiecs/tools/search_tool/error_handler.py +271 -0
  261. aiecs/tools/search_tool/metrics.py +371 -0
  262. aiecs/tools/search_tool/rate_limiter.py +178 -0
  263. aiecs/tools/search_tool/schemas.py +277 -0
  264. aiecs/tools/statistics/__init__.py +80 -0
  265. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
  266. aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
  267. aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
  268. aiecs/tools/statistics/data_loader_tool.py +564 -0
  269. aiecs/tools/statistics/data_profiler_tool.py +658 -0
  270. aiecs/tools/statistics/data_transformer_tool.py +573 -0
  271. aiecs/tools/statistics/data_visualizer_tool.py +495 -0
  272. aiecs/tools/statistics/model_trainer_tool.py +487 -0
  273. aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
  274. aiecs/tools/task_tools/__init__.py +86 -0
  275. aiecs/tools/task_tools/chart_tool.py +732 -0
  276. aiecs/tools/task_tools/classfire_tool.py +922 -0
  277. aiecs/tools/task_tools/image_tool.py +447 -0
  278. aiecs/tools/task_tools/office_tool.py +684 -0
  279. aiecs/tools/task_tools/pandas_tool.py +635 -0
  280. aiecs/tools/task_tools/report_tool.py +635 -0
  281. aiecs/tools/task_tools/research_tool.py +392 -0
  282. aiecs/tools/task_tools/scraper_tool.py +715 -0
  283. aiecs/tools/task_tools/stats_tool.py +688 -0
  284. aiecs/tools/temp_file_manager.py +130 -0
  285. aiecs/tools/tool_executor/__init__.py +37 -0
  286. aiecs/tools/tool_executor/tool_executor.py +881 -0
  287. aiecs/utils/LLM_output_structor.py +445 -0
  288. aiecs/utils/__init__.py +34 -0
  289. aiecs/utils/base_callback.py +47 -0
  290. aiecs/utils/cache_provider.py +695 -0
  291. aiecs/utils/execution_utils.py +184 -0
  292. aiecs/utils/logging.py +1 -0
  293. aiecs/utils/prompt_loader.py +14 -0
  294. aiecs/utils/token_usage_repository.py +323 -0
  295. aiecs/ws/__init__.py +0 -0
  296. aiecs/ws/socket_server.py +52 -0
  297. aiecs-1.5.1.dist-info/METADATA +608 -0
  298. aiecs-1.5.1.dist-info/RECORD +302 -0
  299. aiecs-1.5.1.dist-info/WHEEL +5 -0
  300. aiecs-1.5.1.dist-info/entry_points.txt +10 -0
  301. aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
  302. aiecs-1.5.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,381 @@
1
+ """
2
+ Data Fusion Engine for Cross-Provider Results
3
+
4
+ Intelligently merges results from multiple API providers:
5
+ - Detect and handle duplicate data
6
+ - Resolve conflicts based on quality scores
7
+ - Support multiple fusion strategies
8
+ - Preserve provenance information
9
+ """
10
+
11
+ import logging
12
+ from typing import Any, Dict, List, Optional, Tuple
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class DataFusionEngine:
    """
    Fuses data from multiple providers intelligently.

    Handles duplicate detection, conflict resolution, and data quality
    optimization when combining results from different sources.

    Provider results are plain dictionaries. The keys this class reads are
    ``data``, ``provider``, ``operation`` and ``metadata`` (including the
    nested ``metadata["quality"]["score"]`` and ``metadata["timestamp"]``).
    """

    # Fusion strategies
    STRATEGY_BEST_QUALITY = "best_quality"
    STRATEGY_MERGE_ALL = "merge_all"
    STRATEGY_CONSENSUS = "consensus"
    STRATEGY_FIRST_SUCCESS = "first_success"

    # Fields compared by detect_duplicate_data() when no key_fields are given.
    _DEFAULT_KEY_FIELDS = (
        "id",
        "series_id",
        "indicator_code",
        "indicator_id",
        "title",
        "name",
        "code",
    )
    # Fields used by deduplicate_results() when no key_fields are given.
    _DEFAULT_SIGNATURE_FIELDS = (
        "id",
        "series_id",
        "indicator_code",
        "title",
        "name",
    )
    # Free-text fields compared by _check_text_similarity().
    _TEXT_FIELDS = ("title", "name", "description")

    # Score assumed for a result that does not report one.
    _DEFAULT_QUALITY_SCORE = 0.5
    # Fraction of matching key fields above which two items are duplicates.
    _KEY_FIELD_DUPLICATE_THRESHOLD = 0.8
    # Word-set overlap above which two texts are duplicates.
    _TEXT_DUPLICATE_THRESHOLD = 0.7

    def __init__(self):
        """Initialize data fusion engine"""

    def fuse_multi_provider_results(
        self,
        results: List[Dict[str, Any]],
        fusion_strategy: str = STRATEGY_BEST_QUALITY,
    ) -> Optional[Dict[str, Any]]:
        """
        Fuse results from multiple providers.

        Args:
            results: List of results from different providers
            fusion_strategy: Strategy to use for fusion:
                - 'best_quality': Select result with highest quality score
                - 'merge_all': Merge all results, preserving sources
                - 'consensus': Use data points agreed upon by multiple sources
                - 'first_success': Use first successful result
                Any other value logs a warning and falls back to 'best_quality'.

        Returns:
            Fused result dictionary or None if no valid results
        """
        if not results:
            return None

        # Filter out failed results (a result without data counts as failed)
        valid_results = [r for r in results if r.get("data") is not None]
        if not valid_results:
            return None

        if fusion_strategy == self.STRATEGY_BEST_QUALITY:
            return self._fuse_best_quality(valid_results)

        if fusion_strategy == self.STRATEGY_MERGE_ALL:
            return self._fuse_merge_all(valid_results)

        if fusion_strategy == self.STRATEGY_CONSENSUS:
            return self._fuse_consensus(valid_results)

        if fusion_strategy == self.STRATEGY_FIRST_SUCCESS:
            # Returned as-is: no fusion metadata is attached for this strategy.
            return valid_results[0]

        logger.warning(f"Unknown fusion strategy: {fusion_strategy}, using best_quality")
        return self._fuse_best_quality(valid_results)

    def _fuse_best_quality(self, results: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Select result with highest quality score.

        The winning result is returned as a shallow copy with its own
        ``metadata`` dict, so the caller's input is not modified and results
        that carry no ``metadata`` key are handled.

        Args:
            results: List of valid results (must be non-empty)

        Returns:
            Copy of the best result with ``metadata["fusion_info"]`` added
        """

        def get_quality_score(result: Dict[str, Any]) -> float:
            """Extract quality score from result, defaulting when absent"""
            metadata = result.get("metadata") or {}
            quality = metadata.get("quality") or {}
            score = quality.get("score")
            return self._DEFAULT_QUALITY_SCORE if score is None else score

        # max() keeps the first of several equally-scored results.
        best_result = max(results, key=get_quality_score)
        selected_provider = best_result.get("provider")

        # Copy before annotating so the provider's own result stays untouched.
        fused = dict(best_result)
        metadata = dict(best_result.get("metadata") or {})
        metadata["fusion_info"] = {
            "strategy": self.STRATEGY_BEST_QUALITY,
            "total_providers_queried": len(results),
            "selected_provider": selected_provider,
            "quality_score": get_quality_score(best_result),
            "alternative_providers": [
                r.get("provider") for r in results if r.get("provider") != selected_provider
            ],
        }
        fused["metadata"] = metadata

        return fused

    def _fuse_merge_all(self, results: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Merge all results, preserving source information.

        Every record is tagged with ``_source_provider`` and
        ``_source_quality``; dict records that come from a list are also tagged
        with ``_source_timestamp``. Non-dict records are wrapped as
        ``{"value": record, ...}``.

        Args:
            results: List of valid results

        Returns:
            Merged result with all data
        """
        merged_data: List[Dict[str, Any]] = []
        sources: List[Dict[str, Any]] = []

        # Collect all data with source tags
        for result in results:
            provider = result.get("provider", "unknown")
            data = result.get("data", [])
            metadata = result.get("metadata") or {}

            # Handle different data structures
            if isinstance(data, list):
                for item in data:
                    if isinstance(item, dict):
                        # Add source information to each item
                        enriched_item = item.copy()
                        enriched_item["_source_provider"] = provider
                        enriched_item["_source_quality"] = metadata.get("quality", {})
                        enriched_item["_source_timestamp"] = metadata.get("timestamp")
                    else:
                        # Handle non-dict items
                        enriched_item = {
                            "value": item,
                            "_source_provider": provider,
                            "_source_quality": metadata.get("quality", {}),
                        }
                    merged_data.append(enriched_item)
            elif isinstance(data, dict):
                # Single dict result
                enriched_data = data.copy()
                enriched_data["_source_provider"] = provider
                enriched_data["_source_quality"] = metadata.get("quality", {})
                merged_data.append(enriched_data)
            else:
                # Scalar (or other non-list, non-dict) payload: keep it rather
                # than dropping it, so it matches the record_count of 1 below.
                merged_data.append(
                    {
                        "value": data,
                        "_source_provider": provider,
                        "_source_quality": metadata.get("quality", {}),
                    }
                )

            # Record source info
            sources.append(
                {
                    "provider": provider,
                    "operation": result.get("operation"),
                    "record_count": len(data) if isinstance(data, list) else 1,
                    "quality": metadata.get("quality", {}),
                }
            )

        return {
            "operation": "multi_provider_search",
            "data": merged_data,
            "metadata": {
                "fusion_info": {
                    "strategy": self.STRATEGY_MERGE_ALL,
                    "total_providers": len(results),
                    "sources": sources,
                }
            },
        }

    def _fuse_consensus(self, results: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Use consensus-based fusion (data agreed upon by multiple sources).

        Currently a placeholder: it returns the best-quality result relabelled
        as 'consensus', with a note saying so.

        Args:
            results: List of valid results

        Returns:
            Consensus result
        """
        # For now, implement a simple version
        # TODO: Implement more sophisticated consensus logic

        # Use best quality as baseline
        consensus = self._fuse_best_quality(results)

        # Update strategy in metadata
        fusion_info = consensus["metadata"]["fusion_info"]
        fusion_info["strategy"] = self.STRATEGY_CONSENSUS
        fusion_info["note"] = "Consensus strategy currently uses best quality baseline"

        return consensus

    def detect_duplicate_data(
        self,
        data1: Dict[str, Any],
        data2: Dict[str, Any],
        key_fields: Optional[List[str]] = None,
    ) -> Tuple[bool, float]:
        """
        Detect if two data items are duplicates.

        Only fields present in both items are compared. When the items share
        none of the key fields, the decision falls back to word overlap of
        their title/name/description text.

        Args:
            data1: First data item
            data2: Second data item
            key_fields: Fields to compare (auto-detected if None)

        Returns:
            Tuple of (is_duplicate, similarity_score)
        """
        if key_fields is None:
            # Auto-detect key fields
            key_fields = list(self._DEFAULT_KEY_FIELDS)

        shared_fields = [field for field in key_fields if field in data1 and field in data2]

        if not shared_fields:
            # No common key fields, check title/name similarity
            return self._check_text_similarity(data1, data2)

        matches = sum(1 for field in shared_fields if data1[field] == data2[field])
        similarity = matches / len(shared_fields)

        return similarity > self._KEY_FIELD_DUPLICATE_THRESHOLD, similarity

    def _check_text_similarity(
        self, data1: Dict[str, Any], data2: Dict[str, Any]
    ) -> Tuple[bool, float]:
        """
        Check text similarity for title/name fields.

        Similarity is the Jaccard overlap of the lower-cased, whitespace-split
        word sets of each text field present in both items.

        Args:
            data1: First data item
            data2: Second data item

        Returns:
            Tuple of (is_duplicate, similarity_score). The score is that of the
            first field exceeding the duplicate threshold, otherwise the
            highest score seen (0.0 when no field could be compared).
        """
        best_similarity = 0.0

        for field in self._TEXT_FIELDS:
            if field not in data1 or field not in data2:
                continue

            # Simple word-based similarity
            words1 = set(str(data1[field]).lower().split())
            words2 = set(str(data2[field]).lower().split())

            if not words1 or not words2:
                continue

            similarity = len(words1 & words2) / len(words1 | words2)

            if similarity > self._TEXT_DUPLICATE_THRESHOLD:
                return True, similarity

            # Keep the score so a near-miss is not reported as 0.0.
            best_similarity = max(best_similarity, similarity)

        return False, best_similarity

    def resolve_conflict(
        self,
        values: List[Dict[str, Any]],
        resolution_strategy: str = "quality",
    ) -> Any:
        """
        Resolve conflicts when multiple sources provide different values.

        Args:
            values: List of value dictionaries with {'value': ..., 'quality': ..., 'source': ...}
            resolution_strategy: Strategy for resolution ('quality', 'majority', 'average')

        Returns:
            Resolved value. None for an empty list; the first value for an
            unknown strategy or when 'average' finds nothing to average.
        """
        if not values:
            return None

        if len(values) == 1:
            return values[0].get("value")

        if resolution_strategy == "quality":
            # Choose value from source with highest quality
            best = max(values, key=lambda v: v.get("quality", {}).get("score", 0))
            return best.get("value")

        if resolution_strategy == "majority":
            # Use most common value; compare by str() so unhashable values work
            from collections import Counter

            value_counts = Counter(str(v.get("value")) for v in values)
            most_common = value_counts.most_common(1)[0][0]
            # Return original type
            for v in values:
                if str(v.get("value")) == most_common:
                    return v.get("value")

        elif resolution_strategy == "average":
            # Average numeric values
            try:
                numeric_values = [
                    float(v.get("value")) for v in values if v.get("value") is not None
                ]
            except (ValueError, TypeError):
                # Fall back to quality-based
                return self.resolve_conflict(values, "quality")
            if numeric_values:
                return sum(numeric_values) / len(numeric_values)

        # Default: return first value
        return values[0].get("value")

    @staticmethod
    def _as_hashable(value: Any) -> Any:
        """Return value itself if hashable, else a tagged repr() stand-in."""
        try:
            hash(value)
        except TypeError:
            return ("__unhashable__", repr(value))
        return value

    def deduplicate_results(
        self,
        data_list: List[Dict[str, Any]],
        key_fields: Optional[List[str]] = None,
    ) -> List[Dict[str, Any]]:
        """
        Remove duplicate entries from a data list.

        Two items are duplicates when they carry the same set of signature
        fields with equal values. Field names are part of the signature, so
        ``{"id": "x"}`` and ``{"title": "x"}`` are not confused. The first
        occurrence is kept and input order is preserved.

        Args:
            data_list: List of data items
            key_fields: Fields to use for duplicate detection
                (common identifier fields are used when omitted or empty)

        Returns:
            Deduplicated list
        """
        if not data_list:
            return []

        # Auto signature from common fields when none are supplied
        signature_fields = key_fields if key_fields else self._DEFAULT_SIGNATURE_FIELDS

        unique_data = []
        seen_signatures = set()

        for item in data_list:
            # Create a signature for this item
            signature = tuple(
                (field, self._as_hashable(item[field]))
                for field in signature_fields
                if field in item
            )

            if not signature:
                # No identifiable signature, include it
                unique_data.append(item)
                continue

            if signature in seen_signatures:
                continue

            seen_signatures.add(signature)
            unique_data.append(item)

        return unique_data