aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic. Click here for more details.

Files changed (340) hide show
  1. aiecs/__init__.py +13 -16
  2. aiecs/__main__.py +7 -7
  3. aiecs/aiecs_client.py +269 -75
  4. aiecs/application/executors/operation_executor.py +79 -54
  5. aiecs/application/knowledge_graph/__init__.py +7 -0
  6. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  7. aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
  8. aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
  11. aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
  12. aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
  13. aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
  14. aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
  15. aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
  16. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  17. aiecs/application/knowledge_graph/extractors/base.py +98 -0
  18. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
  19. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
  20. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
  21. aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
  22. aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
  23. aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
  24. aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
  25. aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
  26. aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
  27. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
  28. aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
  29. aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
  30. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
  31. aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
  32. aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
  33. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
  34. aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
  35. aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
  36. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  37. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
  38. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
  39. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  40. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
  41. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  42. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  43. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
  44. aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
  45. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
  46. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  47. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  48. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
  49. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
  50. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
  51. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
  52. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
  53. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
  54. aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
  55. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
  56. aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
  57. aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
  58. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
  59. aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
  60. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  61. aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
  62. aiecs/application/knowledge_graph/search/reranker.py +293 -0
  63. aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
  64. aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
  65. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  66. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
  67. aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
  68. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  69. aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
  70. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  71. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
  72. aiecs/common/__init__.py +9 -0
  73. aiecs/common/knowledge_graph/__init__.py +17 -0
  74. aiecs/common/knowledge_graph/runnable.py +471 -0
  75. aiecs/config/__init__.py +20 -5
  76. aiecs/config/config.py +762 -31
  77. aiecs/config/graph_config.py +131 -0
  78. aiecs/config/tool_config.py +399 -0
  79. aiecs/core/__init__.py +29 -13
  80. aiecs/core/interface/__init__.py +2 -2
  81. aiecs/core/interface/execution_interface.py +22 -22
  82. aiecs/core/interface/storage_interface.py +37 -88
  83. aiecs/core/registry/__init__.py +31 -0
  84. aiecs/core/registry/service_registry.py +92 -0
  85. aiecs/domain/__init__.py +270 -1
  86. aiecs/domain/agent/__init__.py +191 -0
  87. aiecs/domain/agent/base_agent.py +3870 -0
  88. aiecs/domain/agent/exceptions.py +99 -0
  89. aiecs/domain/agent/graph_aware_mixin.py +569 -0
  90. aiecs/domain/agent/hybrid_agent.py +1435 -0
  91. aiecs/domain/agent/integration/__init__.py +29 -0
  92. aiecs/domain/agent/integration/context_compressor.py +216 -0
  93. aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
  94. aiecs/domain/agent/integration/protocols.py +281 -0
  95. aiecs/domain/agent/integration/retry_policy.py +218 -0
  96. aiecs/domain/agent/integration/role_config.py +213 -0
  97. aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
  98. aiecs/domain/agent/lifecycle.py +291 -0
  99. aiecs/domain/agent/llm_agent.py +692 -0
  100. aiecs/domain/agent/memory/__init__.py +12 -0
  101. aiecs/domain/agent/memory/conversation.py +1124 -0
  102. aiecs/domain/agent/migration/__init__.py +14 -0
  103. aiecs/domain/agent/migration/conversion.py +163 -0
  104. aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
  105. aiecs/domain/agent/models.py +884 -0
  106. aiecs/domain/agent/observability.py +479 -0
  107. aiecs/domain/agent/persistence.py +449 -0
  108. aiecs/domain/agent/prompts/__init__.py +29 -0
  109. aiecs/domain/agent/prompts/builder.py +159 -0
  110. aiecs/domain/agent/prompts/formatters.py +187 -0
  111. aiecs/domain/agent/prompts/template.py +255 -0
  112. aiecs/domain/agent/registry.py +253 -0
  113. aiecs/domain/agent/tool_agent.py +444 -0
  114. aiecs/domain/agent/tools/__init__.py +15 -0
  115. aiecs/domain/agent/tools/schema_generator.py +364 -0
  116. aiecs/domain/community/__init__.py +155 -0
  117. aiecs/domain/community/agent_adapter.py +469 -0
  118. aiecs/domain/community/analytics.py +432 -0
  119. aiecs/domain/community/collaborative_workflow.py +648 -0
  120. aiecs/domain/community/communication_hub.py +634 -0
  121. aiecs/domain/community/community_builder.py +320 -0
  122. aiecs/domain/community/community_integration.py +796 -0
  123. aiecs/domain/community/community_manager.py +803 -0
  124. aiecs/domain/community/decision_engine.py +849 -0
  125. aiecs/domain/community/exceptions.py +231 -0
  126. aiecs/domain/community/models/__init__.py +33 -0
  127. aiecs/domain/community/models/community_models.py +234 -0
  128. aiecs/domain/community/resource_manager.py +461 -0
  129. aiecs/domain/community/shared_context_manager.py +589 -0
  130. aiecs/domain/context/__init__.py +40 -10
  131. aiecs/domain/context/context_engine.py +1910 -0
  132. aiecs/domain/context/conversation_models.py +87 -53
  133. aiecs/domain/context/graph_memory.py +582 -0
  134. aiecs/domain/execution/model.py +12 -4
  135. aiecs/domain/knowledge_graph/__init__.py +19 -0
  136. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  137. aiecs/domain/knowledge_graph/models/entity.py +148 -0
  138. aiecs/domain/knowledge_graph/models/evidence.py +178 -0
  139. aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
  140. aiecs/domain/knowledge_graph/models/path.py +171 -0
  141. aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
  142. aiecs/domain/knowledge_graph/models/query.py +261 -0
  143. aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
  144. aiecs/domain/knowledge_graph/models/relation.py +202 -0
  145. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  146. aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
  147. aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
  148. aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
  149. aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
  150. aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
  151. aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
  152. aiecs/domain/task/dsl_processor.py +172 -56
  153. aiecs/domain/task/model.py +20 -8
  154. aiecs/domain/task/task_context.py +27 -24
  155. aiecs/infrastructure/__init__.py +0 -2
  156. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  157. aiecs/infrastructure/graph_storage/base.py +837 -0
  158. aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
  159. aiecs/infrastructure/graph_storage/cache.py +424 -0
  160. aiecs/infrastructure/graph_storage/distributed.py +223 -0
  161. aiecs/infrastructure/graph_storage/error_handling.py +380 -0
  162. aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
  163. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  164. aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
  165. aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
  166. aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
  167. aiecs/infrastructure/graph_storage/metrics.py +344 -0
  168. aiecs/infrastructure/graph_storage/migration.py +400 -0
  169. aiecs/infrastructure/graph_storage/pagination.py +483 -0
  170. aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
  171. aiecs/infrastructure/graph_storage/postgres.py +1563 -0
  172. aiecs/infrastructure/graph_storage/property_storage.py +353 -0
  173. aiecs/infrastructure/graph_storage/protocols.py +76 -0
  174. aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
  175. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  176. aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
  177. aiecs/infrastructure/graph_storage/streaming.py +487 -0
  178. aiecs/infrastructure/graph_storage/tenant.py +412 -0
  179. aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
  180. aiecs/infrastructure/messaging/websocket_manager.py +51 -35
  181. aiecs/infrastructure/monitoring/__init__.py +22 -0
  182. aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
  183. aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
  184. aiecs/infrastructure/monitoring/structured_logger.py +3 -7
  185. aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
  186. aiecs/infrastructure/persistence/__init__.py +14 -1
  187. aiecs/infrastructure/persistence/context_engine_client.py +184 -0
  188. aiecs/infrastructure/persistence/database_manager.py +67 -43
  189. aiecs/infrastructure/persistence/file_storage.py +180 -103
  190. aiecs/infrastructure/persistence/redis_client.py +74 -21
  191. aiecs/llm/__init__.py +73 -25
  192. aiecs/llm/callbacks/__init__.py +11 -0
  193. aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
  194. aiecs/llm/client_factory.py +224 -36
  195. aiecs/llm/client_resolver.py +155 -0
  196. aiecs/llm/clients/__init__.py +38 -0
  197. aiecs/llm/clients/base_client.py +324 -0
  198. aiecs/llm/clients/google_function_calling_mixin.py +457 -0
  199. aiecs/llm/clients/googleai_client.py +241 -0
  200. aiecs/llm/clients/openai_client.py +158 -0
  201. aiecs/llm/clients/openai_compatible_mixin.py +367 -0
  202. aiecs/llm/clients/vertex_client.py +897 -0
  203. aiecs/llm/clients/xai_client.py +201 -0
  204. aiecs/llm/config/__init__.py +51 -0
  205. aiecs/llm/config/config_loader.py +272 -0
  206. aiecs/llm/config/config_validator.py +206 -0
  207. aiecs/llm/config/model_config.py +143 -0
  208. aiecs/llm/protocols.py +149 -0
  209. aiecs/llm/utils/__init__.py +10 -0
  210. aiecs/llm/utils/validate_config.py +89 -0
  211. aiecs/main.py +140 -121
  212. aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
  213. aiecs/scripts/aid/__init__.py +19 -0
  214. aiecs/scripts/aid/module_checker.py +499 -0
  215. aiecs/scripts/aid/version_manager.py +235 -0
  216. aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
  217. aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
  218. aiecs/scripts/dependance_check/__init__.py +15 -0
  219. aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
  220. aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
  221. aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
  222. aiecs/scripts/dependance_patch/__init__.py +7 -0
  223. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  224. aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
  225. aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
  226. aiecs/scripts/knowledge_graph/__init__.py +3 -0
  227. aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
  228. aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
  229. aiecs/scripts/tools_develop/README.md +671 -0
  230. aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
  231. aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
  232. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  233. aiecs/scripts/tools_develop/__init__.py +21 -0
  234. aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
  235. aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
  236. aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
  237. aiecs/scripts/tools_develop/schema_coverage.py +511 -0
  238. aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
  239. aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
  240. aiecs/scripts/tools_develop/verify_tools.py +352 -0
  241. aiecs/tasks/__init__.py +0 -1
  242. aiecs/tasks/worker.py +115 -47
  243. aiecs/tools/__init__.py +194 -72
  244. aiecs/tools/apisource/__init__.py +99 -0
  245. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  246. aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
  247. aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
  248. aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
  249. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  250. aiecs/tools/apisource/monitoring/metrics.py +330 -0
  251. aiecs/tools/apisource/providers/__init__.py +112 -0
  252. aiecs/tools/apisource/providers/base.py +671 -0
  253. aiecs/tools/apisource/providers/census.py +397 -0
  254. aiecs/tools/apisource/providers/fred.py +535 -0
  255. aiecs/tools/apisource/providers/newsapi.py +409 -0
  256. aiecs/tools/apisource/providers/worldbank.py +352 -0
  257. aiecs/tools/apisource/reliability/__init__.py +12 -0
  258. aiecs/tools/apisource/reliability/error_handler.py +363 -0
  259. aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
  260. aiecs/tools/apisource/tool.py +832 -0
  261. aiecs/tools/apisource/utils/__init__.py +9 -0
  262. aiecs/tools/apisource/utils/validators.py +334 -0
  263. aiecs/tools/base_tool.py +415 -21
  264. aiecs/tools/docs/__init__.py +121 -0
  265. aiecs/tools/docs/ai_document_orchestrator.py +607 -0
  266. aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
  267. aiecs/tools/docs/content_insertion_tool.py +1320 -0
  268. aiecs/tools/docs/document_creator_tool.py +1323 -0
  269. aiecs/tools/docs/document_layout_tool.py +1160 -0
  270. aiecs/tools/docs/document_parser_tool.py +1011 -0
  271. aiecs/tools/docs/document_writer_tool.py +1829 -0
  272. aiecs/tools/knowledge_graph/__init__.py +17 -0
  273. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
  274. aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
  275. aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
  276. aiecs/tools/langchain_adapter.py +300 -138
  277. aiecs/tools/schema_generator.py +455 -0
  278. aiecs/tools/search_tool/__init__.py +100 -0
  279. aiecs/tools/search_tool/analyzers.py +581 -0
  280. aiecs/tools/search_tool/cache.py +264 -0
  281. aiecs/tools/search_tool/constants.py +128 -0
  282. aiecs/tools/search_tool/context.py +224 -0
  283. aiecs/tools/search_tool/core.py +778 -0
  284. aiecs/tools/search_tool/deduplicator.py +119 -0
  285. aiecs/tools/search_tool/error_handler.py +242 -0
  286. aiecs/tools/search_tool/metrics.py +343 -0
  287. aiecs/tools/search_tool/rate_limiter.py +172 -0
  288. aiecs/tools/search_tool/schemas.py +275 -0
  289. aiecs/tools/statistics/__init__.py +80 -0
  290. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
  291. aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
  292. aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
  293. aiecs/tools/statistics/data_loader_tool.py +555 -0
  294. aiecs/tools/statistics/data_profiler_tool.py +638 -0
  295. aiecs/tools/statistics/data_transformer_tool.py +580 -0
  296. aiecs/tools/statistics/data_visualizer_tool.py +498 -0
  297. aiecs/tools/statistics/model_trainer_tool.py +507 -0
  298. aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
  299. aiecs/tools/task_tools/__init__.py +49 -36
  300. aiecs/tools/task_tools/chart_tool.py +200 -184
  301. aiecs/tools/task_tools/classfire_tool.py +268 -267
  302. aiecs/tools/task_tools/image_tool.py +175 -131
  303. aiecs/tools/task_tools/office_tool.py +226 -146
  304. aiecs/tools/task_tools/pandas_tool.py +477 -121
  305. aiecs/tools/task_tools/report_tool.py +390 -142
  306. aiecs/tools/task_tools/research_tool.py +149 -79
  307. aiecs/tools/task_tools/scraper_tool.py +339 -145
  308. aiecs/tools/task_tools/stats_tool.py +448 -209
  309. aiecs/tools/temp_file_manager.py +26 -24
  310. aiecs/tools/tool_executor/__init__.py +18 -16
  311. aiecs/tools/tool_executor/tool_executor.py +364 -52
  312. aiecs/utils/LLM_output_structor.py +74 -48
  313. aiecs/utils/__init__.py +14 -3
  314. aiecs/utils/base_callback.py +0 -3
  315. aiecs/utils/cache_provider.py +696 -0
  316. aiecs/utils/execution_utils.py +50 -31
  317. aiecs/utils/prompt_loader.py +1 -0
  318. aiecs/utils/token_usage_repository.py +37 -11
  319. aiecs/ws/socket_server.py +14 -4
  320. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
  321. aiecs-1.7.6.dist-info/RECORD +337 -0
  322. aiecs-1.7.6.dist-info/entry_points.txt +13 -0
  323. aiecs/config/registry.py +0 -19
  324. aiecs/domain/context/content_engine.py +0 -982
  325. aiecs/llm/base_client.py +0 -99
  326. aiecs/llm/openai_client.py +0 -125
  327. aiecs/llm/vertex_client.py +0 -186
  328. aiecs/llm/xai_client.py +0 -184
  329. aiecs/scripts/dependency_checker.py +0 -857
  330. aiecs/scripts/quick_dependency_check.py +0 -269
  331. aiecs/tools/task_tools/search_api.py +0 -7
  332. aiecs-1.0.1.dist-info/RECORD +0 -90
  333. aiecs-1.0.1.dist-info/entry_points.txt +0 -7
  334. /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
  335. /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
  336. /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
  337. /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
  338. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
  339. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
  340. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,632 @@
1
+ """
2
+ Data Fusion Engine for Cross-Provider Results
3
+
4
+ Intelligently merges results from multiple API providers:
5
+ - Detect and handle duplicate data
6
+ - Resolve conflicts based on quality scores
7
+ - Support multiple fusion strategies
8
+ - Preserve provenance information
9
+ """
10
+
11
+ import logging
12
+ from typing import Any, Dict, List, Optional, Tuple, cast
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ class DataFusionEngine:
18
+ """
19
+ Fuses data from multiple providers intelligently.
20
+
21
+ Handles duplicate detection, conflict resolution, and data quality
22
+ optimization when combining results from different sources.
23
+ """
24
+
25
+ # Fusion strategies
26
+ STRATEGY_BEST_QUALITY = "best_quality"
27
+ STRATEGY_MERGE_ALL = "merge_all"
28
+ STRATEGY_CONSENSUS = "consensus"
29
+ STRATEGY_FIRST_SUCCESS = "first_success"
30
+
31
+ def __init__(self):
32
+ """Initialize data fusion engine"""
33
+
34
+ def fuse_multi_provider_results(
35
+ self,
36
+ results: List[Dict[str, Any]],
37
+ fusion_strategy: str = STRATEGY_BEST_QUALITY,
38
+ ) -> Optional[Dict[str, Any]]:
39
+ """
40
+ Fuse results from multiple providers.
41
+
42
+ Args:
43
+ results: List of results from different providers
44
+ fusion_strategy: Strategy to use for fusion:
45
+ - 'best_quality': Select result with highest quality score
46
+ - 'merge_all': Merge all results, preserving sources
47
+ - 'consensus': Use data points agreed upon by multiple sources
48
+ - 'first_success': Use first successful result
49
+
50
+ Returns:
51
+ Fused result dictionary or None if no valid results
52
+ """
53
+ if not results:
54
+ return None
55
+
56
+ # Filter out failed results
57
+ valid_results = [r for r in results if r.get("data") is not None]
58
+
59
+ if not valid_results:
60
+ return None
61
+
62
+ if fusion_strategy == self.STRATEGY_BEST_QUALITY:
63
+ return self._fuse_best_quality(valid_results)
64
+
65
+ elif fusion_strategy == self.STRATEGY_MERGE_ALL:
66
+ return self._fuse_merge_all(valid_results)
67
+
68
+ elif fusion_strategy == self.STRATEGY_CONSENSUS:
69
+ return self._fuse_consensus(valid_results)
70
+
71
+ elif fusion_strategy == self.STRATEGY_FIRST_SUCCESS:
72
+ return valid_results[0]
73
+
74
+ else:
75
+ logger.warning(f"Unknown fusion strategy: {fusion_strategy}, using best_quality")
76
+ return self._fuse_best_quality(valid_results)
77
+
78
+ def _fuse_best_quality(self, results: List[Dict[str, Any]]) -> Dict[str, Any]:
79
+ """
80
+ Select result with highest quality score.
81
+
82
+ Args:
83
+ results: List of valid results
84
+
85
+ Returns:
86
+ Result with best quality
87
+ """
88
+
89
+ def get_quality_score(result: Dict[str, Any]) -> float:
90
+ """Extract quality score from result"""
91
+ metadata = result.get("metadata", {})
92
+ quality = metadata.get("quality", {})
93
+ return quality.get("score", 0.5)
94
+
95
+ best_result = max(results, key=get_quality_score)
96
+
97
+ # Add fusion metadata
98
+ best_result["metadata"]["fusion_info"] = {
99
+ "strategy": self.STRATEGY_BEST_QUALITY,
100
+ "total_providers_queried": len(results),
101
+ "selected_provider": best_result.get("provider"),
102
+ "quality_score": get_quality_score(best_result),
103
+ "alternative_providers": [r.get("provider") for r in results if r.get("provider") != best_result.get("provider")],
104
+ }
105
+
106
+ return best_result
107
+
108
+ def _fuse_merge_all(self, results: List[Dict[str, Any]]) -> Dict[str, Any]:
109
+ """
110
+ Merge all results, preserving source information.
111
+
112
+ Args:
113
+ results: List of valid results
114
+
115
+ Returns:
116
+ Merged result with all data
117
+ """
118
+ merged: Dict[str, Any] = {
119
+ "operation": "multi_provider_search",
120
+ "data": [],
121
+ "metadata": {
122
+ "fusion_info": {
123
+ "strategy": self.STRATEGY_MERGE_ALL,
124
+ "total_providers": len(results),
125
+ "sources": [],
126
+ }
127
+ },
128
+ }
129
+
130
+ # Collect all data with source tags
131
+ for result in results:
132
+ provider = result.get("provider", "unknown")
133
+ data = result.get("data", [])
134
+ metadata = result.get("metadata", {})
135
+
136
+ # Handle different data structures
137
+ if isinstance(data, list):
138
+ for item in data:
139
+ if isinstance(item, dict):
140
+ # Add source information to each item
141
+ enriched_item = item.copy()
142
+ enriched_item["_source_provider"] = provider
143
+ enriched_item["_source_quality"] = metadata.get("quality", {})
144
+ enriched_item["_source_timestamp"] = metadata.get("timestamp")
145
+ merged["data"].append(enriched_item)
146
+ else:
147
+ # Handle non-dict items
148
+ merged["data"].append(
149
+ {
150
+ "value": item,
151
+ "_source_provider": provider,
152
+ "_source_quality": metadata.get("quality", {}),
153
+ }
154
+ )
155
+ elif isinstance(data, dict):
156
+ # Single dict result
157
+ enriched_data = data.copy()
158
+ enriched_data["_source_provider"] = provider
159
+ enriched_data["_source_quality"] = metadata.get("quality", {})
160
+ merged["data"].append(enriched_data)
161
+
162
+ # Record source info
163
+ fusion_info = cast(Dict[str, Any], merged["metadata"]["fusion_info"])
164
+ sources = cast(List[Dict[str, Any]], fusion_info["sources"])
165
+ sources.append(
166
+ {
167
+ "provider": provider,
168
+ "operation": result.get("operation"),
169
+ "record_count": len(data) if isinstance(data, list) else 1,
170
+ "quality": metadata.get("quality", {}),
171
+ }
172
+ )
173
+
174
+ return merged
175
+
176
+ def _fuse_consensus(self, results: List[Dict[str, Any]]) -> Dict[str, Any]:
177
+ """
178
+ Use consensus-based fusion (data agreed upon by multiple sources).
179
+
180
+ Implements sophisticated consensus logic:
181
+ - Detects data point agreement across providers
182
+ - Uses majority voting for conflicting values
183
+ - Applies quality-weighted consensus calculation
184
+ - Handles partial agreement scenarios
185
+ - Calculates confidence scores
186
+
187
+ Args:
188
+ results: List of valid results
189
+
190
+ Returns:
191
+ Consensus result with confidence scores
192
+ """
193
+ if not results:
194
+ return {}
195
+
196
+ # Extract all data points with provider and quality information
197
+ all_data_points: List[Dict[str, Any]] = []
198
+ for result in results:
199
+ provider = result.get("provider", "unknown")
200
+ data = result.get("data", [])
201
+ metadata = result.get("metadata", {})
202
+ quality_score = metadata.get("quality", {}).get("score", 0.5)
203
+
204
+ # Normalize data to list format
205
+ if isinstance(data, list):
206
+ for item in data:
207
+ if isinstance(item, dict):
208
+ enriched_item = item.copy()
209
+ enriched_item["_provider"] = provider
210
+ enriched_item["_quality"] = quality_score
211
+ all_data_points.append(enriched_item)
212
+ else:
213
+ all_data_points.append({
214
+ "value": item,
215
+ "_provider": provider,
216
+ "_quality": quality_score
217
+ })
218
+ elif isinstance(data, dict):
219
+ enriched_data = data.copy()
220
+ enriched_data["_provider"] = provider
221
+ enriched_data["_quality"] = quality_score
222
+ all_data_points.append(enriched_data)
223
+
224
+ if not all_data_points:
225
+ # Fallback to best quality if no data points
226
+ return self._fuse_best_quality(results)
227
+
228
+ # Group matching data points (agreement detection)
229
+ data_groups = self._group_matching_data_points(all_data_points)
230
+
231
+ # Build consensus result
232
+ consensus_data = []
233
+ total_confidence = 0.0
234
+ agreement_stats = {
235
+ "full_agreement": 0,
236
+ "partial_agreement": 0,
237
+ "conflicts": 0,
238
+ "single_source": 0
239
+ }
240
+
241
+ for group in data_groups:
242
+ if len(group) == 0:
243
+ continue
244
+
245
+ # Build consensus item from group
246
+ consensus_item, confidence, agreement_type = self._build_consensus_item(group)
247
+ consensus_data.append(consensus_item)
248
+ total_confidence += confidence
249
+ agreement_stats[agreement_type] += 1
250
+
251
+ # Calculate average confidence
252
+ avg_confidence = total_confidence / len(consensus_data) if consensus_data else 0.0
253
+
254
+ # Build consensus result
255
+ consensus_result: Dict[str, Any] = {
256
+ "operation": "multi_provider_search",
257
+ "data": consensus_data,
258
+ "metadata": {
259
+ "fusion_info": {
260
+ "strategy": self.STRATEGY_CONSENSUS,
261
+ "total_providers": len(results),
262
+ "providers": [r.get("provider", "unknown") for r in results],
263
+ "consensus_confidence": avg_confidence,
264
+ "agreement_stats": agreement_stats,
265
+ "data_points_analyzed": len(all_data_points),
266
+ "consensus_groups": len(data_groups),
267
+ }
268
+ }
269
+ }
270
+
271
+ return consensus_result
272
+
273
+ def _group_matching_data_points(self, data_points: List[Dict[str, Any]]) -> List[List[Dict[str, Any]]]:
274
+ """
275
+ Group data points that represent the same entity/data point.
276
+
277
+ Uses duplicate detection to identify matching data points across providers.
278
+
279
+ Args:
280
+ data_points: List of data points with provider info
281
+
282
+ Returns:
283
+ List of groups, where each group contains matching data points
284
+ """
285
+ groups: List[List[Dict[str, Any]]] = []
286
+ processed = set()
287
+
288
+ for i, data_point in enumerate(data_points):
289
+ if i in processed:
290
+ continue
291
+
292
+ # Start a new group with this data point
293
+ group = [data_point]
294
+ processed.add(i)
295
+
296
+ # Find matching data points
297
+ for j, other_point in enumerate(data_points[i + 1:], start=i + 1):
298
+ if j in processed:
299
+ continue
300
+
301
+ is_duplicate, similarity = self.detect_duplicate_data(data_point, other_point)
302
+ if is_duplicate:
303
+ group.append(other_point)
304
+ processed.add(j)
305
+
306
+ groups.append(group)
307
+
308
+ return groups
309
+
310
+ def _build_consensus_item(
311
+ self, group: List[Dict[str, Any]]
312
+ ) -> Tuple[Dict[str, Any], float, str]:
313
+ """
314
+ Build a consensus item from a group of matching data points.
315
+
316
+ Args:
317
+ group: List of matching data points from different providers
318
+
319
+ Returns:
320
+ Tuple of (consensus_item, confidence_score, agreement_type)
321
+ """
322
+ if len(group) == 1:
323
+ # Single source - use as-is with lower confidence
324
+ item = group[0].copy()
325
+ item.pop("_provider", None)
326
+ item.pop("_quality", None)
327
+ return item, 0.5, "single_source"
328
+
329
+ # Multiple sources - build consensus
330
+ consensus_item: Dict[str, Any] = {}
331
+ field_agreements: Dict[str, List[Tuple[Any, float]]] = {} # field -> [(value, quality), ...]
332
+
333
+ # Collect all field values with their quality scores
334
+ for data_point in group:
335
+ quality = data_point.get("_quality", 0.5)
336
+ for key, value in data_point.items():
337
+ if key.startswith("_"): # Skip metadata fields
338
+ continue
339
+ if key not in field_agreements:
340
+ field_agreements[key] = []
341
+ field_agreements[key].append((value, quality))
342
+
343
+ # Build consensus for each field
344
+ field_confidences: Dict[str, float] = {}
345
+ full_agreement_count = 0
346
+ partial_agreement_count = 0
347
+ conflict_count = 0
348
+
349
+ for field, value_quality_pairs in field_agreements.items():
350
+ # Detect agreement
351
+ unique_values = {}
352
+ for value, quality in value_quality_pairs:
353
+ value_key = str(value) # Use string for comparison
354
+ if value_key not in unique_values:
355
+ unique_values[value_key] = []
356
+ unique_values[value_key].append((value, quality))
357
+
358
+ if len(unique_values) == 1:
359
+ # Full agreement - all providers have same value
360
+ consensus_item[field] = value_quality_pairs[0][0]
361
+ # Confidence based on number of agreeing sources and quality
362
+ avg_quality = sum(q for _, q in value_quality_pairs) / len(value_quality_pairs)
363
+ agreement_ratio = len(value_quality_pairs) / len(group)
364
+ field_confidences[field] = avg_quality * agreement_ratio
365
+ full_agreement_count += 1
366
+ else:
367
+ # Conflict - resolve using majority voting or quality weighting
368
+ consensus_value, field_confidence = self._resolve_field_conflict(
369
+ unique_values, len(group)
370
+ )
371
+ consensus_item[field] = consensus_value
372
+ field_confidences[field] = field_confidence
373
+
374
+ # Check if majority agrees (>= 50%)
375
+ max_agreement = max(len(vals) for vals in unique_values.values())
376
+ if max_agreement >= len(group) * 0.5:
377
+ partial_agreement_count += 1
378
+ else:
379
+ conflict_count += 1
380
+
381
+ # Calculate overall confidence
382
+ if field_confidences:
383
+ overall_confidence = sum(field_confidences.values()) / len(field_confidences)
384
+ else:
385
+ overall_confidence = 0.5
386
+
387
+ # Determine agreement type
388
+ if conflict_count == 0 and partial_agreement_count == 0:
389
+ agreement_type = "full_agreement"
390
+ elif conflict_count == 0:
391
+ agreement_type = "partial_agreement"
392
+ else:
393
+ agreement_type = "conflicts"
394
+
395
+ # Add consensus metadata
396
+ consensus_item["_consensus_metadata"] = {
397
+ "sources_count": len(group),
398
+ "providers": [dp.get("_provider", "unknown") for dp in group],
399
+ "field_confidences": field_confidences,
400
+ "overall_confidence": overall_confidence,
401
+ "agreement_type": agreement_type,
402
+ }
403
+
404
+ return consensus_item, overall_confidence, agreement_type
405
+
406
+ def _resolve_field_conflict(
407
+ self, unique_values: Dict[str, List[Tuple[Any, float]]], total_sources: int
408
+ ) -> Tuple[Any, float]:
409
+ """
410
+ Resolve conflict for a single field using majority voting and quality weighting.
411
+
412
+ Args:
413
+ unique_values: Dict mapping value strings to list of (value, quality) tuples
414
+ total_sources: Total number of sources
415
+
416
+ Returns:
417
+ Tuple of (resolved_value, confidence_score)
418
+ """
419
+ # Calculate support (count) and quality-weighted scores for each value
420
+ value_scores: List[Tuple[Any, float, int]] = [] # (value, quality_weighted_score, count)
421
+
422
+ for value_str, value_quality_pairs in unique_values.items():
423
+ count = len(value_quality_pairs)
424
+ # Quality-weighted score: average quality * support ratio
425
+ avg_quality = sum(q for _, q in value_quality_pairs) / count
426
+ support_ratio = count / total_sources
427
+ quality_weighted_score = avg_quality * support_ratio
428
+
429
+ # Get original value (not string)
430
+ original_value = value_quality_pairs[0][0]
431
+ value_scores.append((original_value, quality_weighted_score, count))
432
+
433
+ # Sort by quality-weighted score (descending), then by count (descending)
434
+ value_scores.sort(key=lambda x: (x[1], x[2]), reverse=True)
435
+
436
+ # Use majority voting: if majority agrees (>50%), use that value
437
+ best_value, best_score, best_count = value_scores[0]
438
+
439
+ # Check if majority agrees
440
+ if best_count > total_sources / 2:
441
+ # Majority vote wins
442
+ confidence = best_score * (best_count / total_sources)
443
+ else:
444
+ # No clear majority - use quality-weighted consensus
445
+ # Confidence is lower when no majority
446
+ confidence = best_score * 0.7 # Penalty for no majority
447
+
448
+ return best_value, confidence
449
+
450
def detect_duplicate_data(
    self,
    data1: Dict[str, Any],
    data2: Dict[str, Any],
    key_fields: Optional[List[str]] = None,
) -> Tuple[bool, float]:
    """
    Decide whether two data items describe the same thing.

    Only key fields present in both items are compared. The similarity is
    the fraction of those shared fields whose values are equal, and the
    items count as duplicates when it exceeds 0.8. When the items share no
    key field at all, the decision is delegated to
    ``_check_text_similarity``.

    Args:
        data1: First data item
        data2: Second data item
        key_fields: Fields to compare (a built-in list of identifier-like
            fields is used if None)

    Returns:
        Tuple of (is_duplicate, similarity_score)
    """
    if key_fields is None:
        key_fields = [
            "id",
            "series_id",
            "indicator_code",
            "indicator_id",
            "title",
            "name",
            "code",
        ]

    shared = [name for name in key_fields if name in data1 and name in data2]
    if not shared:
        # Nothing comparable by key; fall back to fuzzy text comparison.
        return self._check_text_similarity(data1, data2)

    equal_count = sum(1 for name in shared if data1[name] == data2[name])
    ratio = equal_count / len(shared)

    return ratio > 0.8, ratio
496
+
497
+ def _check_text_similarity(self, data1: Dict[str, Any], data2: Dict[str, Any]) -> Tuple[bool, float]:
498
+ """
499
+ Check text similarity for title/name fields.
500
+
501
+ Args:
502
+ data1: First data item
503
+ data2: Second data item
504
+
505
+ Returns:
506
+ Tuple of (is_duplicate, similarity_score)
507
+ """
508
+ text_fields = ["title", "name", "description"]
509
+
510
+ for field in text_fields:
511
+ if field in data1 and field in data2:
512
+ text1 = str(data1[field]).lower()
513
+ text2 = str(data2[field]).lower()
514
+
515
+ # Simple word-based similarity
516
+ words1 = set(text1.split())
517
+ words2 = set(text2.split())
518
+
519
+ if not words1 or not words2:
520
+ continue
521
+
522
+ intersection = len(words1 & words2)
523
+ union = len(words1 | words2)
524
+
525
+ similarity = intersection / union if union > 0 else 0.0
526
+
527
+ if similarity > 0.7:
528
+ return True, similarity
529
+
530
+ return False, 0.0
531
+
532
def resolve_conflict(
    self,
    values: List[Dict[str, Any]],
    resolution_strategy: str = "quality",
) -> Any:
    """
    Resolve conflicts when multiple sources provide different values.

    Strategies:
        - "quality": return the value from the entry with the highest
          quality score. The ``quality`` entry may be a dict carrying a
          ``score`` key or a plain number; anything else counts as 0.
        - "majority": return the most common value (compared by ``str()``),
          in its original type.
        - "average": return the mean of all values convertible to float.
          If none are numeric, fall back to the "quality" strategy.
        - any other name: return the first entry's value.

    Args:
        values: List of value dictionaries with {'value': ..., 'quality': ..., 'source': ...}
        resolution_strategy: Strategy for resolution ('quality', 'majority', 'average')

    Returns:
        Resolved value, or None when ``values`` is empty
    """
    if not values:
        return None

    if len(values) == 1:
        return values[0].get("value")

    def quality_score(entry: Dict[str, Any]) -> float:
        # Accept both {"quality": {"score": x}} and {"quality": x}; a
        # missing or malformed quality ranks lowest instead of raising.
        quality = entry.get("quality")
        if isinstance(quality, dict):
            quality = quality.get("score")
        if isinstance(quality, (int, float)):
            return quality
        return 0

    def best_by_quality() -> Any:
        return max(values, key=quality_score).get("value")

    if resolution_strategy == "quality":
        return best_by_quality()

    if resolution_strategy == "majority":
        from collections import Counter

        tally = Counter(str(entry.get("value")) for entry in values)
        winner = tally.most_common(1)[0][0]
        # Return the first entry matching the winner so the original type
        # is preserved rather than its string form.
        return next(
            entry.get("value") for entry in values if str(entry.get("value")) == winner
        )

    if resolution_strategy == "average":
        numeric_values = []
        for entry in values:
            raw = entry.get("value")
            if raw is None:
                continue
            try:
                numeric_values.append(float(raw))
            except (ValueError, TypeError):
                # Non-numeric entries are ignored for averaging.
                continue
        if numeric_values:
            return sum(numeric_values) / len(numeric_values)
        # Nothing to average: fall back to quality-based resolution.
        return best_by_quality()

    # Unknown strategy: return first value
    return values[0].get("value")
588
+
589
def deduplicate_results(
    self,
    data_list: List[Dict[str, Any]],
    key_fields: Optional[List[str]] = None,
) -> List[Dict[str, Any]]:
    """
    Remove duplicate entries from a data list.

    Each item gets a signature built from the (field, value) pairs of the
    signature fields it contains. The first item with a given signature is
    kept and later ones are dropped. Field names are part of the signature,
    so the same value under two different fields (e.g. ``{"id": "X"}`` and
    ``{"title": "X"}``) is not mistaken for a duplicate. Items that contain
    none of the signature fields cannot be identified and are always kept.

    Args:
        data_list: List of data items
        key_fields: Fields to use for duplicate detection (a built-in list
            of identifier-like fields is used if None or empty)

    Returns:
        Deduplicated list, in original order
    """
    if not data_list:
        return []

    signature_fields = key_fields or [
        "id",
        "series_id",
        "indicator_code",
        "title",
        "name",
    ]

    def hashable(value: Any) -> Any:
        # Signatures live in a set; stand in for unhashable values
        # (lists, dicts, ...) with a tagged repr instead of raising.
        try:
            hash(value)
        except TypeError:
            return ("__unhashable__", repr(value))
        return value

    unique_data: List[Dict[str, Any]] = []
    seen_signatures = set()

    for item in data_list:
        signature = tuple(
            (field, hashable(item[field])) for field in signature_fields if field in item
        )

        if not signature:
            # No identifiable signature, include it
            unique_data.append(item)
        elif signature not in seen_signatures:
            seen_signatures.add(signature)
            unique_data.append(item)

    return unique_data