aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic. Click here for more details.

Files changed (340) hide show
  1. aiecs/__init__.py +13 -16
  2. aiecs/__main__.py +7 -7
  3. aiecs/aiecs_client.py +269 -75
  4. aiecs/application/executors/operation_executor.py +79 -54
  5. aiecs/application/knowledge_graph/__init__.py +7 -0
  6. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  7. aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
  8. aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
  11. aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
  12. aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
  13. aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
  14. aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
  15. aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
  16. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  17. aiecs/application/knowledge_graph/extractors/base.py +98 -0
  18. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
  19. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
  20. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
  21. aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
  22. aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
  23. aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
  24. aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
  25. aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
  26. aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
  27. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
  28. aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
  29. aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
  30. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
  31. aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
  32. aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
  33. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
  34. aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
  35. aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
  36. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  37. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
  38. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
  39. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  40. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
  41. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  42. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  43. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
  44. aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
  45. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
  46. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  47. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  48. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
  49. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
  50. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
  51. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
  52. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
  53. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
  54. aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
  55. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
  56. aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
  57. aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
  58. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
  59. aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
  60. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  61. aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
  62. aiecs/application/knowledge_graph/search/reranker.py +293 -0
  63. aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
  64. aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
  65. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  66. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
  67. aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
  68. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  69. aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
  70. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  71. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
  72. aiecs/common/__init__.py +9 -0
  73. aiecs/common/knowledge_graph/__init__.py +17 -0
  74. aiecs/common/knowledge_graph/runnable.py +471 -0
  75. aiecs/config/__init__.py +20 -5
  76. aiecs/config/config.py +762 -31
  77. aiecs/config/graph_config.py +131 -0
  78. aiecs/config/tool_config.py +399 -0
  79. aiecs/core/__init__.py +29 -13
  80. aiecs/core/interface/__init__.py +2 -2
  81. aiecs/core/interface/execution_interface.py +22 -22
  82. aiecs/core/interface/storage_interface.py +37 -88
  83. aiecs/core/registry/__init__.py +31 -0
  84. aiecs/core/registry/service_registry.py +92 -0
  85. aiecs/domain/__init__.py +270 -1
  86. aiecs/domain/agent/__init__.py +191 -0
  87. aiecs/domain/agent/base_agent.py +3870 -0
  88. aiecs/domain/agent/exceptions.py +99 -0
  89. aiecs/domain/agent/graph_aware_mixin.py +569 -0
  90. aiecs/domain/agent/hybrid_agent.py +1435 -0
  91. aiecs/domain/agent/integration/__init__.py +29 -0
  92. aiecs/domain/agent/integration/context_compressor.py +216 -0
  93. aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
  94. aiecs/domain/agent/integration/protocols.py +281 -0
  95. aiecs/domain/agent/integration/retry_policy.py +218 -0
  96. aiecs/domain/agent/integration/role_config.py +213 -0
  97. aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
  98. aiecs/domain/agent/lifecycle.py +291 -0
  99. aiecs/domain/agent/llm_agent.py +692 -0
  100. aiecs/domain/agent/memory/__init__.py +12 -0
  101. aiecs/domain/agent/memory/conversation.py +1124 -0
  102. aiecs/domain/agent/migration/__init__.py +14 -0
  103. aiecs/domain/agent/migration/conversion.py +163 -0
  104. aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
  105. aiecs/domain/agent/models.py +884 -0
  106. aiecs/domain/agent/observability.py +479 -0
  107. aiecs/domain/agent/persistence.py +449 -0
  108. aiecs/domain/agent/prompts/__init__.py +29 -0
  109. aiecs/domain/agent/prompts/builder.py +159 -0
  110. aiecs/domain/agent/prompts/formatters.py +187 -0
  111. aiecs/domain/agent/prompts/template.py +255 -0
  112. aiecs/domain/agent/registry.py +253 -0
  113. aiecs/domain/agent/tool_agent.py +444 -0
  114. aiecs/domain/agent/tools/__init__.py +15 -0
  115. aiecs/domain/agent/tools/schema_generator.py +364 -0
  116. aiecs/domain/community/__init__.py +155 -0
  117. aiecs/domain/community/agent_adapter.py +469 -0
  118. aiecs/domain/community/analytics.py +432 -0
  119. aiecs/domain/community/collaborative_workflow.py +648 -0
  120. aiecs/domain/community/communication_hub.py +634 -0
  121. aiecs/domain/community/community_builder.py +320 -0
  122. aiecs/domain/community/community_integration.py +796 -0
  123. aiecs/domain/community/community_manager.py +803 -0
  124. aiecs/domain/community/decision_engine.py +849 -0
  125. aiecs/domain/community/exceptions.py +231 -0
  126. aiecs/domain/community/models/__init__.py +33 -0
  127. aiecs/domain/community/models/community_models.py +234 -0
  128. aiecs/domain/community/resource_manager.py +461 -0
  129. aiecs/domain/community/shared_context_manager.py +589 -0
  130. aiecs/domain/context/__init__.py +40 -10
  131. aiecs/domain/context/context_engine.py +1910 -0
  132. aiecs/domain/context/conversation_models.py +87 -53
  133. aiecs/domain/context/graph_memory.py +582 -0
  134. aiecs/domain/execution/model.py +12 -4
  135. aiecs/domain/knowledge_graph/__init__.py +19 -0
  136. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  137. aiecs/domain/knowledge_graph/models/entity.py +148 -0
  138. aiecs/domain/knowledge_graph/models/evidence.py +178 -0
  139. aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
  140. aiecs/domain/knowledge_graph/models/path.py +171 -0
  141. aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
  142. aiecs/domain/knowledge_graph/models/query.py +261 -0
  143. aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
  144. aiecs/domain/knowledge_graph/models/relation.py +202 -0
  145. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  146. aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
  147. aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
  148. aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
  149. aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
  150. aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
  151. aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
  152. aiecs/domain/task/dsl_processor.py +172 -56
  153. aiecs/domain/task/model.py +20 -8
  154. aiecs/domain/task/task_context.py +27 -24
  155. aiecs/infrastructure/__init__.py +0 -2
  156. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  157. aiecs/infrastructure/graph_storage/base.py +837 -0
  158. aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
  159. aiecs/infrastructure/graph_storage/cache.py +424 -0
  160. aiecs/infrastructure/graph_storage/distributed.py +223 -0
  161. aiecs/infrastructure/graph_storage/error_handling.py +380 -0
  162. aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
  163. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  164. aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
  165. aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
  166. aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
  167. aiecs/infrastructure/graph_storage/metrics.py +344 -0
  168. aiecs/infrastructure/graph_storage/migration.py +400 -0
  169. aiecs/infrastructure/graph_storage/pagination.py +483 -0
  170. aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
  171. aiecs/infrastructure/graph_storage/postgres.py +1563 -0
  172. aiecs/infrastructure/graph_storage/property_storage.py +353 -0
  173. aiecs/infrastructure/graph_storage/protocols.py +76 -0
  174. aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
  175. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  176. aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
  177. aiecs/infrastructure/graph_storage/streaming.py +487 -0
  178. aiecs/infrastructure/graph_storage/tenant.py +412 -0
  179. aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
  180. aiecs/infrastructure/messaging/websocket_manager.py +51 -35
  181. aiecs/infrastructure/monitoring/__init__.py +22 -0
  182. aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
  183. aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
  184. aiecs/infrastructure/monitoring/structured_logger.py +3 -7
  185. aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
  186. aiecs/infrastructure/persistence/__init__.py +14 -1
  187. aiecs/infrastructure/persistence/context_engine_client.py +184 -0
  188. aiecs/infrastructure/persistence/database_manager.py +67 -43
  189. aiecs/infrastructure/persistence/file_storage.py +180 -103
  190. aiecs/infrastructure/persistence/redis_client.py +74 -21
  191. aiecs/llm/__init__.py +73 -25
  192. aiecs/llm/callbacks/__init__.py +11 -0
  193. aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
  194. aiecs/llm/client_factory.py +224 -36
  195. aiecs/llm/client_resolver.py +155 -0
  196. aiecs/llm/clients/__init__.py +38 -0
  197. aiecs/llm/clients/base_client.py +324 -0
  198. aiecs/llm/clients/google_function_calling_mixin.py +457 -0
  199. aiecs/llm/clients/googleai_client.py +241 -0
  200. aiecs/llm/clients/openai_client.py +158 -0
  201. aiecs/llm/clients/openai_compatible_mixin.py +367 -0
  202. aiecs/llm/clients/vertex_client.py +897 -0
  203. aiecs/llm/clients/xai_client.py +201 -0
  204. aiecs/llm/config/__init__.py +51 -0
  205. aiecs/llm/config/config_loader.py +272 -0
  206. aiecs/llm/config/config_validator.py +206 -0
  207. aiecs/llm/config/model_config.py +143 -0
  208. aiecs/llm/protocols.py +149 -0
  209. aiecs/llm/utils/__init__.py +10 -0
  210. aiecs/llm/utils/validate_config.py +89 -0
  211. aiecs/main.py +140 -121
  212. aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
  213. aiecs/scripts/aid/__init__.py +19 -0
  214. aiecs/scripts/aid/module_checker.py +499 -0
  215. aiecs/scripts/aid/version_manager.py +235 -0
  216. aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
  217. aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
  218. aiecs/scripts/dependance_check/__init__.py +15 -0
  219. aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
  220. aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
  221. aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
  222. aiecs/scripts/dependance_patch/__init__.py +7 -0
  223. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  224. aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
  225. aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
  226. aiecs/scripts/knowledge_graph/__init__.py +3 -0
  227. aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
  228. aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
  229. aiecs/scripts/tools_develop/README.md +671 -0
  230. aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
  231. aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
  232. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  233. aiecs/scripts/tools_develop/__init__.py +21 -0
  234. aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
  235. aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
  236. aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
  237. aiecs/scripts/tools_develop/schema_coverage.py +511 -0
  238. aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
  239. aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
  240. aiecs/scripts/tools_develop/verify_tools.py +352 -0
  241. aiecs/tasks/__init__.py +0 -1
  242. aiecs/tasks/worker.py +115 -47
  243. aiecs/tools/__init__.py +194 -72
  244. aiecs/tools/apisource/__init__.py +99 -0
  245. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  246. aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
  247. aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
  248. aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
  249. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  250. aiecs/tools/apisource/monitoring/metrics.py +330 -0
  251. aiecs/tools/apisource/providers/__init__.py +112 -0
  252. aiecs/tools/apisource/providers/base.py +671 -0
  253. aiecs/tools/apisource/providers/census.py +397 -0
  254. aiecs/tools/apisource/providers/fred.py +535 -0
  255. aiecs/tools/apisource/providers/newsapi.py +409 -0
  256. aiecs/tools/apisource/providers/worldbank.py +352 -0
  257. aiecs/tools/apisource/reliability/__init__.py +12 -0
  258. aiecs/tools/apisource/reliability/error_handler.py +363 -0
  259. aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
  260. aiecs/tools/apisource/tool.py +832 -0
  261. aiecs/tools/apisource/utils/__init__.py +9 -0
  262. aiecs/tools/apisource/utils/validators.py +334 -0
  263. aiecs/tools/base_tool.py +415 -21
  264. aiecs/tools/docs/__init__.py +121 -0
  265. aiecs/tools/docs/ai_document_orchestrator.py +607 -0
  266. aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
  267. aiecs/tools/docs/content_insertion_tool.py +1320 -0
  268. aiecs/tools/docs/document_creator_tool.py +1323 -0
  269. aiecs/tools/docs/document_layout_tool.py +1160 -0
  270. aiecs/tools/docs/document_parser_tool.py +1011 -0
  271. aiecs/tools/docs/document_writer_tool.py +1829 -0
  272. aiecs/tools/knowledge_graph/__init__.py +17 -0
  273. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
  274. aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
  275. aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
  276. aiecs/tools/langchain_adapter.py +300 -138
  277. aiecs/tools/schema_generator.py +455 -0
  278. aiecs/tools/search_tool/__init__.py +100 -0
  279. aiecs/tools/search_tool/analyzers.py +581 -0
  280. aiecs/tools/search_tool/cache.py +264 -0
  281. aiecs/tools/search_tool/constants.py +128 -0
  282. aiecs/tools/search_tool/context.py +224 -0
  283. aiecs/tools/search_tool/core.py +778 -0
  284. aiecs/tools/search_tool/deduplicator.py +119 -0
  285. aiecs/tools/search_tool/error_handler.py +242 -0
  286. aiecs/tools/search_tool/metrics.py +343 -0
  287. aiecs/tools/search_tool/rate_limiter.py +172 -0
  288. aiecs/tools/search_tool/schemas.py +275 -0
  289. aiecs/tools/statistics/__init__.py +80 -0
  290. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
  291. aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
  292. aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
  293. aiecs/tools/statistics/data_loader_tool.py +555 -0
  294. aiecs/tools/statistics/data_profiler_tool.py +638 -0
  295. aiecs/tools/statistics/data_transformer_tool.py +580 -0
  296. aiecs/tools/statistics/data_visualizer_tool.py +498 -0
  297. aiecs/tools/statistics/model_trainer_tool.py +507 -0
  298. aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
  299. aiecs/tools/task_tools/__init__.py +49 -36
  300. aiecs/tools/task_tools/chart_tool.py +200 -184
  301. aiecs/tools/task_tools/classfire_tool.py +268 -267
  302. aiecs/tools/task_tools/image_tool.py +175 -131
  303. aiecs/tools/task_tools/office_tool.py +226 -146
  304. aiecs/tools/task_tools/pandas_tool.py +477 -121
  305. aiecs/tools/task_tools/report_tool.py +390 -142
  306. aiecs/tools/task_tools/research_tool.py +149 -79
  307. aiecs/tools/task_tools/scraper_tool.py +339 -145
  308. aiecs/tools/task_tools/stats_tool.py +448 -209
  309. aiecs/tools/temp_file_manager.py +26 -24
  310. aiecs/tools/tool_executor/__init__.py +18 -16
  311. aiecs/tools/tool_executor/tool_executor.py +364 -52
  312. aiecs/utils/LLM_output_structor.py +74 -48
  313. aiecs/utils/__init__.py +14 -3
  314. aiecs/utils/base_callback.py +0 -3
  315. aiecs/utils/cache_provider.py +696 -0
  316. aiecs/utils/execution_utils.py +50 -31
  317. aiecs/utils/prompt_loader.py +1 -0
  318. aiecs/utils/token_usage_repository.py +37 -11
  319. aiecs/ws/socket_server.py +14 -4
  320. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
  321. aiecs-1.7.6.dist-info/RECORD +337 -0
  322. aiecs-1.7.6.dist-info/entry_points.txt +13 -0
  323. aiecs/config/registry.py +0 -19
  324. aiecs/domain/context/content_engine.py +0 -982
  325. aiecs/llm/base_client.py +0 -99
  326. aiecs/llm/openai_client.py +0 -125
  327. aiecs/llm/vertex_client.py +0 -186
  328. aiecs/llm/xai_client.py +0 -184
  329. aiecs/scripts/dependency_checker.py +0 -857
  330. aiecs/scripts/quick_dependency_check.py +0 -269
  331. aiecs/tools/task_tools/search_api.py +0 -7
  332. aiecs-1.0.1.dist-info/RECORD +0 -90
  333. aiecs-1.0.1.dist-info/entry_points.txt +0 -7
  334. /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
  335. /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
  336. /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
  337. /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
  338. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
  339. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
  340. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,632 @@
1
+ """
2
+ Knowledge Fusion Orchestrator
3
+
4
+ High-level orchestrator for cross-document entity merging and knowledge fusion.
5
+ """
6
+
7
+ from typing import List, Dict, Set, Tuple, Any, Optional
8
+ from aiecs.domain.knowledge_graph.models.entity import Entity
9
+ from aiecs.infrastructure.graph_storage.base import GraphStore
10
+ from aiecs.infrastructure.graph_storage.tenant import (
11
+ TenantContext,
12
+ CrossTenantFusionError,
13
+ )
14
+ from aiecs.application.knowledge_graph.fusion.entity_deduplicator import (
15
+ EntityDeduplicator,
16
+ )
17
+
18
+
19
+ class KnowledgeFusion:
20
+ """
21
+ Orchestrate knowledge fusion across multiple documents
22
+
23
+ After extracting entities and relations from multiple documents,
24
+ this class performs cross-document fusion to:
25
+ - Identify entities that appear in multiple documents
26
+ - Merge duplicate entities across documents
27
+ - Resolve conflicts in entity properties
28
+ - Track provenance (which documents contributed to each entity)
29
+
30
+ Example:
31
+ ```python
32
+ fusion = KnowledgeFusion(
33
+ graph_store,
34
+ similarity_threshold=0.9,  # the threshold is a constructor argument
35
+ )
36
+
37
+ await fusion.fuse_cross_document_entities()  # after processing multiple documents
38
+
39
+ print(f"Merged {fusion.entities_merged} entities across documents")
40
+ ```
41
+ """
42
+
43
def __init__(self, graph_store: GraphStore, similarity_threshold: float = 0.90):
    """
    Set up a fusion orchestrator bound to a graph store

    Args:
        graph_store: Graph storage holding the entities that will be fused
        similarity_threshold: Minimum similarity stored for cross-document
            merging; the default of 0.90 is deliberately high
    """
    # Running statistics, both zeroed on construction.
    self.entities_merged = self.conflicts_resolved = 0

    # Configuration and collaborator handed in by the caller.
    self.similarity_threshold = similarity_threshold
    self.graph_store = graph_store
59
+
60
async def fuse_cross_document_entities(
    self,
    entity_types: Optional[List[str]] = None,
    context: Optional[TenantContext] = None,
) -> Dict[str, int]:
    """
    Run one cross-document fusion pass over the graph

    Steps performed here:
    1. Reset the instance counters ``entities_merged`` and ``conflicts_resolved``
    2. Load candidate entities via ``_query_entities(entity_types, context)``
    3. If ``context`` is truthy, re-filter the result with
       ``_filter_entities_by_tenant(entities, context.tenant_id)`` (defense-in-depth)
    4. Group the candidates with ``_group_entities_by_type``
    5. For every type group with at least two entities, ask
       ``_find_merge_groups`` for clusters and hand each cluster of two or
       more entities to ``_merge_entity_group``

    Args:
        entity_types: Optional list of entity types to fuse (None = all types)
        context: Optional tenant context; when given, fusion is restricted to
            that tenant

    Returns:
        Dictionary with fusion statistics:
        - entities_analyzed: Number of entities left after tenant filtering
        - entities_merged: Sum of ``len(group) - 1`` over merged groups
        - conflicts_resolved: Value of ``self.conflicts_resolved`` after merging
          (stays 0 when fewer than two entities were found)
        - merge_groups: Number of groups returned by ``_find_merge_groups``,
          including groups too small to merge

    Raises:
        CrossTenantFusionError: Presumably raised by the tenant filtering helper
            when entities from several tenants are detected; that helper is
            not visible here, so verify against its implementation
    """
    # Each run starts from clean instance counters.
    self.entities_merged = 0
    self.conflicts_resolved = 0

    # Storage-level query first, then a local tenant re-check when scoped.
    candidates = await self._query_entities(entity_types, context)
    if context:
        candidates = self._filter_entities_by_tenant(candidates, context.tenant_id)

    summary = dict.fromkeys(
        ("entities_analyzed", "entities_merged", "conflicts_resolved", "merge_groups"),
        0,
    )
    summary["entities_analyzed"] = len(candidates)

    if len(candidates) >= 2:
        # Entities are only ever merged with others of the same type.
        buckets = self._group_entities_by_type(candidates)
        for _type_name, bucket in buckets.items():
            if len(bucket) < 2:
                continue

            clusters = await self._find_merge_groups(bucket)
            summary["merge_groups"] += len(clusters)

            for cluster in clusters:
                size = len(cluster)
                if size >= 2:
                    await self._merge_entity_group(cluster)
                    # N entities collapse into 1, so N - 1 were merged away.
                    summary["entities_merged"] += size - 1

        summary["conflicts_resolved"] = self.conflicts_resolved

    return summary
147
+
148
async def resolve_property_conflicts(self, entities: List[Entity], strategy: str = "most_complete") -> Entity:
    """
    Merge several entities into one, resolving conflicting property values

    The first entity is the base: its id, type, tenant and a shallow copy of
    its properties seed the result. Public properties (keys not starting with
    "_") of the remaining entities are then folded in one entity at a time;
    a key whose value differs from the one already held is settled by
    ``_resolve_conflict``.

    Strategies:
    - "most_complete": Prefer non-empty over empty values (default)
    - "most_recent": Prefer most recent value (requires timestamp in provenance)
    - "most_confident": Prefer value from most confident source (requires confidence score)
    - "longest": Prefer longest string value
    - "keep_all": Keep all distinct conflicting values as one flat list

    Side effects on the result and on ``self``:
    - ``_property_conflicts`` records, per conflicting key, the values seen
    - ``self.conflicts_resolved`` grows by the number of conflicting keys
    - ``_provenance_merged`` lists every input's ``_provenance`` plus any
      entries the inputs already carried in their own ``_provenance_merged``
    - the embedding becomes the element-wise mean when more than one input
      has an embedding

    Args:
        entities: List of entities to merge
        strategy: Conflict resolution strategy

    Returns:
        Merged entity with resolved conflicts; the single input itself when
        only one entity is given

    Raises:
        ValueError: If ``entities`` is empty
    """
    if not entities:
        raise ValueError("Cannot merge empty entity list")

    if len(entities) == 1:
        return entities[0]

    base = entities[0]
    merged = Entity(
        id=base.id,
        entity_type=base.entity_type,
        properties=base.properties.copy(),
        embedding=base.embedding,
        tenant_id=base.tenant_id,
    )

    # Entity that supplied the value currently stored under each key. The
    # provenance-based strategies must compare the provenance of the entity
    # that actually owns the surviving value, which stops being the base
    # entity as soon as a later entity wins a conflict or adds a new key.
    value_owner: Dict[str, Entity] = {key: base for key in merged.properties}
    conflicting_properties: Dict[str, List[Any]] = {}

    for entity in entities[1:]:
        for key, value in entity.properties.items():
            if key.startswith("_"):
                # Internal properties are handled separately below
                continue

            if key not in merged.properties:
                merged.properties[key] = value
                value_owner[key] = entity
                continue

            current = merged.properties[key]

            if strategy == "keep_all" and key in conflicting_properties and isinstance(current, list):
                # `current` is already the accumulated list of values: extend
                # it instead of nesting it inside another two-element list.
                if value not in current:
                    conflicting_properties[key].append(value)
                    merged.properties[key] = current + [value]
                continue

            if current != value:
                resolved_value = self._resolve_conflict(
                    key=key,
                    values=[current, value],
                    entities=[value_owner.get(key, base), entity],
                    strategy=strategy,
                )

                # Track conflict (first entry is the value held before it arose)
                conflicting_properties.setdefault(key, [current]).append(value)

                merged.properties[key] = resolved_value
                if resolved_value is value:
                    value_owner[key] = entity

    # Store conflicting values for transparency
    if conflicting_properties:
        merged.properties["_property_conflicts"] = conflicting_properties
        self.conflicts_resolved += len(conflicting_properties)

    # Merge provenance information. Inputs that are themselves the product of
    # an earlier merge carry their history in `_provenance_merged`; fold those
    # entries in as well so repeated fusion does not drop earlier sources.
    provenances: List[Any] = []
    for entity in entities:
        prov = entity.properties.get("_provenance")
        if prov:
            provenances.append(prov)
        earlier = entity.properties.get("_provenance_merged")
        if isinstance(earlier, list):
            for record in earlier:
                if record and record not in provenances:
                    provenances.append(record)
    if provenances:
        merged.properties["_provenance_merged"] = provenances

    # Merge embeddings (average if multiple)
    embeddings = [e.embedding for e in entities if e.embedding]
    if len(embeddings) > 1:
        import numpy as np

        # tolist() yields plain Python floats rather than numpy scalars
        merged.embedding = np.mean(embeddings, axis=0).tolist()
    elif embeddings:
        merged.embedding = embeddings[0]

    return merged
235
+
236
def _resolve_conflict(
    self,
    key: str,
    values: List[Any],
    entities: List[Entity],
    strategy: str,
) -> Any:
    """
    Resolve a single property conflict using specified strategy

    Strategies:
    - "most_complete": drop empty values (None, "", [], {}); if all remaining
      values are strings return the longest, otherwise the first remaining
    - "most_recent": value whose entity has the greatest
      ``_provenance["timestamp"]``
    - "most_confident": value whose entity has the greatest
      ``_provenance["confidence"]``
    - "longest": longest value when every value is a string, else the first
    - "keep_all": the ``values`` list itself
    - anything else: the first value

    For the two provenance-based strategies a missing (or None) score ranks
    lowest. Numeric scores keep the historical rule of treating a missing
    score as 0; non-numeric scores (ISO strings, datetimes) are compared only
    among the entities that have one. If the scores cannot be ordered at all,
    the first value is returned. Ties go to the earliest value.

    Args:
        key: Property key (not used by any strategy)
        values: Conflicting values
        entities: Entities that have these values, aligned with ``values``
        strategy: Resolution strategy

    Returns:
        Resolved value
    """

    def pick_by_provenance(field: str, missing: Any) -> Any:
        """Return the value whose entity has the greatest provenance ``field``."""
        scores = []
        # Ignore surplus entities so the chosen index always exists in `values`.
        for entity in entities[: len(values)]:
            prov = entity.properties.get("_provenance", {})
            scores.append(prov.get(field) if isinstance(prov, dict) else None)

        present = [score for score in scores if score is not None]
        if not present:
            return values[0]

        if all(isinstance(score, (int, float)) for score in present):
            filled = [missing if score is None else score for score in scores]
            return values[filled.index(max(filled))]

        # Non-numeric scores cannot be compared with a numeric placeholder,
        # so rank only the scores that exist.
        try:
            best = max(present)
        except TypeError:
            return values[0]
        return values[scores.index(best)]

    if strategy == "most_complete":
        non_empty = [v for v in values if v not in (None, "", [], {})]
        if not non_empty:
            return values[0]
        if all(isinstance(v, str) for v in non_empty):
            return max(non_empty, key=len)
        return non_empty[0]

    if strategy == "most_recent":
        return pick_by_provenance("timestamp", 0)

    if strategy == "most_confident":
        return pick_by_provenance("confidence", 0.0)

    if strategy == "longest":
        if all(isinstance(v, str) for v in values):
            return max(values, key=len)
        return values[0]

    if strategy == "keep_all":
        return values

    # Unknown strategy: fall back to the first value
    return values[0]
309
+
310
async def track_entity_provenance(self, entity_id: str) -> List[str]:
    """
    Get list of documents that contributed to an entity

    Looks the entity up in the graph store and collects the "source" field of
    its ``_provenance`` record and of every record in ``_provenance_merged``.
    Records that are not dictionaries or have no "source" key are ignored.

    Args:
        entity_id: Entity ID

    Returns:
        Distinct document sources in first-seen order (the entity's own
        provenance first, then merged provenances); an empty list when the
        store returns no entity
    """
    entity = await self.graph_store.get_entity(entity_id)
    if not entity:
        return []

    records = []

    # Single provenance comes first so it leads the result
    if "_provenance" in entity.properties:
        records.append(entity.properties["_provenance"])

    # Merged provenances follow in stored order
    merged_provs = entity.properties.get("_provenance_merged")
    if isinstance(merged_provs, list):
        records.extend(merged_provs)

    sources = [
        record["source"]
        for record in records
        if isinstance(record, dict) and "source" in record
    ]

    # dict.fromkeys removes duplicates while keeping a stable order, unlike
    # set(), whose iteration order for strings varies between runs.
    return list(dict.fromkeys(sources))
341
+
342
+ # =========================================================================
343
+ # Helper Methods for Cross-Document Fusion
344
+ # =========================================================================
345
+
346
+ async def _query_entities(
347
+ self,
348
+ entity_types: Optional[List[str]] = None,
349
+ context: Optional[TenantContext] = None,
350
+ ) -> List[Entity]:
351
+ """
352
+ Query entities from graph store with tenant filtering
353
+
354
+ Args:
355
+ entity_types: Optional list of entity types to query
356
+ context: Optional tenant context for filtering
357
+
358
+ Returns:
359
+ List of entities (filtered by tenant if context provided)
360
+ """
361
+ entities = []
362
+
363
+ # Check if graph store has get_all_entities method
364
+ if hasattr(self.graph_store, "get_all_entities"):
365
+ if entity_types:
366
+ # Query each type separately
367
+ for entity_type in entity_types:
368
+ # Pass context to ensure tenant filtering at storage layer
369
+ if context:
370
+ type_entities = await self.graph_store.get_all_entities(
371
+ entity_type=entity_type, context=context
372
+ )
373
+ else:
374
+ type_entities = await self.graph_store.get_all_entities(
375
+ entity_type=entity_type
376
+ )
377
+ entities.extend(type_entities)
378
+ else:
379
+ # Query all entities
380
+ if context:
381
+ entities = await self.graph_store.get_all_entities(context=context)
382
+ else:
383
+ entities = await self.graph_store.get_all_entities()
384
+ else:
385
+ # Fallback: graph store doesn't support bulk queries
386
+ # This is a limitation - we can't efficiently query all entities
387
+ # In this case, return empty list
388
+ # Note: Implementations should add get_all_entities() method
389
+ pass
390
+
391
+ return entities
392
+
393
+ def _filter_entities_by_tenant(
394
+ self, entities: List[Entity], tenant_id: str
395
+ ) -> List[Entity]:
396
+ """
397
+ Filter entities to only those belonging to the specified tenant.
398
+
399
+ This is a defense-in-depth mechanism in addition to storage-level filtering.
400
+ Silently filters out entities from other tenants.
401
+
402
+ Args:
403
+ entities: List of entities to filter
404
+ tenant_id: Target tenant ID
405
+
406
+ Returns:
407
+ List of entities belonging to the specified tenant
408
+ """
409
+ return [e for e in entities if e.tenant_id == tenant_id]
410
+
411
+ def _group_entities_by_type(self, entities: List[Entity]) -> Dict[str, List[Entity]]:
412
+ """
413
+ Group entities by their type
414
+
415
+ Args:
416
+ entities: List of entities
417
+
418
+ Returns:
419
+ Dictionary mapping entity type to list of entities
420
+ """
421
+ entities_by_type: Dict[str, List[Entity]] = {}
422
+
423
+ for entity in entities:
424
+ entity_type = entity.entity_type
425
+ if entity_type not in entities_by_type:
426
+ entities_by_type[entity_type] = []
427
+ entities_by_type[entity_type].append(entity)
428
+
429
+ return entities_by_type
430
+
431
+ async def _find_merge_groups(self, entities: List[Entity]) -> List[List[Entity]]:
432
+ """
433
+ Find groups of entities that should be merged together
434
+
435
+ Uses similarity matching to identify clusters of similar entities.
436
+ Entities are grouped using connected components algorithm.
437
+
438
+ Args:
439
+ entities: List of entities (all same type)
440
+
441
+ Returns:
442
+ List of merge groups (each group is a list of entities)
443
+ """
444
+ if len(entities) < 2:
445
+ return []
446
+
447
+ # Build similarity graph
448
+ n = len(entities)
449
+ similar_pairs: Set[Tuple[int, int]] = set()
450
+
451
+ # Compare all pairs
452
+ for i in range(n):
453
+ for j in range(i + 1, n):
454
+ similarity = await self._compute_entity_similarity(entities[i], entities[j])
455
+ if similarity >= self.similarity_threshold:
456
+ similar_pairs.add((i, j))
457
+
458
+ # Find connected components (merge groups)
459
+ merge_groups = self._find_connected_components(n, similar_pairs)
460
+
461
+ # Convert indices to entities
462
+ entity_groups = []
463
+ for group_indices in merge_groups:
464
+ if len(group_indices) >= 2: # Only groups with 2+ entities
465
+ entity_group = [entities[i] for i in group_indices]
466
+ entity_groups.append(entity_group)
467
+
468
+ return entity_groups
469
+
470
+ def _find_connected_components(self, n: int, edges: Set[Tuple[int, int]]) -> List[List[int]]:
471
+ """
472
+ Find connected components in an undirected graph
473
+
474
+ Uses Union-Find (Disjoint Set Union) algorithm.
475
+
476
+ Args:
477
+ n: Number of nodes
478
+ edges: Set of edges (pairs of node indices)
479
+
480
+ Returns:
481
+ List of components (each component is a list of node indices)
482
+ """
483
+ # Initialize parent array for Union-Find
484
+ parent = list(range(n))
485
+
486
+ def find(x: int) -> int:
487
+ """Find root of x with path compression"""
488
+ if parent[x] != x:
489
+ parent[x] = find(parent[x])
490
+ return parent[x]
491
+
492
+ def union(x: int, y: int) -> None:
493
+ """Union two sets"""
494
+ root_x = find(x)
495
+ root_y = find(y)
496
+ if root_x != root_y:
497
+ parent[root_x] = root_y
498
+
499
+ # Build connected components
500
+ for i, j in edges:
501
+ union(i, j)
502
+
503
+ # Group nodes by their root
504
+ components: Dict[int, List[int]] = {}
505
+ for i in range(n):
506
+ root = find(i)
507
+ if root not in components:
508
+ components[root] = []
509
+ components[root].append(i)
510
+
511
+ return list(components.values())
512
+
513
async def _compute_entity_similarity(self, entity1: Entity, entity2: Entity) -> float:
    """
    Score how similar two entities are

    Delegates to a freshly constructed EntityDeduplicator configured with
    this instance's ``similarity_threshold``.

    Args:
        entity1: First entity
        entity2: Second entity

    Returns:
        Similarity score (0.0-1.0) as reported by the deduplicator
    """
    threshold = self.similarity_threshold
    matcher = EntityDeduplicator(similarity_threshold=threshold)
    score = await matcher._compute_similarity(entity1, entity2)
    return score
529
+
530
+ async def _merge_entity_group(self, entities: List[Entity]) -> None:
531
+ """
532
+ Merge a group of entities into a single canonical entity
533
+
534
+ Steps:
535
+ 1. Resolve property conflicts to create merged entity
536
+ 2. Update graph: replace all entities with merged entity
537
+ 3. Update relations: redirect to merged entity
538
+ 4. Delete old entities
539
+
540
+ Args:
541
+ entities: List of entities to merge (2 or more)
542
+ """
543
+ if len(entities) < 2:
544
+ return
545
+
546
+ # Step 1: Resolve conflicts and create merged entity
547
+ merged_entity = await self.resolve_property_conflicts(entities)
548
+
549
+ # Track merge provenance
550
+ merged_entity.properties["_merged_from"] = [e.id for e in entities]
551
+ merged_entity.properties["_merge_count"] = len(entities)
552
+
553
+ # Step 2: Add merged entity to graph (use first entity's ID as
554
+ # canonical)
555
+ canonical_id = entities[0].id
556
+ merged_entity.id = canonical_id
557
+
558
+ # Update entity in graph
559
+ # Try update_entity if available, otherwise delete and re-add
560
+ if hasattr(self.graph_store, "update_entity"):
561
+ await self.graph_store.update_entity(merged_entity)
562
+ else:
563
+ # Delete old entity and add merged one
564
+ # For InMemoryGraphStore, we need to manually update
565
+ if hasattr(self.graph_store, "entities"):
566
+ # Direct update for InMemoryGraphStore
567
+ self.graph_store.entities[canonical_id] = merged_entity
568
+ if hasattr(self.graph_store, "graph") and self.graph_store.graph:
569
+ self.graph_store.graph.nodes[canonical_id]["entity"] = merged_entity
570
+ else:
571
+ # Fallback: try to add (may fail if exists)
572
+ try:
573
+ await self.graph_store.add_entity(merged_entity)
574
+ except ValueError:
575
+ # Entity already exists, skip
576
+ pass
577
+
578
+ # Step 3: Update relations pointing to merged entities
579
+ await self._update_relations_for_merge(entities, canonical_id)
580
+
581
+ # Step 4: Delete old entities (except canonical)
582
+ for entity in entities[1:]:
583
+ # Delete entity from graph
584
+ if hasattr(self.graph_store, "delete_entity"):
585
+ await self.graph_store.delete_entity(entity.id)
586
+
587
+ # Update counter
588
+ self.entities_merged += len(entities) - 1
589
+
590
+ async def _update_relations_for_merge(self, merged_entities: List[Entity], canonical_id: str) -> None:
591
+ """
592
+ Update relations to point to canonical merged entity
593
+
594
+ For each merged entity (except canonical):
595
+ - Find all relations where it's source or target
596
+ - Update relation to use canonical_id instead
597
+ - Remove duplicate relations
598
+
599
+ Args:
600
+ merged_entities: List of entities that were merged
601
+ canonical_id: ID of canonical entity
602
+ """
603
+ {e.id for e in merged_entities}
604
+
605
+ # For each merged entity (except canonical)
606
+ for entity in merged_entities:
607
+ if entity.id == canonical_id:
608
+ continue
609
+
610
+ # Get outgoing relations
611
+ if hasattr(self.graph_store, "get_outgoing_relations"):
612
+ outgoing = await self.graph_store.get_outgoing_relations(entity.id)
613
+ for relation in outgoing:
614
+ # Update source to canonical
615
+ relation.source_id = canonical_id
616
+ await self.graph_store.add_relation(relation)
617
+
618
+ # Get incoming relations
619
+ if hasattr(self.graph_store, "get_incoming_relations"):
620
+ incoming = await self.graph_store.get_incoming_relations(entity.id)
621
+ for relation in incoming:
622
+ # Update target to canonical
623
+ relation.target_id = canonical_id
624
+ await self.graph_store.add_relation(relation)
625
+
626
+ # Alternative: use get_neighbors to find relations
627
+ # This is less efficient but works with basic GraphStore interface
628
+ if not hasattr(self.graph_store, "get_outgoing_relations"):
629
+ # Get neighbors (this implicitly uses relations)
630
+ # We can't easily update relations without direct access
631
+ # This is a limitation of the basic interface
632
+ pass