aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic. Click here for more details.

Files changed (340):
  1. aiecs/__init__.py +13 -16
  2. aiecs/__main__.py +7 -7
  3. aiecs/aiecs_client.py +269 -75
  4. aiecs/application/executors/operation_executor.py +79 -54
  5. aiecs/application/knowledge_graph/__init__.py +7 -0
  6. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  7. aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
  8. aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
  11. aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
  12. aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
  13. aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
  14. aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
  15. aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
  16. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  17. aiecs/application/knowledge_graph/extractors/base.py +98 -0
  18. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
  19. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
  20. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
  21. aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
  22. aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
  23. aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
  24. aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
  25. aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
  26. aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
  27. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
  28. aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
  29. aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
  30. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
  31. aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
  32. aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
  33. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
  34. aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
  35. aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
  36. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  37. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
  38. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
  39. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  40. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
  41. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  42. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  43. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
  44. aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
  45. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
  46. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  47. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  48. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
  49. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
  50. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
  51. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
  52. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
  53. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
  54. aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
  55. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
  56. aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
  57. aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
  58. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
  59. aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
  60. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  61. aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
  62. aiecs/application/knowledge_graph/search/reranker.py +293 -0
  63. aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
  64. aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
  65. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  66. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
  67. aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
  68. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  69. aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
  70. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  71. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
  72. aiecs/common/__init__.py +9 -0
  73. aiecs/common/knowledge_graph/__init__.py +17 -0
  74. aiecs/common/knowledge_graph/runnable.py +471 -0
  75. aiecs/config/__init__.py +20 -5
  76. aiecs/config/config.py +762 -31
  77. aiecs/config/graph_config.py +131 -0
  78. aiecs/config/tool_config.py +399 -0
  79. aiecs/core/__init__.py +29 -13
  80. aiecs/core/interface/__init__.py +2 -2
  81. aiecs/core/interface/execution_interface.py +22 -22
  82. aiecs/core/interface/storage_interface.py +37 -88
  83. aiecs/core/registry/__init__.py +31 -0
  84. aiecs/core/registry/service_registry.py +92 -0
  85. aiecs/domain/__init__.py +270 -1
  86. aiecs/domain/agent/__init__.py +191 -0
  87. aiecs/domain/agent/base_agent.py +3870 -0
  88. aiecs/domain/agent/exceptions.py +99 -0
  89. aiecs/domain/agent/graph_aware_mixin.py +569 -0
  90. aiecs/domain/agent/hybrid_agent.py +1435 -0
  91. aiecs/domain/agent/integration/__init__.py +29 -0
  92. aiecs/domain/agent/integration/context_compressor.py +216 -0
  93. aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
  94. aiecs/domain/agent/integration/protocols.py +281 -0
  95. aiecs/domain/agent/integration/retry_policy.py +218 -0
  96. aiecs/domain/agent/integration/role_config.py +213 -0
  97. aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
  98. aiecs/domain/agent/lifecycle.py +291 -0
  99. aiecs/domain/agent/llm_agent.py +692 -0
  100. aiecs/domain/agent/memory/__init__.py +12 -0
  101. aiecs/domain/agent/memory/conversation.py +1124 -0
  102. aiecs/domain/agent/migration/__init__.py +14 -0
  103. aiecs/domain/agent/migration/conversion.py +163 -0
  104. aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
  105. aiecs/domain/agent/models.py +884 -0
  106. aiecs/domain/agent/observability.py +479 -0
  107. aiecs/domain/agent/persistence.py +449 -0
  108. aiecs/domain/agent/prompts/__init__.py +29 -0
  109. aiecs/domain/agent/prompts/builder.py +159 -0
  110. aiecs/domain/agent/prompts/formatters.py +187 -0
  111. aiecs/domain/agent/prompts/template.py +255 -0
  112. aiecs/domain/agent/registry.py +253 -0
  113. aiecs/domain/agent/tool_agent.py +444 -0
  114. aiecs/domain/agent/tools/__init__.py +15 -0
  115. aiecs/domain/agent/tools/schema_generator.py +364 -0
  116. aiecs/domain/community/__init__.py +155 -0
  117. aiecs/domain/community/agent_adapter.py +469 -0
  118. aiecs/domain/community/analytics.py +432 -0
  119. aiecs/domain/community/collaborative_workflow.py +648 -0
  120. aiecs/domain/community/communication_hub.py +634 -0
  121. aiecs/domain/community/community_builder.py +320 -0
  122. aiecs/domain/community/community_integration.py +796 -0
  123. aiecs/domain/community/community_manager.py +803 -0
  124. aiecs/domain/community/decision_engine.py +849 -0
  125. aiecs/domain/community/exceptions.py +231 -0
  126. aiecs/domain/community/models/__init__.py +33 -0
  127. aiecs/domain/community/models/community_models.py +234 -0
  128. aiecs/domain/community/resource_manager.py +461 -0
  129. aiecs/domain/community/shared_context_manager.py +589 -0
  130. aiecs/domain/context/__init__.py +40 -10
  131. aiecs/domain/context/context_engine.py +1910 -0
  132. aiecs/domain/context/conversation_models.py +87 -53
  133. aiecs/domain/context/graph_memory.py +582 -0
  134. aiecs/domain/execution/model.py +12 -4
  135. aiecs/domain/knowledge_graph/__init__.py +19 -0
  136. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  137. aiecs/domain/knowledge_graph/models/entity.py +148 -0
  138. aiecs/domain/knowledge_graph/models/evidence.py +178 -0
  139. aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
  140. aiecs/domain/knowledge_graph/models/path.py +171 -0
  141. aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
  142. aiecs/domain/knowledge_graph/models/query.py +261 -0
  143. aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
  144. aiecs/domain/knowledge_graph/models/relation.py +202 -0
  145. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  146. aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
  147. aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
  148. aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
  149. aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
  150. aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
  151. aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
  152. aiecs/domain/task/dsl_processor.py +172 -56
  153. aiecs/domain/task/model.py +20 -8
  154. aiecs/domain/task/task_context.py +27 -24
  155. aiecs/infrastructure/__init__.py +0 -2
  156. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  157. aiecs/infrastructure/graph_storage/base.py +837 -0
  158. aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
  159. aiecs/infrastructure/graph_storage/cache.py +424 -0
  160. aiecs/infrastructure/graph_storage/distributed.py +223 -0
  161. aiecs/infrastructure/graph_storage/error_handling.py +380 -0
  162. aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
  163. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  164. aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
  165. aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
  166. aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
  167. aiecs/infrastructure/graph_storage/metrics.py +344 -0
  168. aiecs/infrastructure/graph_storage/migration.py +400 -0
  169. aiecs/infrastructure/graph_storage/pagination.py +483 -0
  170. aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
  171. aiecs/infrastructure/graph_storage/postgres.py +1563 -0
  172. aiecs/infrastructure/graph_storage/property_storage.py +353 -0
  173. aiecs/infrastructure/graph_storage/protocols.py +76 -0
  174. aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
  175. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  176. aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
  177. aiecs/infrastructure/graph_storage/streaming.py +487 -0
  178. aiecs/infrastructure/graph_storage/tenant.py +412 -0
  179. aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
  180. aiecs/infrastructure/messaging/websocket_manager.py +51 -35
  181. aiecs/infrastructure/monitoring/__init__.py +22 -0
  182. aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
  183. aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
  184. aiecs/infrastructure/monitoring/structured_logger.py +3 -7
  185. aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
  186. aiecs/infrastructure/persistence/__init__.py +14 -1
  187. aiecs/infrastructure/persistence/context_engine_client.py +184 -0
  188. aiecs/infrastructure/persistence/database_manager.py +67 -43
  189. aiecs/infrastructure/persistence/file_storage.py +180 -103
  190. aiecs/infrastructure/persistence/redis_client.py +74 -21
  191. aiecs/llm/__init__.py +73 -25
  192. aiecs/llm/callbacks/__init__.py +11 -0
  193. aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
  194. aiecs/llm/client_factory.py +224 -36
  195. aiecs/llm/client_resolver.py +155 -0
  196. aiecs/llm/clients/__init__.py +38 -0
  197. aiecs/llm/clients/base_client.py +324 -0
  198. aiecs/llm/clients/google_function_calling_mixin.py +457 -0
  199. aiecs/llm/clients/googleai_client.py +241 -0
  200. aiecs/llm/clients/openai_client.py +158 -0
  201. aiecs/llm/clients/openai_compatible_mixin.py +367 -0
  202. aiecs/llm/clients/vertex_client.py +897 -0
  203. aiecs/llm/clients/xai_client.py +201 -0
  204. aiecs/llm/config/__init__.py +51 -0
  205. aiecs/llm/config/config_loader.py +272 -0
  206. aiecs/llm/config/config_validator.py +206 -0
  207. aiecs/llm/config/model_config.py +143 -0
  208. aiecs/llm/protocols.py +149 -0
  209. aiecs/llm/utils/__init__.py +10 -0
  210. aiecs/llm/utils/validate_config.py +89 -0
  211. aiecs/main.py +140 -121
  212. aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
  213. aiecs/scripts/aid/__init__.py +19 -0
  214. aiecs/scripts/aid/module_checker.py +499 -0
  215. aiecs/scripts/aid/version_manager.py +235 -0
  216. aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
  217. aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
  218. aiecs/scripts/dependance_check/__init__.py +15 -0
  219. aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
  220. aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
  221. aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
  222. aiecs/scripts/dependance_patch/__init__.py +7 -0
  223. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  224. aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
  225. aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
  226. aiecs/scripts/knowledge_graph/__init__.py +3 -0
  227. aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
  228. aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
  229. aiecs/scripts/tools_develop/README.md +671 -0
  230. aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
  231. aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
  232. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  233. aiecs/scripts/tools_develop/__init__.py +21 -0
  234. aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
  235. aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
  236. aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
  237. aiecs/scripts/tools_develop/schema_coverage.py +511 -0
  238. aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
  239. aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
  240. aiecs/scripts/tools_develop/verify_tools.py +352 -0
  241. aiecs/tasks/__init__.py +0 -1
  242. aiecs/tasks/worker.py +115 -47
  243. aiecs/tools/__init__.py +194 -72
  244. aiecs/tools/apisource/__init__.py +99 -0
  245. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  246. aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
  247. aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
  248. aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
  249. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  250. aiecs/tools/apisource/monitoring/metrics.py +330 -0
  251. aiecs/tools/apisource/providers/__init__.py +112 -0
  252. aiecs/tools/apisource/providers/base.py +671 -0
  253. aiecs/tools/apisource/providers/census.py +397 -0
  254. aiecs/tools/apisource/providers/fred.py +535 -0
  255. aiecs/tools/apisource/providers/newsapi.py +409 -0
  256. aiecs/tools/apisource/providers/worldbank.py +352 -0
  257. aiecs/tools/apisource/reliability/__init__.py +12 -0
  258. aiecs/tools/apisource/reliability/error_handler.py +363 -0
  259. aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
  260. aiecs/tools/apisource/tool.py +832 -0
  261. aiecs/tools/apisource/utils/__init__.py +9 -0
  262. aiecs/tools/apisource/utils/validators.py +334 -0
  263. aiecs/tools/base_tool.py +415 -21
  264. aiecs/tools/docs/__init__.py +121 -0
  265. aiecs/tools/docs/ai_document_orchestrator.py +607 -0
  266. aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
  267. aiecs/tools/docs/content_insertion_tool.py +1320 -0
  268. aiecs/tools/docs/document_creator_tool.py +1323 -0
  269. aiecs/tools/docs/document_layout_tool.py +1160 -0
  270. aiecs/tools/docs/document_parser_tool.py +1011 -0
  271. aiecs/tools/docs/document_writer_tool.py +1829 -0
  272. aiecs/tools/knowledge_graph/__init__.py +17 -0
  273. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
  274. aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
  275. aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
  276. aiecs/tools/langchain_adapter.py +300 -138
  277. aiecs/tools/schema_generator.py +455 -0
  278. aiecs/tools/search_tool/__init__.py +100 -0
  279. aiecs/tools/search_tool/analyzers.py +581 -0
  280. aiecs/tools/search_tool/cache.py +264 -0
  281. aiecs/tools/search_tool/constants.py +128 -0
  282. aiecs/tools/search_tool/context.py +224 -0
  283. aiecs/tools/search_tool/core.py +778 -0
  284. aiecs/tools/search_tool/deduplicator.py +119 -0
  285. aiecs/tools/search_tool/error_handler.py +242 -0
  286. aiecs/tools/search_tool/metrics.py +343 -0
  287. aiecs/tools/search_tool/rate_limiter.py +172 -0
  288. aiecs/tools/search_tool/schemas.py +275 -0
  289. aiecs/tools/statistics/__init__.py +80 -0
  290. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
  291. aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
  292. aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
  293. aiecs/tools/statistics/data_loader_tool.py +555 -0
  294. aiecs/tools/statistics/data_profiler_tool.py +638 -0
  295. aiecs/tools/statistics/data_transformer_tool.py +580 -0
  296. aiecs/tools/statistics/data_visualizer_tool.py +498 -0
  297. aiecs/tools/statistics/model_trainer_tool.py +507 -0
  298. aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
  299. aiecs/tools/task_tools/__init__.py +49 -36
  300. aiecs/tools/task_tools/chart_tool.py +200 -184
  301. aiecs/tools/task_tools/classfire_tool.py +268 -267
  302. aiecs/tools/task_tools/image_tool.py +175 -131
  303. aiecs/tools/task_tools/office_tool.py +226 -146
  304. aiecs/tools/task_tools/pandas_tool.py +477 -121
  305. aiecs/tools/task_tools/report_tool.py +390 -142
  306. aiecs/tools/task_tools/research_tool.py +149 -79
  307. aiecs/tools/task_tools/scraper_tool.py +339 -145
  308. aiecs/tools/task_tools/stats_tool.py +448 -209
  309. aiecs/tools/temp_file_manager.py +26 -24
  310. aiecs/tools/tool_executor/__init__.py +18 -16
  311. aiecs/tools/tool_executor/tool_executor.py +364 -52
  312. aiecs/utils/LLM_output_structor.py +74 -48
  313. aiecs/utils/__init__.py +14 -3
  314. aiecs/utils/base_callback.py +0 -3
  315. aiecs/utils/cache_provider.py +696 -0
  316. aiecs/utils/execution_utils.py +50 -31
  317. aiecs/utils/prompt_loader.py +1 -0
  318. aiecs/utils/token_usage_repository.py +37 -11
  319. aiecs/ws/socket_server.py +14 -4
  320. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
  321. aiecs-1.7.6.dist-info/RECORD +337 -0
  322. aiecs-1.7.6.dist-info/entry_points.txt +13 -0
  323. aiecs/config/registry.py +0 -19
  324. aiecs/domain/context/content_engine.py +0 -982
  325. aiecs/llm/base_client.py +0 -99
  326. aiecs/llm/openai_client.py +0 -125
  327. aiecs/llm/vertex_client.py +0 -186
  328. aiecs/llm/xai_client.py +0 -184
  329. aiecs/scripts/dependency_checker.py +0 -857
  330. aiecs/scripts/quick_dependency_check.py +0 -269
  331. aiecs/tools/task_tools/search_api.py +0 -7
  332. aiecs-1.0.1.dist-info/RECORD +0 -90
  333. aiecs-1.0.1.dist-info/entry_points.txt +0 -7
  334. /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
  335. /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
  336. /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
  337. /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
  338. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
  339. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
  340. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,433 @@
1
+ """
2
+ Entity Deduplicator
3
+
4
+ Identifies and merges duplicate entities based on similarity matching.
5
+ """
6
+
7
+ from typing import List, Dict, Optional, Tuple, Set, TYPE_CHECKING
8
+ from difflib import SequenceMatcher
9
+ from aiecs.domain.knowledge_graph.models.entity import Entity
10
+ from aiecs.infrastructure.graph_storage.tenant import (
11
+ TenantContext,
12
+ CrossTenantFusionError,
13
+ )
14
+
15
+ if TYPE_CHECKING:
16
+ from aiecs.application.knowledge_graph.fusion.similarity_pipeline import (
17
+ SimilarityPipeline,
18
+ )
19
+
20
+
21
class EntityDeduplicator:
    """
    Deduplicate entities based on similarity

    When extracting entities from text, it's common to get duplicates:
    - "Apple Inc." vs "Apple" vs "Apple Incorporated"
    - "John Smith" vs "J. Smith" vs "Smith, John"
    - "New York" vs "New York City" vs "NYC"

    This class identifies such duplicates and merges them into canonical entities.

    Features:
    - Name-based fuzzy matching
    - Type-aware matching (only match entities of same type)
    - Property-based matching (use properties to improve matching)
    - Configurable similarity threshold
    - Embedding-based matching (when embeddings available)

    Scoring (see ``_compute_similarity``):
    - without embeddings: ``0.7 * name + 0.3 * property``
    - with embeddings:    ``0.3 * name + 0.2 * property + 0.5 * embedding``

    Example:
        ```python
        # NOTE: the "name" property itself takes part in the property score,
        # so two entities whose only property is a different-but-contained
        # name score 0.7 * 0.95 + 0.3 * 0.5 ~= 0.815 with the built-in string
        # matcher. That is below the default threshold of 0.85, hence the
        # lower threshold used here.
        deduplicator = EntityDeduplicator(similarity_threshold=0.8)

        entities = [
            Entity(id="e1", entity_type="Company", properties={"name": "Apple Inc."}),
            Entity(id="e2", entity_type="Company", properties={"name": "Apple"}),
            Entity(id="e3", entity_type="Company", properties={"name": "Microsoft"})
        ]

        deduplicated = await deduplicator.deduplicate(entities)
        # Returns two entities:
        #   - "Apple Inc." with properties["_aliases"] == ["Apple"],
        #     plus "_extraction_confidence" and "_merged_count" bookkeeping keys
        #   - "Microsoft" (returned unchanged)
        ```
    """

    def __init__(
        self,
        similarity_threshold: float = 0.85,
        use_embeddings: bool = True,
        embedding_threshold: float = 0.90,
        similarity_pipeline: Optional["SimilarityPipeline"] = None,
    ):
        """
        Initialize entity deduplicator

        Args:
            similarity_threshold: Minimum combined similarity score to consider
                two entities duplicates (0.0-1.0)
            use_embeddings: Whether to use embeddings for similarity (if available)
            embedding_threshold: Minimum embedding similarity for duplicates (0.0-1.0)
            similarity_pipeline: Optional SimilarityPipeline for enhanced matching
        """
        self.similarity_threshold = similarity_threshold
        self.use_embeddings = use_embeddings
        # NOTE(review): this value is stored but no method of this class reads
        # it; duplicates are decided by `similarity_threshold` alone. Confirm
        # whether it is consumed elsewhere or should gate the embedding score.
        self.embedding_threshold = embedding_threshold
        self._similarity_pipeline = similarity_pipeline

    async def deduplicate(
        self, entities: List["Entity"], context: Optional["TenantContext"] = None
    ) -> List["Entity"]:
        """
        Deduplicate a list of entities

        **Tenant Isolation**: When context is provided, deduplication only compares
        entities within the same tenant. Entities from other tenants are filtered out
        (defense-in-depth).

        NOTE(review): when `context` is not provided, no tenant filtering or
        partitioning happens here, so entities of the same type but with
        different `tenant_id` values can end up in one cluster, and the merged
        entity takes the first entity's `tenant_id`. Confirm that callers
        always pass `context` in multi-tenant deployments.

        Args:
            entities: List of entities to deduplicate
            context: Optional tenant context for multi-tenant isolation

        Returns:
            List of deduplicated entities (with merged properties and aliases)
        """
        if not entities:
            return []

        # Filter to only entities in the specified tenant (defense-in-depth)
        if context:
            entities = [e for e in entities if e.tenant_id == context.tenant_id]

        # Group entities by type (only match within same type). A plain dict
        # keeps first-seen type order, so output order is deterministic.
        entities_by_type: Dict[str, List["Entity"]] = {}
        for entity in entities:
            entities_by_type.setdefault(entity.entity_type, []).append(entity)

        # Deduplicate within each type
        deduplicated_entities: List["Entity"] = []
        for type_entities in entities_by_type.values():
            deduplicated_entities.extend(await self._deduplicate_type_group(type_entities))

        return deduplicated_entities

    async def _deduplicate_type_group(self, entities: List["Entity"]) -> List["Entity"]:
        """
        Deduplicate entities of the same type

        Algorithm:
        1. Compare all pairs and record those at or above the threshold
        2. Find clusters of similar entities (connected components)
        3. Merge each cluster into a single canonical entity

        Note: Assumes all entities in the group are from the same tenant
        (validated by caller if in multi-tenant mode). The pairwise comparison
        is quadratic in the group size.

        Args:
            entities: List of entities (all same type and same tenant)

        Returns:
            List of deduplicated entities
        """
        if len(entities) <= 1:
            return entities

        # Build similarity graph: an edge (i, j) means entities i and j match
        n = len(entities)
        similar_pairs: Set[Tuple[int, int]] = set()

        for i in range(n):
            for j in range(i + 1, n):
                similarity = await self._compute_similarity(entities[i], entities[j])
                if similarity >= self.similarity_threshold:
                    similar_pairs.add((i, j))

        # Find connected components (clusters of similar entities)
        clusters = self._find_clusters(n, similar_pairs)

        # Merge each cluster into canonical entity
        return [
            self._merge_entities([entities[idx] for idx in cluster])
            for cluster in clusters
        ]

    async def _compute_similarity(self, entity1: "Entity", entity2: "Entity") -> float:
        """
        Compute similarity between two entities

        Uses multiple signals:
        1. Name similarity (via SimilarityPipeline if available, else fuzzy string matching)
        2. Property overlap
        3. Embedding similarity (if enabled and both entities carry an embedding)

        Args:
            entity1: First entity
            entity2: Second entity

        Returns:
            Similarity score (0.0-1.0); 0.0 when either entity has no name
        """
        # Get entity names
        name1 = self._get_entity_name(entity1)
        name2 = self._get_entity_name(entity2)

        if not name1 or not name2:
            return 0.0

        # 1. Name-based similarity (use pipeline if available)
        if self._similarity_pipeline is not None:
            # Use enhanced similarity pipeline with per-entity-type configuration
            pipeline_result = await self._similarity_pipeline.compute_similarity(
                name1=name1,
                name2=name2,
                entity_type=entity1.entity_type,
            )
            name_similarity = pipeline_result.final_score
        else:
            # Fallback to basic string similarity
            name_similarity = self._string_similarity(name1, name2)

        # 2. Property overlap
        property_similarity = self._property_similarity(entity1.properties, entity2.properties)

        # 3. Embedding similarity: only when enabled and both sides have one.
        # Evaluated once so the score and the weighting can never disagree.
        has_embeddings = bool(self.use_embeddings and entity1.embedding and entity2.embedding)
        if has_embeddings:
            embedding_similarity = self._cosine_similarity(entity1.embedding, entity2.embedding)
            # If embeddings available, give them high weight
            return 0.3 * name_similarity + 0.2 * property_similarity + 0.5 * embedding_similarity

        # No embeddings, rely on name and properties
        return 0.7 * name_similarity + 0.3 * property_similarity

    def set_similarity_pipeline(self, pipeline: "SimilarityPipeline") -> None:
        """
        Set the similarity pipeline for enhanced matching.

        Args:
            pipeline: SimilarityPipeline instance
        """
        self._similarity_pipeline = pipeline

    @property
    def similarity_pipeline(self) -> Optional["SimilarityPipeline"]:
        """Get the current similarity pipeline."""
        return self._similarity_pipeline

    def _get_entity_name(self, entity: "Entity") -> str:
        """
        Extract entity name from properties

        Tries the "name", "title" and "text" properties in that order and
        returns the first truthy value, or "" when none is set.
        """
        props = entity.properties
        return props.get("name") or props.get("title") or props.get("text") or ""

    def _string_similarity(self, str1: str, str2: str) -> float:
        """
        Compute string similarity using multiple methods

        Combines:
        - Exact match (normalized)            -> 1.0
        - One string contained in the other   -> 0.95
        - max(SequenceMatcher ratio, token Jaccard overlap) otherwise

        Args:
            str1: First string
            str2: Second string

        Returns:
            Similarity score (0.0-1.0)
        """
        # Normalize strings
        s1 = str1.lower().strip()
        s2 = str2.lower().strip()

        # Exact match
        if s1 == s2:
            return 1.0

        # Exactly one side is blank after normalization. Without this guard
        # the containment test below would fire ("" is contained in every
        # string) and a whitespace-only name would score 0.95 against anything.
        if not s1 or not s2:
            return 0.0

        # One is substring of other
        if s1 in s2 or s2 in s1:
            return 0.95

        # Sequence matcher
        seq_similarity = SequenceMatcher(None, s1, s2).ratio()

        # Token overlap (for multi-word names). Both strings are non-blank
        # here, so both token sets are non-empty and the union is never empty.
        tokens1 = set(s1.split())
        tokens2 = set(s2.split())
        token_overlap = len(tokens1 & tokens2) / len(tokens1 | tokens2)

        # Combine
        return max(seq_similarity, token_overlap)

    def _property_similarity(self, props1: Dict, props2: Dict) -> float:
        """
        Compute similarity based on property overlap

        Keys starting with "_" are treated as internal bookkeeping and ignored.
        The score is the mean of (a) the Jaccard overlap of the key sets and
        (b) the share of common keys whose values are equal when compared as
        lower-cased strings.

        Args:
            props1: Properties of first entity
            props2: Properties of second entity

        Returns:
            Similarity score (0.0-1.0); 0.5 when neither side has public properties
        """
        # Remove internal properties
        keys1 = {k for k in props1 if not k.startswith("_")}
        keys2 = {k for k in props2 if not k.startswith("_")}

        if not keys1 and not keys2:
            return 0.5  # No properties to compare

        # Key overlap (the union is non-empty because of the guard above)
        common_keys = keys1 & keys2
        key_overlap = len(common_keys) / len(keys1 | keys2)

        # Value similarity for common keys
        if common_keys:
            value_matches = sum(
                1
                for key in common_keys
                if str(props1[key]).lower() == str(props2[key]).lower()
            )
            value_similarity = value_matches / len(common_keys)
        else:
            value_similarity = 0.0

        # Combine
        return 0.5 * key_overlap + 0.5 * value_similarity

    def _cosine_similarity(self, vec1: List[float], vec2: List[float]) -> float:
        """
        Compute cosine similarity between two vectors

        Args:
            vec1: First vector
            vec2: Second vector

        Returns:
            Cosine similarity rescaled to 0.0-1.0; 0.0 when the vectors differ
            in length or either has zero magnitude
        """
        if len(vec1) != len(vec2):
            return 0.0

        dot_product = sum(a * b for a, b in zip(vec1, vec2))
        magnitude1 = sum(a * a for a in vec1) ** 0.5
        magnitude2 = sum(b * b for b in vec2) ** 0.5

        if magnitude1 == 0 or magnitude2 == 0:
            return 0.0

        # Cosine similarity ranges from -1 to 1, normalize to 0 to 1
        similarity = dot_product / (magnitude1 * magnitude2)
        # Clamp so floating-point drift cannot leave the documented range
        return min(1.0, max(0.0, (similarity + 1) / 2))

    def _find_clusters(self, n: int, edges: Set[Tuple[int, int]]) -> List[List[int]]:
        """
        Find connected components using Union-Find

        Args:
            n: Number of nodes
            edges: Set of edges (i, j) indicating similarity

        Returns:
            List of clusters, where each cluster is a list of node indices in
            ascending order; clusters are ordered by their smallest index
        """
        # Union-Find data structure
        parent = list(range(n))

        def find(x: int) -> int:
            # Iterative on purpose: unions are done without rank, so a chain
            # of edges can build a parent chain as long as the group, which a
            # recursive find would turn into a RecursionError.
            root = x
            while parent[root] != root:
                root = parent[root]
            # Path compression: point every node on the walked path at the root
            while parent[x] != root:
                parent[x], x = root, parent[x]
            return root

        def union(x: int, y: int) -> None:
            px, py = find(x), find(y)
            if px != py:
                parent[px] = py

        # Build connected components
        for i, j in edges:
            union(i, j)

        # Group by root
        clusters_dict: Dict[int, List[int]] = {}
        for i in range(n):
            clusters_dict.setdefault(find(i), []).append(i)

        return list(clusters_dict.values())

    @staticmethod
    def _is_missing(value) -> bool:
        """Return True for None and for empty strings/containers (0 and False are real values)."""
        if value is None:
            return True
        if isinstance(value, (str, bytes, list, tuple, dict, set, frozenset)):
            return len(value) == 0
        return False

    def _merge_entities(self, entities: List["Entity"]) -> "Entity":
        """
        Merge a cluster of similar entities into one canonical entity

        Strategy:
        - Use the first entity as base (id, type, embedding, source, tenant_id)
        - Merge all properties; a later entity only fills a key that is absent,
          None or empty on the merged result
        - Store alternative names as "_aliases", keeping aliases recorded by
          earlier merges
        - Keep highest "_extraction_confidence" (0.5 assumed when absent)
        - Record the cluster size in "_merged_count"

        Args:
            entities: Non-empty list of entities to merge

        Returns:
            Merged canonical entity; a single-entity cluster is returned as-is
        """
        if len(entities) == 1:
            return entities[0]

        # Use first entity as base
        canonical = entities[0]
        canonical_name = self._get_entity_name(canonical)

        # Collect all names as aliases
        aliases = set()
        for entity in entities:
            name = self._get_entity_name(entity)
            if name and name != canonical_name:
                aliases.add(name)
            # Carry forward aliases from a previous deduplication pass so that
            # re-merging already-merged entities does not lose them.
            previous = entity.properties.get("_aliases")
            if isinstance(previous, (list, tuple, set, frozenset)):
                aliases.update(
                    alias
                    for alias in previous
                    if isinstance(alias, str) and alias and alias != canonical_name
                )

        # Merge properties (prefer non-empty, non-None values)
        merged_properties = dict(canonical.properties)

        for entity in entities[1:]:
            for key, value in entity.properties.items():
                if key not in merged_properties or self._is_missing(merged_properties[key]):
                    merged_properties[key] = value

        # Add aliases (sorted so the result does not depend on set ordering)
        if aliases:
            merged_properties["_aliases"] = sorted(aliases, key=str)

        # Take highest confidence
        confidences = [e.properties.get("_extraction_confidence", 0.5) for e in entities]
        merged_properties["_extraction_confidence"] = max(confidences)

        # Track merge count
        merged_properties["_merged_count"] = len(entities)

        # Create merged entity (preserve tenant_id from canonical entity)
        merged_entity = Entity(
            id=canonical.id,
            entity_type=canonical.entity_type,
            properties=merged_properties,
            embedding=canonical.embedding,
            source=canonical.source,
            tenant_id=canonical.tenant_id,
        )

        return merged_entity