aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic. Click here for more details.

Files changed (340) hide show
  1. aiecs/__init__.py +13 -16
  2. aiecs/__main__.py +7 -7
  3. aiecs/aiecs_client.py +269 -75
  4. aiecs/application/executors/operation_executor.py +79 -54
  5. aiecs/application/knowledge_graph/__init__.py +7 -0
  6. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  7. aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
  8. aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
  11. aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
  12. aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
  13. aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
  14. aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
  15. aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
  16. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  17. aiecs/application/knowledge_graph/extractors/base.py +98 -0
  18. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
  19. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
  20. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
  21. aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
  22. aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
  23. aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
  24. aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
  25. aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
  26. aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
  27. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
  28. aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
  29. aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
  30. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
  31. aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
  32. aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
  33. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
  34. aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
  35. aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
  36. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  37. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
  38. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
  39. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  40. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
  41. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  42. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  43. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
  44. aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
  45. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
  46. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  47. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  48. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
  49. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
  50. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
  51. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
  52. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
  53. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
  54. aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
  55. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
  56. aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
  57. aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
  58. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
  59. aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
  60. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  61. aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
  62. aiecs/application/knowledge_graph/search/reranker.py +293 -0
  63. aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
  64. aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
  65. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  66. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
  67. aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
  68. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  69. aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
  70. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  71. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
  72. aiecs/common/__init__.py +9 -0
  73. aiecs/common/knowledge_graph/__init__.py +17 -0
  74. aiecs/common/knowledge_graph/runnable.py +471 -0
  75. aiecs/config/__init__.py +20 -5
  76. aiecs/config/config.py +762 -31
  77. aiecs/config/graph_config.py +131 -0
  78. aiecs/config/tool_config.py +399 -0
  79. aiecs/core/__init__.py +29 -13
  80. aiecs/core/interface/__init__.py +2 -2
  81. aiecs/core/interface/execution_interface.py +22 -22
  82. aiecs/core/interface/storage_interface.py +37 -88
  83. aiecs/core/registry/__init__.py +31 -0
  84. aiecs/core/registry/service_registry.py +92 -0
  85. aiecs/domain/__init__.py +270 -1
  86. aiecs/domain/agent/__init__.py +191 -0
  87. aiecs/domain/agent/base_agent.py +3870 -0
  88. aiecs/domain/agent/exceptions.py +99 -0
  89. aiecs/domain/agent/graph_aware_mixin.py +569 -0
  90. aiecs/domain/agent/hybrid_agent.py +1435 -0
  91. aiecs/domain/agent/integration/__init__.py +29 -0
  92. aiecs/domain/agent/integration/context_compressor.py +216 -0
  93. aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
  94. aiecs/domain/agent/integration/protocols.py +281 -0
  95. aiecs/domain/agent/integration/retry_policy.py +218 -0
  96. aiecs/domain/agent/integration/role_config.py +213 -0
  97. aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
  98. aiecs/domain/agent/lifecycle.py +291 -0
  99. aiecs/domain/agent/llm_agent.py +692 -0
  100. aiecs/domain/agent/memory/__init__.py +12 -0
  101. aiecs/domain/agent/memory/conversation.py +1124 -0
  102. aiecs/domain/agent/migration/__init__.py +14 -0
  103. aiecs/domain/agent/migration/conversion.py +163 -0
  104. aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
  105. aiecs/domain/agent/models.py +884 -0
  106. aiecs/domain/agent/observability.py +479 -0
  107. aiecs/domain/agent/persistence.py +449 -0
  108. aiecs/domain/agent/prompts/__init__.py +29 -0
  109. aiecs/domain/agent/prompts/builder.py +159 -0
  110. aiecs/domain/agent/prompts/formatters.py +187 -0
  111. aiecs/domain/agent/prompts/template.py +255 -0
  112. aiecs/domain/agent/registry.py +253 -0
  113. aiecs/domain/agent/tool_agent.py +444 -0
  114. aiecs/domain/agent/tools/__init__.py +15 -0
  115. aiecs/domain/agent/tools/schema_generator.py +364 -0
  116. aiecs/domain/community/__init__.py +155 -0
  117. aiecs/domain/community/agent_adapter.py +469 -0
  118. aiecs/domain/community/analytics.py +432 -0
  119. aiecs/domain/community/collaborative_workflow.py +648 -0
  120. aiecs/domain/community/communication_hub.py +634 -0
  121. aiecs/domain/community/community_builder.py +320 -0
  122. aiecs/domain/community/community_integration.py +796 -0
  123. aiecs/domain/community/community_manager.py +803 -0
  124. aiecs/domain/community/decision_engine.py +849 -0
  125. aiecs/domain/community/exceptions.py +231 -0
  126. aiecs/domain/community/models/__init__.py +33 -0
  127. aiecs/domain/community/models/community_models.py +234 -0
  128. aiecs/domain/community/resource_manager.py +461 -0
  129. aiecs/domain/community/shared_context_manager.py +589 -0
  130. aiecs/domain/context/__init__.py +40 -10
  131. aiecs/domain/context/context_engine.py +1910 -0
  132. aiecs/domain/context/conversation_models.py +87 -53
  133. aiecs/domain/context/graph_memory.py +582 -0
  134. aiecs/domain/execution/model.py +12 -4
  135. aiecs/domain/knowledge_graph/__init__.py +19 -0
  136. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  137. aiecs/domain/knowledge_graph/models/entity.py +148 -0
  138. aiecs/domain/knowledge_graph/models/evidence.py +178 -0
  139. aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
  140. aiecs/domain/knowledge_graph/models/path.py +171 -0
  141. aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
  142. aiecs/domain/knowledge_graph/models/query.py +261 -0
  143. aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
  144. aiecs/domain/knowledge_graph/models/relation.py +202 -0
  145. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  146. aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
  147. aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
  148. aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
  149. aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
  150. aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
  151. aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
  152. aiecs/domain/task/dsl_processor.py +172 -56
  153. aiecs/domain/task/model.py +20 -8
  154. aiecs/domain/task/task_context.py +27 -24
  155. aiecs/infrastructure/__init__.py +0 -2
  156. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  157. aiecs/infrastructure/graph_storage/base.py +837 -0
  158. aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
  159. aiecs/infrastructure/graph_storage/cache.py +424 -0
  160. aiecs/infrastructure/graph_storage/distributed.py +223 -0
  161. aiecs/infrastructure/graph_storage/error_handling.py +380 -0
  162. aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
  163. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  164. aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
  165. aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
  166. aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
  167. aiecs/infrastructure/graph_storage/metrics.py +344 -0
  168. aiecs/infrastructure/graph_storage/migration.py +400 -0
  169. aiecs/infrastructure/graph_storage/pagination.py +483 -0
  170. aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
  171. aiecs/infrastructure/graph_storage/postgres.py +1563 -0
  172. aiecs/infrastructure/graph_storage/property_storage.py +353 -0
  173. aiecs/infrastructure/graph_storage/protocols.py +76 -0
  174. aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
  175. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  176. aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
  177. aiecs/infrastructure/graph_storage/streaming.py +487 -0
  178. aiecs/infrastructure/graph_storage/tenant.py +412 -0
  179. aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
  180. aiecs/infrastructure/messaging/websocket_manager.py +51 -35
  181. aiecs/infrastructure/monitoring/__init__.py +22 -0
  182. aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
  183. aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
  184. aiecs/infrastructure/monitoring/structured_logger.py +3 -7
  185. aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
  186. aiecs/infrastructure/persistence/__init__.py +14 -1
  187. aiecs/infrastructure/persistence/context_engine_client.py +184 -0
  188. aiecs/infrastructure/persistence/database_manager.py +67 -43
  189. aiecs/infrastructure/persistence/file_storage.py +180 -103
  190. aiecs/infrastructure/persistence/redis_client.py +74 -21
  191. aiecs/llm/__init__.py +73 -25
  192. aiecs/llm/callbacks/__init__.py +11 -0
  193. aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
  194. aiecs/llm/client_factory.py +224 -36
  195. aiecs/llm/client_resolver.py +155 -0
  196. aiecs/llm/clients/__init__.py +38 -0
  197. aiecs/llm/clients/base_client.py +324 -0
  198. aiecs/llm/clients/google_function_calling_mixin.py +457 -0
  199. aiecs/llm/clients/googleai_client.py +241 -0
  200. aiecs/llm/clients/openai_client.py +158 -0
  201. aiecs/llm/clients/openai_compatible_mixin.py +367 -0
  202. aiecs/llm/clients/vertex_client.py +897 -0
  203. aiecs/llm/clients/xai_client.py +201 -0
  204. aiecs/llm/config/__init__.py +51 -0
  205. aiecs/llm/config/config_loader.py +272 -0
  206. aiecs/llm/config/config_validator.py +206 -0
  207. aiecs/llm/config/model_config.py +143 -0
  208. aiecs/llm/protocols.py +149 -0
  209. aiecs/llm/utils/__init__.py +10 -0
  210. aiecs/llm/utils/validate_config.py +89 -0
  211. aiecs/main.py +140 -121
  212. aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
  213. aiecs/scripts/aid/__init__.py +19 -0
  214. aiecs/scripts/aid/module_checker.py +499 -0
  215. aiecs/scripts/aid/version_manager.py +235 -0
  216. aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
  217. aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
  218. aiecs/scripts/dependance_check/__init__.py +15 -0
  219. aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
  220. aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
  221. aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
  222. aiecs/scripts/dependance_patch/__init__.py +7 -0
  223. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  224. aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
  225. aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
  226. aiecs/scripts/knowledge_graph/__init__.py +3 -0
  227. aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
  228. aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
  229. aiecs/scripts/tools_develop/README.md +671 -0
  230. aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
  231. aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
  232. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  233. aiecs/scripts/tools_develop/__init__.py +21 -0
  234. aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
  235. aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
  236. aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
  237. aiecs/scripts/tools_develop/schema_coverage.py +511 -0
  238. aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
  239. aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
  240. aiecs/scripts/tools_develop/verify_tools.py +352 -0
  241. aiecs/tasks/__init__.py +0 -1
  242. aiecs/tasks/worker.py +115 -47
  243. aiecs/tools/__init__.py +194 -72
  244. aiecs/tools/apisource/__init__.py +99 -0
  245. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  246. aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
  247. aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
  248. aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
  249. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  250. aiecs/tools/apisource/monitoring/metrics.py +330 -0
  251. aiecs/tools/apisource/providers/__init__.py +112 -0
  252. aiecs/tools/apisource/providers/base.py +671 -0
  253. aiecs/tools/apisource/providers/census.py +397 -0
  254. aiecs/tools/apisource/providers/fred.py +535 -0
  255. aiecs/tools/apisource/providers/newsapi.py +409 -0
  256. aiecs/tools/apisource/providers/worldbank.py +352 -0
  257. aiecs/tools/apisource/reliability/__init__.py +12 -0
  258. aiecs/tools/apisource/reliability/error_handler.py +363 -0
  259. aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
  260. aiecs/tools/apisource/tool.py +832 -0
  261. aiecs/tools/apisource/utils/__init__.py +9 -0
  262. aiecs/tools/apisource/utils/validators.py +334 -0
  263. aiecs/tools/base_tool.py +415 -21
  264. aiecs/tools/docs/__init__.py +121 -0
  265. aiecs/tools/docs/ai_document_orchestrator.py +607 -0
  266. aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
  267. aiecs/tools/docs/content_insertion_tool.py +1320 -0
  268. aiecs/tools/docs/document_creator_tool.py +1323 -0
  269. aiecs/tools/docs/document_layout_tool.py +1160 -0
  270. aiecs/tools/docs/document_parser_tool.py +1011 -0
  271. aiecs/tools/docs/document_writer_tool.py +1829 -0
  272. aiecs/tools/knowledge_graph/__init__.py +17 -0
  273. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
  274. aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
  275. aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
  276. aiecs/tools/langchain_adapter.py +300 -138
  277. aiecs/tools/schema_generator.py +455 -0
  278. aiecs/tools/search_tool/__init__.py +100 -0
  279. aiecs/tools/search_tool/analyzers.py +581 -0
  280. aiecs/tools/search_tool/cache.py +264 -0
  281. aiecs/tools/search_tool/constants.py +128 -0
  282. aiecs/tools/search_tool/context.py +224 -0
  283. aiecs/tools/search_tool/core.py +778 -0
  284. aiecs/tools/search_tool/deduplicator.py +119 -0
  285. aiecs/tools/search_tool/error_handler.py +242 -0
  286. aiecs/tools/search_tool/metrics.py +343 -0
  287. aiecs/tools/search_tool/rate_limiter.py +172 -0
  288. aiecs/tools/search_tool/schemas.py +275 -0
  289. aiecs/tools/statistics/__init__.py +80 -0
  290. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
  291. aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
  292. aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
  293. aiecs/tools/statistics/data_loader_tool.py +555 -0
  294. aiecs/tools/statistics/data_profiler_tool.py +638 -0
  295. aiecs/tools/statistics/data_transformer_tool.py +580 -0
  296. aiecs/tools/statistics/data_visualizer_tool.py +498 -0
  297. aiecs/tools/statistics/model_trainer_tool.py +507 -0
  298. aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
  299. aiecs/tools/task_tools/__init__.py +49 -36
  300. aiecs/tools/task_tools/chart_tool.py +200 -184
  301. aiecs/tools/task_tools/classfire_tool.py +268 -267
  302. aiecs/tools/task_tools/image_tool.py +175 -131
  303. aiecs/tools/task_tools/office_tool.py +226 -146
  304. aiecs/tools/task_tools/pandas_tool.py +477 -121
  305. aiecs/tools/task_tools/report_tool.py +390 -142
  306. aiecs/tools/task_tools/research_tool.py +149 -79
  307. aiecs/tools/task_tools/scraper_tool.py +339 -145
  308. aiecs/tools/task_tools/stats_tool.py +448 -209
  309. aiecs/tools/temp_file_manager.py +26 -24
  310. aiecs/tools/tool_executor/__init__.py +18 -16
  311. aiecs/tools/tool_executor/tool_executor.py +364 -52
  312. aiecs/utils/LLM_output_structor.py +74 -48
  313. aiecs/utils/__init__.py +14 -3
  314. aiecs/utils/base_callback.py +0 -3
  315. aiecs/utils/cache_provider.py +696 -0
  316. aiecs/utils/execution_utils.py +50 -31
  317. aiecs/utils/prompt_loader.py +1 -0
  318. aiecs/utils/token_usage_repository.py +37 -11
  319. aiecs/ws/socket_server.py +14 -4
  320. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
  321. aiecs-1.7.6.dist-info/RECORD +337 -0
  322. aiecs-1.7.6.dist-info/entry_points.txt +13 -0
  323. aiecs/config/registry.py +0 -19
  324. aiecs/domain/context/content_engine.py +0 -982
  325. aiecs/llm/base_client.py +0 -99
  326. aiecs/llm/openai_client.py +0 -125
  327. aiecs/llm/vertex_client.py +0 -186
  328. aiecs/llm/xai_client.py +0 -184
  329. aiecs/scripts/dependency_checker.py +0 -857
  330. aiecs/scripts/quick_dependency_check.py +0 -269
  331. aiecs/tools/task_tools/search_api.py +0 -7
  332. aiecs-1.0.1.dist-info/RECORD +0 -90
  333. aiecs-1.0.1.dist-info/entry_points.txt +0 -7
  334. /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
  335. /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
  336. /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
  337. /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
  338. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
  339. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
  340. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,511 @@
1
+ """
2
+ Entity Linker
3
+
4
+ Links newly extracted entities to existing entities in the knowledge graph.
5
+ """
6
+
7
+ from typing import List, Optional, TYPE_CHECKING
8
+ from aiecs.domain.knowledge_graph.models.entity import Entity
9
+ from aiecs.infrastructure.graph_storage.base import GraphStore
10
+ from aiecs.infrastructure.graph_storage.tenant import TenantContext
11
+
12
+ if TYPE_CHECKING:
13
+ from aiecs.application.knowledge_graph.fusion.similarity_pipeline import (
14
+ SimilarityPipeline,
15
+ )
16
+
17
+
18
+ class EntityLinker:
19
+ """
20
+ Link new entities to existing entities in the graph
21
+
22
+ When extracting entities from new documents, many entities may already exist
23
+ in the knowledge graph. This class identifies such matches and links them,
24
+ preventing duplication across the entire graph.
25
+
26
+ Features:
27
+ - Exact ID matching
28
+ - Name-based fuzzy matching
29
+ - Embedding-based similarity search
30
+ - Type-aware linking
31
+ - Confidence scoring
32
+
33
+ Workflow:
34
+ 1. For each new entity, search graph for similar existing entities
35
+ 2. If match found, return existing entity ID (link)
36
+ 3. If no match, entity is new and should be added
37
+
38
+ Example:
39
+ ```python
40
+ linker = EntityLinker(graph_store, similarity_threshold=0.85)
41
+
42
+ new_entity = Entity(type="Person", properties={"name": "Alice Smith"})
43
+
44
+ # Check if Alice already exists
45
+ link_result = await linker.link_entity(new_entity)
46
+
47
+ if link_result.linked:
48
+ print(f"Linked to existing entity: {link_result.existing_entity.id}")
49
+ # Use existing entity instead of creating new one
50
+ else:
51
+ print("New entity - add to graph")
52
+ # Add new_entity to graph
53
+ ```
54
+ """
55
+
56
def __init__(
    self,
    graph_store: GraphStore,
    similarity_threshold: float = 0.85,
    use_embeddings: bool = True,
    embedding_threshold: float = 0.90,
    similarity_pipeline: Optional["SimilarityPipeline"] = None,
):
    """
    Store the graph store and the matching configuration on the linker.

    No validation is performed here; the values are kept exactly as given.

    Args:
        graph_store: Graph storage that is searched for existing entities
        similarity_threshold: Minimum name similarity required to link
            (expected range 0.0-1.0)
        use_embeddings: When true, link_entity tries embedding search
            before name matching
        embedding_threshold: Minimum embedding similarity required to link
            (expected range 0.0-1.0)
        similarity_pipeline: Optional SimilarityPipeline, kept on the
            instance as ``_similarity_pipeline``
    """
    # Collaborators
    self.graph_store = graph_store
    self._similarity_pipeline = similarity_pipeline

    # Matching configuration
    self.use_embeddings = use_embeddings
    self.similarity_threshold, self.embedding_threshold = (
        similarity_threshold,
        embedding_threshold,
    )
79
+
80
async def link_entity(
    self,
    new_entity: Entity,
    candidate_limit: int = 10,
    context: Optional[TenantContext] = None,
) -> "LinkResult":
    """
    Decide whether a new entity corresponds to one already in the graph.

    Matching is attempted in three stages, stopping at the first hit:
    exact ID lookup, embedding similarity (only when enabled and the
    entity carries an embedding), then name-based matching.

    **Tenant Isolation**: the context is forwarded to every store query.
    An ID hit whose tenant_id differs from the context's tenant_id is
    not accepted as a link.

    Args:
        new_entity: Entity to link
        candidate_limit: Maximum number of candidates to consider
        context: Optional tenant context for multi-tenant isolation

    Returns:
        LinkResult describing the decision and the matched entity (if any)
    """
    # Stage 1: exact ID lookup
    id_hit = await self.graph_store.get_entity(new_entity.id, context=context)
    # An ID hit that belongs to a different tenant is ignored; matching
    # then continues with the similarity-based stages below.
    if id_hit and not (context and id_hit.tenant_id != context.tenant_id):
        return LinkResult(
            linked=True,
            existing_entity=id_hit,
            new_entity=new_entity,
            similarity=1.0,
            link_type="exact_id",
        )

    # Stage 2: embedding similarity (fast, semantic)
    if self.use_embeddings and new_entity.embedding:
        embedding_outcome = await self._link_by_embedding(
            new_entity, candidate_limit, context
        )
        if embedding_outcome.linked:
            return embedding_outcome

    # Stage 3: name-based matching; its result is final either way
    return await self._link_by_name(new_entity, candidate_limit, context)
128
+
129
async def link_entities(
    self,
    new_entities: List[Entity],
    candidate_limit: int = 10,
    context: Optional[TenantContext] = None,
) -> List["LinkResult"]:
    """
    Run link_entity over a list of entities, one after another.

    **Tenant Isolation**: the same context is passed to every
    link_entity call.

    Args:
        new_entities: List of entities to link
        candidate_limit: Maximum candidates per entity
        context: Optional tenant context for multi-tenant isolation

    Returns:
        List of LinkResult objects, in the same order as the input
    """
    # Each await completes before the next element is processed, so the
    # entities are linked sequentially, not concurrently.
    return [
        await self.link_entity(entity, candidate_limit, context)
        for entity in new_entities
    ]
154
+
155
async def _link_by_embedding(
    self,
    new_entity: Entity,
    candidate_limit: int,
    context: Optional[TenantContext] = None,
) -> "LinkResult":
    """
    Link entity using embedding similarity search

    Queries the graph store's vector_search with the entity's embedding
    and inspects only the first returned candidate. That candidate is
    accepted when its score reaches embedding_threshold and either the
    name sanity check passes or the score is at least 0.95.

    This is best-effort: any failure during the search is logged as a
    warning and reported as "not linked" instead of being raised.

    Args:
        new_entity: Entity to link
        candidate_limit: Maximum candidates to consider
        context: Optional tenant context for scoping search

    Returns:
        LinkResult (linked=False when nothing qualifies or the search fails)
    """
    # Local import keeps this method self-contained.
    import logging

    if not new_entity.embedding:
        return LinkResult(linked=False, new_entity=new_entity)

    try:
        # Vector search in graph (with tenant context)
        candidates = await self.graph_store.vector_search(
            query_embedding=new_entity.embedding,
            entity_type=new_entity.entity_type,
            max_results=candidate_limit,
            score_threshold=self.embedding_threshold,
            context=context,
        )

        if not candidates:
            return LinkResult(linked=False, new_entity=new_entity)

        # Only the first result is examined.
        # NOTE(review): presumably results are ordered best-first - confirm
        # against the store implementations.
        best_entity, best_score = candidates[0]

        if best_score >= self.embedding_threshold:
            # Also verify name similarity (sanity check)
            name_match = self._check_name_similarity(new_entity, best_entity)

            # A very high embedding score is trusted even without a name match
            if name_match or best_score >= 0.95:
                return LinkResult(
                    linked=True,
                    existing_entity=best_entity,
                    new_entity=new_entity,
                    similarity=best_score,
                    link_type="embedding",
                )

    except NotImplementedError:
        # Graph store doesn't support vector search
        pass
    except Exception as e:
        # Deliberately best-effort: report through logging (with traceback)
        # rather than printing to stdout, and fall through to "not linked".
        logging.getLogger(__name__).warning(
            "Embedding search failed: %s", e, exc_info=True
        )

    return LinkResult(linked=False, new_entity=new_entity)
213
+
214
async def _link_by_name(
    self,
    new_entity: Entity,
    candidate_limit: int,
    context: Optional[TenantContext] = None,
) -> "LinkResult":
    """
    Link entity using name-based matching

    Strategy:
    1. If the store exposes text_search, query it with the entity's name
       and re-score the first result with _name_similarity.
    2. If that produces no link, fetch candidates through
       _get_candidate_entities and score each candidate's name.
    3. Link to the best-scoring candidate if it reaches
       similarity_threshold.

    This is best-effort: unexpected failures are logged as a warning and
    reported as "not linked" instead of being raised.

    Args:
        new_entity: Entity to link
        candidate_limit: Maximum candidates to consider
        context: Optional tenant context for scoping search

    Returns:
        LinkResult (linked=False when the entity has no name, nothing
        qualifies, or the lookup fails)
    """
    # Local import keeps this method self-contained.
    import logging

    new_name = self._get_entity_name(new_entity)
    if not new_name:
        return LinkResult(linked=False, new_entity=new_entity)

    try:
        # Try text_search first if available (optimized for name-based queries)
        if hasattr(self.graph_store, "text_search"):
            try:
                text_results = await self.graph_store.text_search(
                    query_text=new_name,
                    entity_type=new_entity.entity_type,
                    max_results=candidate_limit,
                    score_threshold=self.similarity_threshold,
                    method="levenshtein",  # Good for name matching
                    context=context,
                )

                if text_results:
                    # Only the first result is examined; the store's own
                    # score is discarded in favour of our recomputed one.
                    best_entity, _store_score = text_results[0]

                    candidate_name = self._get_entity_name(best_entity)
                    if candidate_name:
                        # Recompute similarity using our method for consistency
                        name_similarity = self._name_similarity(
                            new_name,
                            candidate_name,
                            entity_type=new_entity.entity_type,
                        )

                        if name_similarity >= self.similarity_threshold:
                            return LinkResult(
                                linked=True,
                                existing_entity=best_entity,
                                new_entity=new_entity,
                                similarity=name_similarity,
                                link_type="name",
                            )
            except (ValueError, TypeError, NotImplementedError):
                # text_search may not be available or may not support this
                # pattern; fall through to candidate enumeration
                pass

        # Fallback: Get candidate entities and compare names manually
        candidates = await self._get_candidate_entities(
            new_entity.entity_type, candidate_limit, context
        )

        if not candidates:
            return LinkResult(linked=False, new_entity=new_entity)

        # Find best match (strict ">" keeps the earliest candidate on ties)
        best_match = None
        best_score = 0.0

        for candidate in candidates:
            candidate_name = self._get_entity_name(candidate)
            if candidate_name:
                score = self._name_similarity(
                    new_name, candidate_name, entity_type=new_entity.entity_type
                )
                if score > best_score:
                    best_score = score
                    best_match = candidate

        # Check threshold
        if best_score >= self.similarity_threshold and best_match:
            return LinkResult(
                linked=True,
                existing_entity=best_match,
                new_entity=new_entity,
                similarity=best_score,
                link_type="name",
            )

    except Exception as e:
        # Deliberately best-effort: report through logging (with traceback)
        # rather than printing to stdout, and fall through to "not linked".
        logging.getLogger(__name__).warning(
            "Name-based linking failed: %s", e, exc_info=True
        )

    return LinkResult(linked=False, new_entity=new_entity)
319
+
320
async def _get_candidate_entities(
    self, entity_type: str, limit: int, context: Optional[TenantContext] = None
) -> List[Entity]:
    """
    Get candidate entities for linking

    Lookup order:
    1. graph_store.get_all_entities(entity_type, limit, context), when the
       store exposes that method.
    2. Otherwise graph_store.text_search with an empty query, filtered by
       entity type, when the store exposes text_search.
    3. Otherwise an empty list.

    This is best-effort: failures are logged as a warning and an empty
    list is returned, which _link_by_name treats as "no candidates".

    Args:
        entity_type: Entity type to filter by
        limit: Maximum candidates to return
        context: Optional tenant context, forwarded to the store queries

    Returns:
        List of candidate entities (possibly empty)
    """
    # Local import keeps this method self-contained.
    import logging

    try:
        # Preferred path: direct enumeration by type
        if hasattr(self.graph_store, "get_all_entities"):
            candidates = await self.graph_store.get_all_entities(
                entity_type=entity_type,
                limit=limit,
                context=context,
            )
            return candidates

        # Fallback: text_search with an empty query to get entities by type.
        # Some implementations may support this pattern.
        if hasattr(self.graph_store, "text_search"):
            try:
                results = await self.graph_store.text_search(
                    query_text="",  # Empty query to get all
                    entity_type=entity_type,
                    max_results=limit,
                    score_threshold=0.0,
                    method="bm25",
                    context=context,
                )
                # Extract entities from (entity, score) tuples
                return [entity for entity, _ in results]
            except (ValueError, TypeError, NotImplementedError):
                # text_search may not support empty queries (or this
                # method); treated the same way as in _link_by_name.
                pass

        # Last resort: no enumeration path is available, so report no
        # candidates rather than iterating the store inefficiently.
        return []

    except Exception as e:
        # Deliberately best-effort: report through logging (with traceback)
        # rather than printing to stdout, and return no candidates.
        logging.getLogger(__name__).warning(
            "Candidate entity retrieval failed: %s", e, exc_info=True
        )
        return []
377
+
378
def _check_name_similarity(self, entity1: Entity, entity2: Entity) -> bool:
    """
    Decide whether two entities' names clear the similarity threshold

    Args:
        entity1: First entity; its ``entity_type`` is the one handed to the
            name scorer
        entity2: Second entity

    Returns:
        False when either entity yields no name, otherwise whether the name
        score is at least ``self.similarity_threshold``
    """
    first_name = self._get_entity_name(entity1)
    second_name = self._get_entity_name(entity2)

    if first_name and second_name:
        score = self._name_similarity(
            first_name, second_name, entity_type=entity1.entity_type
        )
        return score >= self.similarity_threshold

    # At least one side has no name to compare.
    return False
398
+
399
def _get_entity_name(self, entity: Entity) -> str:
    """Return the first truthy of the "name", "title", "text" properties, else ""."""
    props = entity.properties
    # Keys are tried in priority order; the first truthy value wins.
    for key in ("name", "title", "text"):
        value = props.get(key)
        if value:
            return value
    return ""
402
+
403
def _name_similarity(
    self, name1: str, name2: str, entity_type: Optional[str] = None
) -> float:
    """
    Compute name similarity using fuzzy matching or SimilarityPipeline.

    When a similarity pipeline is set, the call is delegated to its
    synchronous ``compute_similarity_sync`` and that result is returned
    as-is. Otherwise both names are lower-cased and stripped, then scored:

    * either name empty after normalization -> 0.0
    * identical                             -> 1.0
    * one contained in the other            -> 0.95
    * anything else                         -> ``SequenceMatcher`` ratio

    Args:
        name1: First name
        name2: Second name
        entity_type: Entity type for per-type configuration (optional);
            only used by the pipeline path

    Returns:
        Similarity score (0.0-1.0)
    """
    # Use pipeline if available (synchronous version for compatibility)
    if self._similarity_pipeline is not None:
        return self._similarity_pipeline.compute_similarity_sync(
            name1=name1,
            name2=name2,
            entity_type=entity_type,
        )

    from difflib import SequenceMatcher

    # Normalize
    n1 = name1.lower().strip()
    n2 = name2.lower().strip()

    # A blank name carries no information. Without this guard the substring
    # test below would score it 0.95 against every name ("" is contained in
    # any string), and two blank names would count as an exact match.
    if not n1 or not n2:
        return 0.0

    # Exact match
    if n1 == n2:
        return 1.0

    # Substring match
    if n1 in n2 or n2 in n1:
        return 0.95

    # Fuzzy match
    return SequenceMatcher(None, n1, n2).ratio()
441
+
442
async def _name_similarity_async(
    self, name1: str, name2: str, entity_type: Optional[str] = None
) -> float:
    """
    Score two names, awaiting the SimilarityPipeline when one is set.

    Args:
        name1: First name
        name2: Second name
        entity_type: Entity type for per-type configuration (optional)

    Returns:
        The ``final_score`` of the pipeline result when a pipeline is set,
        otherwise the value of the synchronous ``_name_similarity``
    """
    pipeline = self._similarity_pipeline

    # No pipeline configured: defer to the synchronous scorer.
    if pipeline is None:
        return self._name_similarity(name1, name2, entity_type)

    outcome = await pipeline.compute_similarity(
        name1=name1,
        name2=name2,
        entity_type=entity_type,
    )
    return outcome.final_score
466
+
467
def set_similarity_pipeline(self, pipeline: "SimilarityPipeline") -> None:
    """
    Install the pipeline used by the name-similarity methods.

    Args:
        pipeline: SimilarityPipeline instance to store on this object
    """
    # Stored on the private attribute that the similarity methods consult.
    self._similarity_pipeline = pipeline
475
+
476
@property
def similarity_pipeline(self) -> Optional["SimilarityPipeline"]:
    """Return the stored similarity pipeline (may be None)."""
    return self._similarity_pipeline
480
+
481
+
482
class LinkResult:
    """
    Outcome of one entity linking attempt

    Attributes:
        linked: Whether a link was found
        existing_entity: The matched existing entity (None by default)
        new_entity: The new entity that was being linked
        similarity: Similarity score (0.0-1.0), 0.0 by default
        link_type: Type of link ("exact_id", "embedding", "name", "none")
    """

    def __init__(
        self,
        linked: bool,
        new_entity: Entity,
        existing_entity: Optional[Entity] = None,
        similarity: float = 0.0,
        link_type: str = "none",
    ):
        # Plain value holder: every argument is stored unchanged.
        self.linked = linked
        self.new_entity = new_entity
        self.existing_entity = existing_entity
        self.link_type = link_type
        self.similarity = similarity

    def __repr__(self) -> str:
        # Unlinked results carry no useful detail, so keep them short.
        if not self.linked:
            return "LinkResult(linked=False)"
        return f"LinkResult(linked=True, type={self.link_type}, similarity={self.similarity:.2f})"