aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic. Click here for more details.

Files changed (340)
  1. aiecs/__init__.py +13 -16
  2. aiecs/__main__.py +7 -7
  3. aiecs/aiecs_client.py +269 -75
  4. aiecs/application/executors/operation_executor.py +79 -54
  5. aiecs/application/knowledge_graph/__init__.py +7 -0
  6. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  7. aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
  8. aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
  11. aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
  12. aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
  13. aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
  14. aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
  15. aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
  16. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  17. aiecs/application/knowledge_graph/extractors/base.py +98 -0
  18. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
  19. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
  20. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
  21. aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
  22. aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
  23. aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
  24. aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
  25. aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
  26. aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
  27. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
  28. aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
  29. aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
  30. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
  31. aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
  32. aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
  33. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
  34. aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
  35. aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
  36. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  37. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
  38. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
  39. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  40. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
  41. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  42. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  43. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
  44. aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
  45. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
  46. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  47. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  48. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
  49. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
  50. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
  51. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
  52. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
  53. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
  54. aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
  55. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
  56. aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
  57. aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
  58. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
  59. aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
  60. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  61. aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
  62. aiecs/application/knowledge_graph/search/reranker.py +293 -0
  63. aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
  64. aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
  65. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  66. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
  67. aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
  68. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  69. aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
  70. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  71. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
  72. aiecs/common/__init__.py +9 -0
  73. aiecs/common/knowledge_graph/__init__.py +17 -0
  74. aiecs/common/knowledge_graph/runnable.py +471 -0
  75. aiecs/config/__init__.py +20 -5
  76. aiecs/config/config.py +762 -31
  77. aiecs/config/graph_config.py +131 -0
  78. aiecs/config/tool_config.py +399 -0
  79. aiecs/core/__init__.py +29 -13
  80. aiecs/core/interface/__init__.py +2 -2
  81. aiecs/core/interface/execution_interface.py +22 -22
  82. aiecs/core/interface/storage_interface.py +37 -88
  83. aiecs/core/registry/__init__.py +31 -0
  84. aiecs/core/registry/service_registry.py +92 -0
  85. aiecs/domain/__init__.py +270 -1
  86. aiecs/domain/agent/__init__.py +191 -0
  87. aiecs/domain/agent/base_agent.py +3870 -0
  88. aiecs/domain/agent/exceptions.py +99 -0
  89. aiecs/domain/agent/graph_aware_mixin.py +569 -0
  90. aiecs/domain/agent/hybrid_agent.py +1435 -0
  91. aiecs/domain/agent/integration/__init__.py +29 -0
  92. aiecs/domain/agent/integration/context_compressor.py +216 -0
  93. aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
  94. aiecs/domain/agent/integration/protocols.py +281 -0
  95. aiecs/domain/agent/integration/retry_policy.py +218 -0
  96. aiecs/domain/agent/integration/role_config.py +213 -0
  97. aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
  98. aiecs/domain/agent/lifecycle.py +291 -0
  99. aiecs/domain/agent/llm_agent.py +692 -0
  100. aiecs/domain/agent/memory/__init__.py +12 -0
  101. aiecs/domain/agent/memory/conversation.py +1124 -0
  102. aiecs/domain/agent/migration/__init__.py +14 -0
  103. aiecs/domain/agent/migration/conversion.py +163 -0
  104. aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
  105. aiecs/domain/agent/models.py +884 -0
  106. aiecs/domain/agent/observability.py +479 -0
  107. aiecs/domain/agent/persistence.py +449 -0
  108. aiecs/domain/agent/prompts/__init__.py +29 -0
  109. aiecs/domain/agent/prompts/builder.py +159 -0
  110. aiecs/domain/agent/prompts/formatters.py +187 -0
  111. aiecs/domain/agent/prompts/template.py +255 -0
  112. aiecs/domain/agent/registry.py +253 -0
  113. aiecs/domain/agent/tool_agent.py +444 -0
  114. aiecs/domain/agent/tools/__init__.py +15 -0
  115. aiecs/domain/agent/tools/schema_generator.py +364 -0
  116. aiecs/domain/community/__init__.py +155 -0
  117. aiecs/domain/community/agent_adapter.py +469 -0
  118. aiecs/domain/community/analytics.py +432 -0
  119. aiecs/domain/community/collaborative_workflow.py +648 -0
  120. aiecs/domain/community/communication_hub.py +634 -0
  121. aiecs/domain/community/community_builder.py +320 -0
  122. aiecs/domain/community/community_integration.py +796 -0
  123. aiecs/domain/community/community_manager.py +803 -0
  124. aiecs/domain/community/decision_engine.py +849 -0
  125. aiecs/domain/community/exceptions.py +231 -0
  126. aiecs/domain/community/models/__init__.py +33 -0
  127. aiecs/domain/community/models/community_models.py +234 -0
  128. aiecs/domain/community/resource_manager.py +461 -0
  129. aiecs/domain/community/shared_context_manager.py +589 -0
  130. aiecs/domain/context/__init__.py +40 -10
  131. aiecs/domain/context/context_engine.py +1910 -0
  132. aiecs/domain/context/conversation_models.py +87 -53
  133. aiecs/domain/context/graph_memory.py +582 -0
  134. aiecs/domain/execution/model.py +12 -4
  135. aiecs/domain/knowledge_graph/__init__.py +19 -0
  136. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  137. aiecs/domain/knowledge_graph/models/entity.py +148 -0
  138. aiecs/domain/knowledge_graph/models/evidence.py +178 -0
  139. aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
  140. aiecs/domain/knowledge_graph/models/path.py +171 -0
  141. aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
  142. aiecs/domain/knowledge_graph/models/query.py +261 -0
  143. aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
  144. aiecs/domain/knowledge_graph/models/relation.py +202 -0
  145. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  146. aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
  147. aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
  148. aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
  149. aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
  150. aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
  151. aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
  152. aiecs/domain/task/dsl_processor.py +172 -56
  153. aiecs/domain/task/model.py +20 -8
  154. aiecs/domain/task/task_context.py +27 -24
  155. aiecs/infrastructure/__init__.py +0 -2
  156. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  157. aiecs/infrastructure/graph_storage/base.py +837 -0
  158. aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
  159. aiecs/infrastructure/graph_storage/cache.py +424 -0
  160. aiecs/infrastructure/graph_storage/distributed.py +223 -0
  161. aiecs/infrastructure/graph_storage/error_handling.py +380 -0
  162. aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
  163. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  164. aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
  165. aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
  166. aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
  167. aiecs/infrastructure/graph_storage/metrics.py +344 -0
  168. aiecs/infrastructure/graph_storage/migration.py +400 -0
  169. aiecs/infrastructure/graph_storage/pagination.py +483 -0
  170. aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
  171. aiecs/infrastructure/graph_storage/postgres.py +1563 -0
  172. aiecs/infrastructure/graph_storage/property_storage.py +353 -0
  173. aiecs/infrastructure/graph_storage/protocols.py +76 -0
  174. aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
  175. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  176. aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
  177. aiecs/infrastructure/graph_storage/streaming.py +487 -0
  178. aiecs/infrastructure/graph_storage/tenant.py +412 -0
  179. aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
  180. aiecs/infrastructure/messaging/websocket_manager.py +51 -35
  181. aiecs/infrastructure/monitoring/__init__.py +22 -0
  182. aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
  183. aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
  184. aiecs/infrastructure/monitoring/structured_logger.py +3 -7
  185. aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
  186. aiecs/infrastructure/persistence/__init__.py +14 -1
  187. aiecs/infrastructure/persistence/context_engine_client.py +184 -0
  188. aiecs/infrastructure/persistence/database_manager.py +67 -43
  189. aiecs/infrastructure/persistence/file_storage.py +180 -103
  190. aiecs/infrastructure/persistence/redis_client.py +74 -21
  191. aiecs/llm/__init__.py +73 -25
  192. aiecs/llm/callbacks/__init__.py +11 -0
  193. aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
  194. aiecs/llm/client_factory.py +224 -36
  195. aiecs/llm/client_resolver.py +155 -0
  196. aiecs/llm/clients/__init__.py +38 -0
  197. aiecs/llm/clients/base_client.py +324 -0
  198. aiecs/llm/clients/google_function_calling_mixin.py +457 -0
  199. aiecs/llm/clients/googleai_client.py +241 -0
  200. aiecs/llm/clients/openai_client.py +158 -0
  201. aiecs/llm/clients/openai_compatible_mixin.py +367 -0
  202. aiecs/llm/clients/vertex_client.py +897 -0
  203. aiecs/llm/clients/xai_client.py +201 -0
  204. aiecs/llm/config/__init__.py +51 -0
  205. aiecs/llm/config/config_loader.py +272 -0
  206. aiecs/llm/config/config_validator.py +206 -0
  207. aiecs/llm/config/model_config.py +143 -0
  208. aiecs/llm/protocols.py +149 -0
  209. aiecs/llm/utils/__init__.py +10 -0
  210. aiecs/llm/utils/validate_config.py +89 -0
  211. aiecs/main.py +140 -121
  212. aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
  213. aiecs/scripts/aid/__init__.py +19 -0
  214. aiecs/scripts/aid/module_checker.py +499 -0
  215. aiecs/scripts/aid/version_manager.py +235 -0
  216. aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
  217. aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
  218. aiecs/scripts/dependance_check/__init__.py +15 -0
  219. aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
  220. aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
  221. aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
  222. aiecs/scripts/dependance_patch/__init__.py +7 -0
  223. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  224. aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
  225. aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
  226. aiecs/scripts/knowledge_graph/__init__.py +3 -0
  227. aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
  228. aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
  229. aiecs/scripts/tools_develop/README.md +671 -0
  230. aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
  231. aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
  232. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  233. aiecs/scripts/tools_develop/__init__.py +21 -0
  234. aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
  235. aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
  236. aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
  237. aiecs/scripts/tools_develop/schema_coverage.py +511 -0
  238. aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
  239. aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
  240. aiecs/scripts/tools_develop/verify_tools.py +352 -0
  241. aiecs/tasks/__init__.py +0 -1
  242. aiecs/tasks/worker.py +115 -47
  243. aiecs/tools/__init__.py +194 -72
  244. aiecs/tools/apisource/__init__.py +99 -0
  245. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  246. aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
  247. aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
  248. aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
  249. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  250. aiecs/tools/apisource/monitoring/metrics.py +330 -0
  251. aiecs/tools/apisource/providers/__init__.py +112 -0
  252. aiecs/tools/apisource/providers/base.py +671 -0
  253. aiecs/tools/apisource/providers/census.py +397 -0
  254. aiecs/tools/apisource/providers/fred.py +535 -0
  255. aiecs/tools/apisource/providers/newsapi.py +409 -0
  256. aiecs/tools/apisource/providers/worldbank.py +352 -0
  257. aiecs/tools/apisource/reliability/__init__.py +12 -0
  258. aiecs/tools/apisource/reliability/error_handler.py +363 -0
  259. aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
  260. aiecs/tools/apisource/tool.py +832 -0
  261. aiecs/tools/apisource/utils/__init__.py +9 -0
  262. aiecs/tools/apisource/utils/validators.py +334 -0
  263. aiecs/tools/base_tool.py +415 -21
  264. aiecs/tools/docs/__init__.py +121 -0
  265. aiecs/tools/docs/ai_document_orchestrator.py +607 -0
  266. aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
  267. aiecs/tools/docs/content_insertion_tool.py +1320 -0
  268. aiecs/tools/docs/document_creator_tool.py +1323 -0
  269. aiecs/tools/docs/document_layout_tool.py +1160 -0
  270. aiecs/tools/docs/document_parser_tool.py +1011 -0
  271. aiecs/tools/docs/document_writer_tool.py +1829 -0
  272. aiecs/tools/knowledge_graph/__init__.py +17 -0
  273. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
  274. aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
  275. aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
  276. aiecs/tools/langchain_adapter.py +300 -138
  277. aiecs/tools/schema_generator.py +455 -0
  278. aiecs/tools/search_tool/__init__.py +100 -0
  279. aiecs/tools/search_tool/analyzers.py +581 -0
  280. aiecs/tools/search_tool/cache.py +264 -0
  281. aiecs/tools/search_tool/constants.py +128 -0
  282. aiecs/tools/search_tool/context.py +224 -0
  283. aiecs/tools/search_tool/core.py +778 -0
  284. aiecs/tools/search_tool/deduplicator.py +119 -0
  285. aiecs/tools/search_tool/error_handler.py +242 -0
  286. aiecs/tools/search_tool/metrics.py +343 -0
  287. aiecs/tools/search_tool/rate_limiter.py +172 -0
  288. aiecs/tools/search_tool/schemas.py +275 -0
  289. aiecs/tools/statistics/__init__.py +80 -0
  290. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
  291. aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
  292. aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
  293. aiecs/tools/statistics/data_loader_tool.py +555 -0
  294. aiecs/tools/statistics/data_profiler_tool.py +638 -0
  295. aiecs/tools/statistics/data_transformer_tool.py +580 -0
  296. aiecs/tools/statistics/data_visualizer_tool.py +498 -0
  297. aiecs/tools/statistics/model_trainer_tool.py +507 -0
  298. aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
  299. aiecs/tools/task_tools/__init__.py +49 -36
  300. aiecs/tools/task_tools/chart_tool.py +200 -184
  301. aiecs/tools/task_tools/classfire_tool.py +268 -267
  302. aiecs/tools/task_tools/image_tool.py +175 -131
  303. aiecs/tools/task_tools/office_tool.py +226 -146
  304. aiecs/tools/task_tools/pandas_tool.py +477 -121
  305. aiecs/tools/task_tools/report_tool.py +390 -142
  306. aiecs/tools/task_tools/research_tool.py +149 -79
  307. aiecs/tools/task_tools/scraper_tool.py +339 -145
  308. aiecs/tools/task_tools/stats_tool.py +448 -209
  309. aiecs/tools/temp_file_manager.py +26 -24
  310. aiecs/tools/tool_executor/__init__.py +18 -16
  311. aiecs/tools/tool_executor/tool_executor.py +364 -52
  312. aiecs/utils/LLM_output_structor.py +74 -48
  313. aiecs/utils/__init__.py +14 -3
  314. aiecs/utils/base_callback.py +0 -3
  315. aiecs/utils/cache_provider.py +696 -0
  316. aiecs/utils/execution_utils.py +50 -31
  317. aiecs/utils/prompt_loader.py +1 -0
  318. aiecs/utils/token_usage_repository.py +37 -11
  319. aiecs/ws/socket_server.py +14 -4
  320. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
  321. aiecs-1.7.6.dist-info/RECORD +337 -0
  322. aiecs-1.7.6.dist-info/entry_points.txt +13 -0
  323. aiecs/config/registry.py +0 -19
  324. aiecs/domain/context/content_engine.py +0 -982
  325. aiecs/llm/base_client.py +0 -99
  326. aiecs/llm/openai_client.py +0 -125
  327. aiecs/llm/vertex_client.py +0 -186
  328. aiecs/llm/xai_client.py +0 -184
  329. aiecs/scripts/dependency_checker.py +0 -857
  330. aiecs/scripts/quick_dependency_check.py +0 -269
  331. aiecs/tools/task_tools/search_api.py +0 -7
  332. aiecs-1.0.1.dist-info/RECORD +0 -90
  333. aiecs-1.0.1.dist-info/entry_points.txt +0 -7
  334. /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
  335. /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
  336. /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
  337. /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
  338. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
  339. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
  340. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,347 @@
1
+ """
2
+ LLM-based Relation Extractor
3
+
4
+ Extracts relations between entities using Large Language Models.
5
+ """
6
+
7
+ import json
8
+ import uuid
9
+ from typing import List, Optional
10
+ from aiecs.application.knowledge_graph.extractors.base import RelationExtractor
11
+ from aiecs.domain.knowledge_graph.models.entity import Entity
12
+ from aiecs.domain.knowledge_graph.models.relation import Relation
13
+ from aiecs.domain.knowledge_graph.schema.graph_schema import GraphSchema
14
+ from aiecs.llm import get_llm_manager, AIProvider, LLMClientManager
15
+
16
+
17
+ class LLMRelationExtractor(RelationExtractor):
18
+ """
19
+ Extract relations between entities using LLMs
20
+
21
+ Given text and a list of entities, identifies relationships between them.
22
+ Uses LLMs to understand semantic relationships and extract structured relations.
23
+
24
+ Features:
25
+ - Schema-aware extraction (uses relation types from schema)
26
+ - Entity-aware (only extracts relations between known entities)
27
+ - Property extraction (relation properties/attributes)
28
+ - Confidence scoring
29
+ - Directional relation support
30
+
31
+ Example:
32
+ ```python
33
+ extractor = LLMRelationExtractor(schema=graph_schema)
34
+
35
+ alice = Entity(id="e1", type="Person", properties={"name": "Alice"})
36
+ tech_corp = Entity(id="e2", type="Company", properties={"name": "Tech Corp"})
37
+
38
+ relations = await extractor.extract_relations(
39
+ text="Alice works as a senior engineer at Tech Corp.",
40
+ entities=[alice, tech_corp]
41
+ )
42
+ # Returns: [
43
+ # Relation(
44
+ # source_id="e1",
45
+ # target_id="e2",
46
+ # relation_type="WORKS_FOR",
47
+ # properties={"title": "senior engineer"}
48
+ # )
49
+ # ]
50
+ ```
51
+ """
52
+
53
def __init__(
    self,
    schema: Optional[GraphSchema] = None,
    provider: Optional[AIProvider] = None,
    model: Optional[str] = None,
    temperature: float = 0.1,
    max_tokens: Optional[int] = 2000,
):
    """
    Store the extractor configuration.

    No LLM client is created here; the arguments are only recorded and the
    client manager stays unset until ``extract_relations`` first needs it.

    Args:
        schema: Optional GraphSchema whose relation types guide extraction
        provider: LLM provider, forwarded as-is to the LLM manager on each call
        model: Specific model name, forwarded as-is to the LLM manager
        temperature: Sampling temperature (kept low for repeatable extraction)
        max_tokens: Upper bound on tokens in the LLM response
    """
    # Generation settings handed to the LLM manager on every call.
    self.provider, self.model = provider, model
    self.temperature, self.max_tokens = temperature, max_tokens

    # Schema used to pick and describe the allowed relation types.
    self.schema = schema

    # Resolved lazily inside the async extraction method.
    self._llm_manager: Optional[LLMClientManager] = None
77
+
78
async def extract_relations(
    self,
    text: str,
    entities: List[Entity],
    relation_types: Optional[List[str]] = None,
    **kwargs,
) -> List[Relation]:
    """
    Ask the LLM for relations between already-known entities in a text.

    Args:
        text: Input text that mentions the entities
        entities: Entities previously extracted from the text
        relation_types: Optional filter restricting which relation types are requested
        **kwargs: Accepted for interface compatibility; not used here

    Returns:
        List of extracted Relation objects. The list is empty when fewer
        than two entities are supplied, because a relation needs two ends.

    Raises:
        ValueError: If text is empty or only whitespace
        RuntimeError: If the LLM call or the parsing of its reply fails
    """
    if not (text and text.strip()):
        raise ValueError("Input text cannot be empty")

    # A relation connects two entities; with fewer there is nothing to extract.
    if not entities or len(entities) < 2:
        return []

    # Resolve the shared LLM manager on first use and cache it on the instance.
    manager = self._llm_manager
    if manager is None:
        manager = self._llm_manager = await get_llm_manager()

    prompt = self._build_extraction_prompt(text, entities, relation_types)

    try:
        llm_response = await manager.generate_text(
            messages=prompt,
            provider=self.provider,
            model=self.model,
            temperature=self.temperature,
            max_tokens=self.max_tokens,
        )
        # Turn the raw reply into validated Relation objects.
        return self._parse_llm_response(llm_response.content, entities)
    except Exception as exc:
        # Any failure in the call or in parsing surfaces as one error type.
        raise RuntimeError(f"LLM relation extraction failed: {str(exc)}") from exc
132
+
133
def _build_extraction_prompt(
    self,
    text: str,
    entities: List[Entity],
    relation_types: Optional[List[str]] = None,
) -> str:
    """
    Build prompt for LLM relation extraction

    The prompt includes:
    1. Task description
    2. List of entities to consider
    3. Relation types to extract (from schema)
    4. Output format specification
    5. The text to analyze

    Relation types are chosen as follows:
    - with a schema: the schema's relation type names, narrowed to
      ``relation_types`` when that filter is given
    - without a schema: ``relation_types`` when given, otherwise a small
      built-in list of common relation types

    Args:
        text: Input text
        entities: List of known entities
        relation_types: Optional filter for relation types

    Returns:
        Formatted prompt string
    """
    # One reference line per entity: position, type, display name and ID.
    entities_section = "\n".join(
        f" [{idx}] {entity.entity_type}: {self._get_entity_name(entity)} (ID: {entity.id})"
        for idx, entity in enumerate(entities)
    )

    # Decide which relation types the LLM is allowed to emit.
    if self.schema:
        available_types = self.schema.get_relation_type_names()
        if relation_types:
            types_to_extract = [t for t in relation_types if t in available_types]
        else:
            types_to_extract = available_types
    elif relation_types:
        types_to_extract = relation_types
    else:
        # No schema, use common relation types
        types_to_extract = [
            "WORKS_FOR",
            "LOCATED_IN",
            "PART_OF",
            "KNOWS",
            "OWNS",
            "MANAGES",
            "PRODUCES",
            "RELATED_TO",
        ]

    def describe(rel_type: str) -> str:
        """Return the schema description for a type, or a generic fallback."""
        schema_rel = None
        if self.schema and self.schema.has_relation_type(rel_type):
            schema_rel = self.schema.get_relation_type(rel_type)
        if schema_rel is None:
            return "Extract this type of relationship"
        return schema_rel.description or f"'{rel_type}' relation"

    relations_section = "\n".join(
        f"- {rel_type}: {describe(rel_type)}" for rel_type in types_to_extract
    )

    # Doubled braces below are literal braces in the rendered prompt.
    return f"""You are an expert at extracting relationships between entities from text.

Given the following entities:
{entities_section}

Extract all relationships between these entities from the text.

Allowed relation types:
{relations_section}

For each relation, provide:
1. source_id: ID of the source entity (from list above)
2. target_id: ID of the target entity (from list above)
3. relation_type: Type of relation (one of the allowed types)
4. properties: Optional dictionary of relation properties (e.g., since="2020", role="engineer")
5. confidence: Your confidence in this extraction (0.0 to 1.0)

Return ONLY a valid JSON array with this structure:
[
{{
"source_id": "entity_id_here",
"target_id": "entity_id_here",
"relation_type": "RELATION_TYPE",
"properties": {{"property1": "value1"}},
"confidence": 0.95
}}
]

Important:
- Only extract relations that are explicitly stated or strongly implied in the text
- Use the exact entity IDs from the list above
- Relations should be directional (source -> target matters)
- If unsure about a property, omit it
- Return empty array [] if no relations found

Text to analyze:
\"\"\"{text}\"\"\"

JSON output:"""
247
+
248
def _parse_llm_response(self, response_text: str, entities: List[Entity]) -> List[Relation]:
    """
    Parse LLM response to Relation objects

    Expected JSON format:
    [
        {
            "source_id": "e1",
            "target_id": "e2",
            "relation_type": "WORKS_FOR",
            "properties": {"title": "engineer"},
            "confidence": 0.95
        }
    ]

    Items are silently dropped when they are not JSON objects, lack a
    source/target/relation type, reference an ID that is not among
    ``entities``, or link an entity to itself. A reply that is not valid
    JSON is logged as a warning and yields an empty list.

    Args:
        response_text: LLM response string
        entities: List of entities for validation (only their ``id`` is read)

    Returns:
        List of Relation objects; each carries the (normalized) confidence
        under the ``_extraction_confidence`` property
    """
    import logging

    logger = logging.getLogger(__name__)
    entity_ids = {e.id for e in entities}

    try:
        extracted_data = json.loads(self._extract_json_from_text(response_text))
    except json.JSONDecodeError as e:
        logger.warning("Warning: Failed to parse LLM response as JSON: %s", e)
        logger.warning("Response was: %s...", response_text[:200])
        return []

    # A single object is treated as a one-element array.
    if not isinstance(extracted_data, list):
        extracted_data = [extracted_data]

    relations = []
    for item in extracted_data:
        # LLMs occasionally emit strings/numbers/null inside the array.
        if not isinstance(item, dict):
            continue

        source_id = item.get("source_id")
        target_id = item.get("target_id")
        relation_type = item.get("relation_type")

        # Validate required fields
        if not source_id or not target_id or not relation_type:
            continue
        if source_id not in entity_ids or target_id not in entity_ids:
            # LLM hallucinated entity IDs
            continue
        if source_id == target_id:
            # Self-loop, skip
            continue

        # "properties": null or a non-object value falls back to no properties;
        # copy so the Relation does not alias the parsed JSON payload.
        raw_properties = item.get("properties")
        properties = dict(raw_properties) if isinstance(raw_properties, dict) else {}

        # Coerce confidence to a float in [0, 1]; unusable values get the default.
        try:
            confidence = float(item.get("confidence", 0.5))
        except (TypeError, ValueError):
            confidence = 0.5
        confidence = min(1.0, max(0.0, confidence))

        relation = Relation(
            id=str(uuid.uuid4()),
            relation_type=relation_type,
            source_id=source_id,
            target_id=target_id,
            properties=properties,
        )

        # Store confidence
        relation.properties["_extraction_confidence"] = confidence

        relations.append(relation)

    return relations
326
+
327
def _extract_json_from_text(self, text: str) -> str:
    """
    Extract the JSON payload (array or single object) from an LLM reply.

    The reply may wrap the JSON in prose or code fences. Every ``[`` or
    ``{`` is tried as a starting point, and the first span that decodes to
    an object or to an array of objects is returned. This avoids picking
    up bracketed prose such as "[0]" or an array nested inside a single
    object.

    If no such span exists, the text between the first ``[`` and the last
    ``]`` is returned, then the text between the first ``{`` and the last
    ``}``, and finally the input unchanged, so the caller's JSON parsing
    reports the failure.

    Args:
        text: Raw LLM response text

    Returns:
        Substring expected to contain the JSON payload
    """
    decoder = json.JSONDecoder()
    for start, char in enumerate(text):
        if char not in "[{":
            continue
        try:
            value, end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            continue
        is_object_list = isinstance(value, list) and all(isinstance(v, dict) for v in value)
        if isinstance(value, dict) or is_object_list:
            return text[start:end]

    # Nothing decoded cleanly: fall back to plain boundary matching.
    for opener, closer in (("[", "]"), ("{", "}")):
        start = text.find(opener)
        end = text.rfind(closer) + 1
        if start != -1 and end > start:
            return text[start:end]

    return text
344
+
345
def _get_entity_name(self, entity: Entity) -> str:
    """
    Return a display name for an entity.

    Uses the first non-empty value among the ``name``, ``title`` and
    ``text`` properties, converted to ``str``. If none is set, falls back
    to ``"<entity_type>_<first 8 characters of the id>"``.

    Args:
        entity: Entity whose ``properties``, ``entity_type`` and ``id`` are read

    Returns:
        Display name as a string
    """
    properties = entity.properties or {}
    for key in ("name", "title", "text"):
        value = properties.get(key)
        if value:
            # Property values are not guaranteed to be strings.
            return str(value)
    return f"{entity.entity_type}_{entity.id[:8]}"
@@ -0,0 +1,241 @@
1
+ """
2
+ spaCy NER-based Entity Extractor
3
+
4
+ Extracts entities using spaCy's Named Entity Recognition.
5
+ Fast, offline, and cost-free alternative to LLM extraction.
6
+ """
7
+
8
+ from typing import List, Optional
9
+ import spacy
10
+ from spacy.language import Language
11
+
12
+ from aiecs.application.knowledge_graph.extractors.base import EntityExtractor
13
+ from aiecs.domain.knowledge_graph.models.entity import Entity
14
+
15
+
16
class NEREntityExtractor(EntityExtractor):
    """
    Extract entities using spaCy Named Entity Recognition

    This extractor uses spaCy's pre-trained NER models to identify entities.
    It's fast, free, and works offline, but limited to standard NER types.

    Features:
    - Fast extraction (no API calls)
    - Works offline
    - No cost
    - Standard NER types (PERSON, ORG, GPE, LOC, DATE, etc.)

    Limitations:
    - Only standard entity types (no custom types)
    - Limited property extraction (mainly just entity text)
    - Lower quality than LLM extraction

    Use Cases:
    - Development and testing
    - Cost-sensitive scenarios
    - High-volume extraction where LLM is too expensive
    - Baseline for comparison

    Example:
        ```python
        extractor = NEREntityExtractor(model="en_core_web_sm")

        entities = await extractor.extract_entities(
            "Alice works at Tech Corp in San Francisco."
        )
        # Returns: [
        #   Entity(type="Person", properties={"name": "Alice", "text": "Alice"}),
        #   Entity(type="Organization", properties={"name": "Tech Corp", "text": "Tech Corp"}),
        #   Entity(type="Location", properties={"name": "San Francisco", "text": "San Francisco"})
        # ]
        ```
    """

    # Mapping from spaCy NER labels to generic entity types.
    # Labels missing from this table are passed through unchanged by
    # extract_entities().
    LABEL_MAPPING = {
        "PERSON": "Person",
        "PER": "Person",
        "ORG": "Organization",
        "ORGANIZATION": "Organization",
        "GPE": "Location",  # Geo-Political Entity
        "LOC": "Location",
        "LOCATION": "Location",
        "FAC": "Facility",
        "FACILITY": "Facility",
        "PRODUCT": "Product",
        "EVENT": "Event",
        "WORK_OF_ART": "WorkOfArt",
        "LAW": "Law",
        "LANGUAGE": "Language",
        "DATE": "Date",
        "TIME": "Time",
        "PERCENT": "Percentage",
        "MONEY": "Money",
        "QUANTITY": "Quantity",
        "ORDINAL": "Ordinal",
        "CARDINAL": "Cardinal",
    }

    def __init__(
        self,
        model: str = "en_core_web_sm",
        disable_components: Optional[List[str]] = None,
    ):
        """
        Initialize NER entity extractor

        Args:
            model: spaCy model name (default: "en_core_web_sm")
                Available models:
                - en_core_web_sm: Small English model (~13MB)
                - en_core_web_md: Medium English model (~40MB)
                - en_core_web_lg: Large English model (~560MB)
            disable_components: spaCy pipeline components to disable (for speed)
                Default: disable all except NER

        Raises:
            RuntimeError: If spaCy cannot load the model (``OSError`` from
                ``spacy.load``), with a hint on how to install it.
        """
        self.model_name = model

        if disable_components is None:
            # Disable everything except NER for speed
            disable_components = [
                "tok2vec",
                "tagger",
                "parser",
                "attribute_ruler",
                "lemmatizer",
            ]

        # Only the load itself is guarded, so unrelated errors are not
        # misreported as a missing model.
        try:
            self.nlp: Language = spacy.load(model, disable=disable_components)
        except OSError as e:
            raise RuntimeError(f"spaCy model '{model}' not found. " f"Install it with: python -m spacy download {model}") from e

    async def extract_entities(self, text: str, entity_types: Optional[List[str]] = None, **kwargs) -> List[Entity]:
        """
        Extract entities from text using spaCy NER

        Args:
            text: Input text to extract entities from
            entity_types: Optional filter for specific entity types
                (matched against the mapped type, i.e. LABEL_MAPPING values,
                or the raw spaCy label when the label has no mapping)
            **kwargs: Additional parameters (unused for NER)

        Returns:
            List of extracted Entity objects, in document order, with at
            most one entity per distinct entity text

        Raises:
            ValueError: If text is empty
        """
        if not text or not text.strip():
            raise ValueError("Input text cannot be empty")

        # Process text with spaCy (synchronous call inside this coroutine)
        doc = self.nlp(text)

        entities: List[Entity] = []
        seen_texts = set()  # Simple deduplication within same text

        for ent in doc.ents:
            # Map spaCy label to generic entity type; unknown labels pass through
            entity_type = self.LABEL_MAPPING.get(ent.label_, ent.label_)

            # Filter by entity type if requested
            if entity_types and entity_type not in entity_types:
                continue

            entity_text = ent.text.strip()
            if not entity_text:
                # Whitespace-only span: nothing meaningful to name the entity
                continue

            # Simple deduplication: skip if we've seen this exact text already
            if entity_text in seen_texts:
                continue
            seen_texts.add(entity_text)

            entities.append(
                Entity(
                    id=self._generate_entity_id(entity_type, entity_text),
                    entity_type=entity_type,
                    properties={
                        "name": entity_text,
                        "text": entity_text,
                        "label": ent.label_,  # Original spaCy label
                        "start_char": ent.start_char,
                        "end_char": ent.end_char,
                        "_extraction_confidence": self._estimate_confidence(ent),
                    },
                )
            )

        return entities

    def _generate_entity_id(self, entity_type: str, text: str) -> str:
        """
        Generate a deterministic ID for an entity

        The ID is the lower-cased ``<type>_<text>`` string (spaces replaced
        by underscores) followed by the first 8 hex digits of its MD5 digest.
        Because the digest is computed from that same normalized string, two
        inputs that normalize identically (e.g. differing only in case)
        receive the same ID.

        Args:
            entity_type: Entity type name
            text: Entity text

        Returns:
            Entity ID string
        """
        import hashlib

        # Create deterministic ID from type + text
        normalized = f"{entity_type}_{text}".lower().replace(" ", "_")
        # MD5 is used only as a short, stable suffix, not for security
        hash_suffix = hashlib.md5(normalized.encode()).hexdigest()[:8]
        return f"{normalized}_{hash_suffix}"

    def _estimate_confidence(self, ent) -> float:
        """
        Estimate confidence for NER extraction

        No model score is read from the entity; a heuristic is used instead:
        - Base confidence of 0.7
        - +0.1 when the entity text is longer than 20 characters
        - -0.2 when the entity text is shorter than 3 characters
        - +0.1 when the first character is upper-case (proper nouns)

        Args:
            ent: spaCy entity (only its ``text`` attribute is read)

        Returns:
            Confidence score (0.0-1.0)
        """
        entity_text = ent.text

        # Base confidence
        confidence = 0.7

        # Adjust based on entity length
        if len(entity_text) > 20:
            confidence += 0.1
        elif len(entity_text) < 3:
            confidence -= 0.2

        # Adjust based on capitalization (proper nouns); slicing instead of
        # indexing keeps an empty text from raising IndexError
        if entity_text[:1].isupper():
            confidence += 0.1

        # Clamp to [0.0, 1.0]
        return max(0.0, min(1.0, confidence))

    def get_supported_types(self) -> List[str]:
        """
        Get list of entity types that this extractor can produce

        Returns:
            Sorted list of the distinct entity type names in LABEL_MAPPING
            (sorted so the result is stable between runs)
        """
        return sorted(set(self.LABEL_MAPPING.values()))

    def get_available_labels(self) -> List[str]:
        """
        Get list of NER labels available in the loaded model

        Returns:
            List of spaCy NER labels, or an empty list when the pipeline has
            no "ner" component or the component exposes no ``labels``
        """
        try:
            ner_pipe = self.nlp.get_pipe("ner")
        except KeyError:
            # Pipeline was loaded without an NER component
            return []
        # Convert to a list so the result matches the declared return type
        return list(getattr(ner_pipe, "labels", ()))
@@ -0,0 +1,78 @@
1
+ """
2
+ Knowledge Fusion Components
3
+
4
+ Components for deduplicating, merging, and linking entities across documents.
5
+ """
6
+
7
+ from aiecs.application.knowledge_graph.fusion.entity_deduplicator import (
8
+ EntityDeduplicator,
9
+ )
10
+ from aiecs.application.knowledge_graph.fusion.entity_linker import EntityLinker
11
+ from aiecs.application.knowledge_graph.fusion.relation_deduplicator import (
12
+ RelationDeduplicator,
13
+ )
14
+ from aiecs.application.knowledge_graph.fusion.knowledge_fusion import (
15
+ KnowledgeFusion,
16
+ )
17
+ from aiecs.application.knowledge_graph.fusion.matching_config import (
18
+ EntityTypeConfig,
19
+ FusionMatchingConfig,
20
+ load_matching_config,
21
+ load_matching_config_from_dict,
22
+ load_matching_config_from_json,
23
+ load_matching_config_from_yaml,
24
+ save_matching_config_to_dict,
25
+ save_matching_config_to_json,
26
+ save_matching_config_to_yaml,
27
+ VALID_STAGES,
28
+ DEFAULT_ENABLED_STAGES,
29
+ )
30
+ from aiecs.application.knowledge_graph.fusion.similarity_pipeline import (
31
+ SimilarityPipeline,
32
+ MatchStage,
33
+ MatchResult,
34
+ PipelineResult,
35
+ )
36
+ from aiecs.application.knowledge_graph.fusion.ab_testing import (
37
+ ABTestingFramework,
38
+ EvaluationMetrics,
39
+ ExperimentResult,
40
+ )
41
+ from aiecs.application.knowledge_graph.fusion.evaluation_dataset import (
42
+ EntityPair,
43
+ EvaluationDataset,
44
+ create_default_evaluation_dataset,
45
+ create_minimal_evaluation_dataset,
46
+ )
47
+
48
# Public API of the fusion package, grouped by concern.
__all__ = [
    # Core fusion components
    "EntityDeduplicator", "EntityLinker", "RelationDeduplicator", "KnowledgeFusion",
    # Matching configuration: models
    "EntityTypeConfig", "FusionMatchingConfig",
    # Matching configuration: loaders
    "load_matching_config", "load_matching_config_from_dict",
    "load_matching_config_from_json", "load_matching_config_from_yaml",
    # Matching configuration: savers
    "save_matching_config_to_dict", "save_matching_config_to_json",
    "save_matching_config_to_yaml",
    # Matching configuration: stage constants
    "VALID_STAGES", "DEFAULT_ENABLED_STAGES",
    # Similarity pipeline
    "SimilarityPipeline", "MatchStage", "MatchResult", "PipelineResult",
    # Evaluation and testing
    "ABTestingFramework", "EvaluationMetrics", "ExperimentResult",
    "EntityPair", "EvaluationDataset",
    "create_default_evaluation_dataset", "create_minimal_evaluation_dataset",
]