aiecs 1.0.1__py3-none-any.whl → 1.7.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic. Click here for more details.

Files changed (340) hide show
  1. aiecs/__init__.py +13 -16
  2. aiecs/__main__.py +7 -7
  3. aiecs/aiecs_client.py +269 -75
  4. aiecs/application/executors/operation_executor.py +79 -54
  5. aiecs/application/knowledge_graph/__init__.py +7 -0
  6. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  7. aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
  8. aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
  11. aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
  12. aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
  13. aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
  14. aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
  15. aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
  16. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  17. aiecs/application/knowledge_graph/extractors/base.py +98 -0
  18. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
  19. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
  20. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
  21. aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
  22. aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
  23. aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
  24. aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
  25. aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
  26. aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
  27. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
  28. aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
  29. aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
  30. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
  31. aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
  32. aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
  33. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
  34. aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
  35. aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
  36. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  37. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
  38. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
  39. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  40. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
  41. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  42. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  43. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
  44. aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
  45. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
  46. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  47. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  48. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
  49. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
  50. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
  51. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
  52. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
  53. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
  54. aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
  55. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
  56. aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
  57. aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
  58. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
  59. aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
  60. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  61. aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
  62. aiecs/application/knowledge_graph/search/reranker.py +293 -0
  63. aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
  64. aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
  65. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  66. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
  67. aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
  68. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  69. aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
  70. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  71. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
  72. aiecs/common/__init__.py +9 -0
  73. aiecs/common/knowledge_graph/__init__.py +17 -0
  74. aiecs/common/knowledge_graph/runnable.py +471 -0
  75. aiecs/config/__init__.py +20 -5
  76. aiecs/config/config.py +762 -31
  77. aiecs/config/graph_config.py +131 -0
  78. aiecs/config/tool_config.py +435 -0
  79. aiecs/core/__init__.py +29 -13
  80. aiecs/core/interface/__init__.py +2 -2
  81. aiecs/core/interface/execution_interface.py +22 -22
  82. aiecs/core/interface/storage_interface.py +37 -88
  83. aiecs/core/registry/__init__.py +31 -0
  84. aiecs/core/registry/service_registry.py +92 -0
  85. aiecs/domain/__init__.py +270 -1
  86. aiecs/domain/agent/__init__.py +191 -0
  87. aiecs/domain/agent/base_agent.py +3949 -0
  88. aiecs/domain/agent/exceptions.py +99 -0
  89. aiecs/domain/agent/graph_aware_mixin.py +569 -0
  90. aiecs/domain/agent/hybrid_agent.py +1731 -0
  91. aiecs/domain/agent/integration/__init__.py +29 -0
  92. aiecs/domain/agent/integration/context_compressor.py +216 -0
  93. aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
  94. aiecs/domain/agent/integration/protocols.py +281 -0
  95. aiecs/domain/agent/integration/retry_policy.py +218 -0
  96. aiecs/domain/agent/integration/role_config.py +213 -0
  97. aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
  98. aiecs/domain/agent/lifecycle.py +291 -0
  99. aiecs/domain/agent/llm_agent.py +692 -0
  100. aiecs/domain/agent/memory/__init__.py +12 -0
  101. aiecs/domain/agent/memory/conversation.py +1124 -0
  102. aiecs/domain/agent/migration/__init__.py +14 -0
  103. aiecs/domain/agent/migration/conversion.py +163 -0
  104. aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
  105. aiecs/domain/agent/models.py +894 -0
  106. aiecs/domain/agent/observability.py +479 -0
  107. aiecs/domain/agent/persistence.py +449 -0
  108. aiecs/domain/agent/prompts/__init__.py +29 -0
  109. aiecs/domain/agent/prompts/builder.py +159 -0
  110. aiecs/domain/agent/prompts/formatters.py +187 -0
  111. aiecs/domain/agent/prompts/template.py +255 -0
  112. aiecs/domain/agent/registry.py +253 -0
  113. aiecs/domain/agent/tool_agent.py +444 -0
  114. aiecs/domain/agent/tools/__init__.py +15 -0
  115. aiecs/domain/agent/tools/schema_generator.py +377 -0
  116. aiecs/domain/community/__init__.py +155 -0
  117. aiecs/domain/community/agent_adapter.py +469 -0
  118. aiecs/domain/community/analytics.py +432 -0
  119. aiecs/domain/community/collaborative_workflow.py +648 -0
  120. aiecs/domain/community/communication_hub.py +634 -0
  121. aiecs/domain/community/community_builder.py +320 -0
  122. aiecs/domain/community/community_integration.py +796 -0
  123. aiecs/domain/community/community_manager.py +803 -0
  124. aiecs/domain/community/decision_engine.py +849 -0
  125. aiecs/domain/community/exceptions.py +231 -0
  126. aiecs/domain/community/models/__init__.py +33 -0
  127. aiecs/domain/community/models/community_models.py +234 -0
  128. aiecs/domain/community/resource_manager.py +461 -0
  129. aiecs/domain/community/shared_context_manager.py +589 -0
  130. aiecs/domain/context/__init__.py +40 -10
  131. aiecs/domain/context/context_engine.py +1910 -0
  132. aiecs/domain/context/conversation_models.py +87 -53
  133. aiecs/domain/context/graph_memory.py +582 -0
  134. aiecs/domain/execution/model.py +12 -4
  135. aiecs/domain/knowledge_graph/__init__.py +19 -0
  136. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  137. aiecs/domain/knowledge_graph/models/entity.py +148 -0
  138. aiecs/domain/knowledge_graph/models/evidence.py +178 -0
  139. aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
  140. aiecs/domain/knowledge_graph/models/path.py +171 -0
  141. aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
  142. aiecs/domain/knowledge_graph/models/query.py +261 -0
  143. aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
  144. aiecs/domain/knowledge_graph/models/relation.py +202 -0
  145. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  146. aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
  147. aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
  148. aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
  149. aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
  150. aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
  151. aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
  152. aiecs/domain/task/dsl_processor.py +172 -56
  153. aiecs/domain/task/model.py +20 -8
  154. aiecs/domain/task/task_context.py +27 -24
  155. aiecs/infrastructure/__init__.py +0 -2
  156. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  157. aiecs/infrastructure/graph_storage/base.py +837 -0
  158. aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
  159. aiecs/infrastructure/graph_storage/cache.py +424 -0
  160. aiecs/infrastructure/graph_storage/distributed.py +223 -0
  161. aiecs/infrastructure/graph_storage/error_handling.py +380 -0
  162. aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
  163. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  164. aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
  165. aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
  166. aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
  167. aiecs/infrastructure/graph_storage/metrics.py +344 -0
  168. aiecs/infrastructure/graph_storage/migration.py +400 -0
  169. aiecs/infrastructure/graph_storage/pagination.py +483 -0
  170. aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
  171. aiecs/infrastructure/graph_storage/postgres.py +1563 -0
  172. aiecs/infrastructure/graph_storage/property_storage.py +353 -0
  173. aiecs/infrastructure/graph_storage/protocols.py +76 -0
  174. aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
  175. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  176. aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
  177. aiecs/infrastructure/graph_storage/streaming.py +487 -0
  178. aiecs/infrastructure/graph_storage/tenant.py +412 -0
  179. aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
  180. aiecs/infrastructure/messaging/websocket_manager.py +51 -35
  181. aiecs/infrastructure/monitoring/__init__.py +22 -0
  182. aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
  183. aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
  184. aiecs/infrastructure/monitoring/structured_logger.py +3 -7
  185. aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
  186. aiecs/infrastructure/persistence/__init__.py +14 -1
  187. aiecs/infrastructure/persistence/context_engine_client.py +184 -0
  188. aiecs/infrastructure/persistence/database_manager.py +67 -43
  189. aiecs/infrastructure/persistence/file_storage.py +180 -103
  190. aiecs/infrastructure/persistence/redis_client.py +74 -21
  191. aiecs/llm/__init__.py +73 -25
  192. aiecs/llm/callbacks/__init__.py +11 -0
  193. aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
  194. aiecs/llm/client_factory.py +230 -37
  195. aiecs/llm/client_resolver.py +155 -0
  196. aiecs/llm/clients/__init__.py +38 -0
  197. aiecs/llm/clients/base_client.py +328 -0
  198. aiecs/llm/clients/google_function_calling_mixin.py +415 -0
  199. aiecs/llm/clients/googleai_client.py +314 -0
  200. aiecs/llm/clients/openai_client.py +158 -0
  201. aiecs/llm/clients/openai_compatible_mixin.py +367 -0
  202. aiecs/llm/clients/vertex_client.py +1186 -0
  203. aiecs/llm/clients/xai_client.py +201 -0
  204. aiecs/llm/config/__init__.py +51 -0
  205. aiecs/llm/config/config_loader.py +272 -0
  206. aiecs/llm/config/config_validator.py +206 -0
  207. aiecs/llm/config/model_config.py +143 -0
  208. aiecs/llm/protocols.py +149 -0
  209. aiecs/llm/utils/__init__.py +10 -0
  210. aiecs/llm/utils/validate_config.py +89 -0
  211. aiecs/main.py +140 -121
  212. aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
  213. aiecs/scripts/aid/__init__.py +19 -0
  214. aiecs/scripts/aid/module_checker.py +499 -0
  215. aiecs/scripts/aid/version_manager.py +235 -0
  216. aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
  217. aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
  218. aiecs/scripts/dependance_check/__init__.py +15 -0
  219. aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
  220. aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
  221. aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
  222. aiecs/scripts/dependance_patch/__init__.py +7 -0
  223. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  224. aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
  225. aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
  226. aiecs/scripts/knowledge_graph/__init__.py +3 -0
  227. aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
  228. aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
  229. aiecs/scripts/tools_develop/README.md +671 -0
  230. aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
  231. aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
  232. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  233. aiecs/scripts/tools_develop/__init__.py +21 -0
  234. aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
  235. aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
  236. aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
  237. aiecs/scripts/tools_develop/schema_coverage.py +511 -0
  238. aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
  239. aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
  240. aiecs/scripts/tools_develop/verify_tools.py +352 -0
  241. aiecs/tasks/__init__.py +0 -1
  242. aiecs/tasks/worker.py +115 -47
  243. aiecs/tools/__init__.py +194 -72
  244. aiecs/tools/apisource/__init__.py +99 -0
  245. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  246. aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
  247. aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
  248. aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
  249. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  250. aiecs/tools/apisource/monitoring/metrics.py +330 -0
  251. aiecs/tools/apisource/providers/__init__.py +112 -0
  252. aiecs/tools/apisource/providers/base.py +671 -0
  253. aiecs/tools/apisource/providers/census.py +397 -0
  254. aiecs/tools/apisource/providers/fred.py +535 -0
  255. aiecs/tools/apisource/providers/newsapi.py +409 -0
  256. aiecs/tools/apisource/providers/worldbank.py +352 -0
  257. aiecs/tools/apisource/reliability/__init__.py +12 -0
  258. aiecs/tools/apisource/reliability/error_handler.py +363 -0
  259. aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
  260. aiecs/tools/apisource/tool.py +832 -0
  261. aiecs/tools/apisource/utils/__init__.py +9 -0
  262. aiecs/tools/apisource/utils/validators.py +334 -0
  263. aiecs/tools/base_tool.py +415 -21
  264. aiecs/tools/docs/__init__.py +121 -0
  265. aiecs/tools/docs/ai_document_orchestrator.py +607 -0
  266. aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
  267. aiecs/tools/docs/content_insertion_tool.py +1320 -0
  268. aiecs/tools/docs/document_creator_tool.py +1464 -0
  269. aiecs/tools/docs/document_layout_tool.py +1160 -0
  270. aiecs/tools/docs/document_parser_tool.py +1016 -0
  271. aiecs/tools/docs/document_writer_tool.py +2008 -0
  272. aiecs/tools/knowledge_graph/__init__.py +17 -0
  273. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
  274. aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
  275. aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
  276. aiecs/tools/langchain_adapter.py +300 -138
  277. aiecs/tools/schema_generator.py +455 -0
  278. aiecs/tools/search_tool/__init__.py +100 -0
  279. aiecs/tools/search_tool/analyzers.py +581 -0
  280. aiecs/tools/search_tool/cache.py +264 -0
  281. aiecs/tools/search_tool/constants.py +128 -0
  282. aiecs/tools/search_tool/context.py +224 -0
  283. aiecs/tools/search_tool/core.py +778 -0
  284. aiecs/tools/search_tool/deduplicator.py +119 -0
  285. aiecs/tools/search_tool/error_handler.py +242 -0
  286. aiecs/tools/search_tool/metrics.py +343 -0
  287. aiecs/tools/search_tool/rate_limiter.py +172 -0
  288. aiecs/tools/search_tool/schemas.py +275 -0
  289. aiecs/tools/statistics/__init__.py +80 -0
  290. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
  291. aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
  292. aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
  293. aiecs/tools/statistics/data_loader_tool.py +555 -0
  294. aiecs/tools/statistics/data_profiler_tool.py +638 -0
  295. aiecs/tools/statistics/data_transformer_tool.py +580 -0
  296. aiecs/tools/statistics/data_visualizer_tool.py +498 -0
  297. aiecs/tools/statistics/model_trainer_tool.py +507 -0
  298. aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
  299. aiecs/tools/task_tools/__init__.py +49 -36
  300. aiecs/tools/task_tools/chart_tool.py +200 -184
  301. aiecs/tools/task_tools/classfire_tool.py +268 -267
  302. aiecs/tools/task_tools/image_tool.py +220 -141
  303. aiecs/tools/task_tools/office_tool.py +226 -146
  304. aiecs/tools/task_tools/pandas_tool.py +477 -121
  305. aiecs/tools/task_tools/report_tool.py +390 -142
  306. aiecs/tools/task_tools/research_tool.py +149 -79
  307. aiecs/tools/task_tools/scraper_tool.py +339 -145
  308. aiecs/tools/task_tools/stats_tool.py +448 -209
  309. aiecs/tools/temp_file_manager.py +26 -24
  310. aiecs/tools/tool_executor/__init__.py +18 -16
  311. aiecs/tools/tool_executor/tool_executor.py +364 -52
  312. aiecs/utils/LLM_output_structor.py +74 -48
  313. aiecs/utils/__init__.py +14 -3
  314. aiecs/utils/base_callback.py +0 -3
  315. aiecs/utils/cache_provider.py +696 -0
  316. aiecs/utils/execution_utils.py +50 -31
  317. aiecs/utils/prompt_loader.py +1 -0
  318. aiecs/utils/token_usage_repository.py +37 -11
  319. aiecs/ws/socket_server.py +14 -4
  320. {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/METADATA +52 -15
  321. aiecs-1.7.17.dist-info/RECORD +337 -0
  322. aiecs-1.7.17.dist-info/entry_points.txt +13 -0
  323. aiecs/config/registry.py +0 -19
  324. aiecs/domain/context/content_engine.py +0 -982
  325. aiecs/llm/base_client.py +0 -99
  326. aiecs/llm/openai_client.py +0 -125
  327. aiecs/llm/vertex_client.py +0 -186
  328. aiecs/llm/xai_client.py +0 -184
  329. aiecs/scripts/dependency_checker.py +0 -857
  330. aiecs/scripts/quick_dependency_check.py +0 -269
  331. aiecs/tools/task_tools/search_api.py +0 -7
  332. aiecs-1.0.1.dist-info/RECORD +0 -90
  333. aiecs-1.0.1.dist-info/entry_points.txt +0 -7
  334. /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
  335. /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
  336. /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
  337. /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
  338. {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/WHEEL +0 -0
  339. {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/licenses/LICENSE +0 -0
  340. {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,490 @@
1
+ """
2
+ Graph Builder - Main Pipeline Orchestrator
3
+
4
+ Orchestrates the full document-to-graph conversion pipeline.
5
+ """
6
+
7
+ import asyncio
8
+ from typing import List, Optional, Dict, Any, Callable, cast, TYPE_CHECKING
9
+ from dataclasses import dataclass, field
10
+ from datetime import datetime
11
+ import logging
12
+
13
+ from aiecs.domain.knowledge_graph.schema.graph_schema import GraphSchema
14
+ from aiecs.infrastructure.graph_storage.base import GraphStore
15
+ from aiecs.application.knowledge_graph.extractors.base import (
16
+ EntityExtractor,
17
+ RelationExtractor,
18
+ )
19
+ from aiecs.application.knowledge_graph.fusion.entity_deduplicator import (
20
+ EntityDeduplicator,
21
+ )
22
+ from aiecs.application.knowledge_graph.fusion.entity_linker import EntityLinker
23
+ from aiecs.application.knowledge_graph.fusion.relation_deduplicator import (
24
+ RelationDeduplicator,
25
+ )
26
+ from aiecs.application.knowledge_graph.validators.relation_validator import (
27
+ RelationValidator,
28
+ )
29
+
30
+ if TYPE_CHECKING:
31
+ from aiecs.llm.protocols import LLMClientProtocol
32
+
33
+ logger = logging.getLogger(__name__)
34
+
35
+
36
@dataclass
class BuildResult:
    """
    Summary record for one graph building run.

    Every field has a default, so an empty ``BuildResult()`` describes a
    successful run that changed nothing and recorded no problems.

    Attributes:
        success: True when the build completed successfully.
        entities_added: Count of entities added to the graph.
        relations_added: Count of relations added to the graph.
        entities_linked: Count of entities linked to existing entities.
        entities_deduplicated: Count of entities removed as duplicates.
        relations_deduplicated: Count of relations removed as duplicates.
        errors: Error messages collected during the build.
        warnings: Warning messages collected during the build.
        metadata: Free-form extra information about the build.
        start_time: Timestamp of when the build started, if recorded.
        end_time: Timestamp of when the build ended, if recorded.
        duration_seconds: Total build duration, in seconds.
    """

    # --- overall status ---
    success: bool = True

    # --- counters (all start at zero) ---
    entities_added: int = 0
    relations_added: int = 0
    entities_linked: int = 0
    entities_deduplicated: int = 0
    relations_deduplicated: int = 0

    # --- diagnostics; default_factory gives each instance its own container ---
    errors: List[str] = field(default_factory=list)
    warnings: List[str] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)

    # --- timing ---
    start_time: Optional[datetime] = None
    end_time: Optional[datetime] = None
    duration_seconds: float = 0.0
68
+
69
+
70
+ class GraphBuilder:
71
+ """
72
+ Main pipeline for building knowledge graphs from text
73
+
74
+ The pipeline:
75
+ 1. Extract entities from text
76
+ 2. Deduplicate entities
77
+ 3. Link entities to existing graph
78
+ 4. Extract relations between entities
79
+ 5. Validate relations
80
+ 6. Deduplicate relations
81
+ 7. Store entities and relations in graph
82
+
83
+ Features:
84
+ - Async/parallel processing
85
+ - Progress callbacks
86
+ - Error handling and recovery
87
+ - Provenance tracking
88
+ - Configurable components
89
+
90
+ Example::
91
+
92
+ # Initialize components
93
+ entity_extractor = LLMEntityExtractor(schema)
94
+ relation_extractor = LLMRelationExtractor(schema)
95
+
96
+ # Create builder
97
+ builder = GraphBuilder(
98
+ graph_store=store,
99
+ entity_extractor=entity_extractor,
100
+ relation_extractor=relation_extractor,
101
+ schema=schema
102
+ )
103
+
104
+ # Build graph from text
105
+ result = await builder.build_from_text(
106
+ text="Alice works at Tech Corp.",
107
+ source="document_1.pdf"
108
+ )
109
+
110
+ print(f"Added {result.entities_added} entities, {result.relations_added} relations")
111
+ """
112
+
113
def __init__(
    self,
    graph_store: GraphStore,
    entity_extractor: EntityExtractor,
    relation_extractor: RelationExtractor,
    schema: Optional[GraphSchema] = None,
    enable_deduplication: bool = True,
    enable_linking: bool = True,
    enable_validation: bool = True,
    progress_callback: Optional[Callable[[str, float], None]] = None,
    embedding_client: Optional["LLMClientProtocol"] = None,
):
    """
    Set up the builder and its optional fusion/validation helpers.

    Args:
        graph_store: Graph storage that receives entities/relations
        entity_extractor: Extractor used to find entities
        relation_extractor: Extractor used to find relations
        schema: Optional schema; needed for relation validation
        enable_deduplication: Create the entity and relation deduplicators
        enable_linking: Create the entity linker for the given store
        enable_validation: Create the relation validator (only when a
            truthy schema is supplied as well)
        progress_callback: Optional callback for progress updates (message, progress_pct)
        embedding_client: Optional custom LLM client for generating embeddings
    """
    # Collaborators supplied by the caller
    self.graph_store = graph_store
    self.entity_extractor = entity_extractor
    self.relation_extractor = relation_extractor
    self.schema = schema

    # Feature switches
    self.enable_deduplication = enable_deduplication
    self.enable_linking = enable_linking
    self.enable_validation = enable_validation

    # Optional hooks
    self.progress_callback = progress_callback
    self.embedding_client = embedding_client

    # Fusion components: each one exists only when its switch is on,
    # otherwise the attribute is None.
    if enable_deduplication:
        self.entity_deduplicator = EntityDeduplicator()
    else:
        self.entity_deduplicator = None

    if enable_linking:
        self.entity_linker = EntityLinker(graph_store)
    else:
        self.entity_linker = None

    if enable_deduplication:
        self.relation_deduplicator = RelationDeduplicator()
    else:
        self.relation_deduplicator = None

    # Validation needs both the switch and a schema to validate against
    if enable_validation and schema:
        self.relation_validator = RelationValidator(schema)
    else:
        self.relation_validator = None
154
+
155
@classmethod
def from_config(
    cls,
    graph_store: GraphStore,
    entity_extractor: EntityExtractor,
    relation_extractor: RelationExtractor,
    schema: Optional[GraphSchema] = None,
    enable_deduplication: bool = True,
    enable_linking: bool = True,
    enable_validation: bool = True,
    progress_callback: Optional[Callable[[str, float], None]] = None,
) -> "GraphBuilder":
    """
    Create a builder with the embedding client resolved from configuration

    Reads ``kg_embedding_provider`` and ``kg_embedding_model`` from the
    object returned by ``get_settings()`` and passes them to
    ``resolve_llm_client``. When no provider is configured, or when
    resolution raises, the builder is created with ``embedding_client=None``
    (a failure is logged as a warning rather than propagated).

    Defined as a classmethod so that calling it on a subclass constructs
    that subclass instead of always returning the base ``GraphBuilder``.

    Args:
        graph_store: Graph storage to save entities/relations
        entity_extractor: Entity extractor to use
        relation_extractor: Relation extractor to use
        schema: Optional schema for validation
        enable_deduplication: Enable entity/relation deduplication
        enable_linking: Enable linking to existing entities
        enable_validation: Enable relation validation
        progress_callback: Optional callback for progress updates

    Returns:
        Builder instance with the configured embedding client (or None)

    Example::

        import os
        from aiecs.llm.client_factory import LLMClientFactory

        # Register custom embedding provider
        LLMClientFactory.register_custom_provider("my_embedder", my_client)

        # Set environment variable
        os.environ["KG_EMBEDDING_PROVIDER"] = "my_embedder"

        # Create builder with auto-resolved embedding client
        builder = GraphBuilder.from_config(
            graph_store=store,
            entity_extractor=extractor,
            relation_extractor=rel_extractor
        )
    """
    # Imported here rather than at module level, as in the original code
    from aiecs.config import get_settings
    from aiecs.llm import resolve_llm_client

    settings = get_settings()

    # Resolve embedding client from configuration (best effort)
    embedding_client = None
    if settings.kg_embedding_provider:
        try:
            embedding_client = resolve_llm_client(
                provider=settings.kg_embedding_provider,
                model=settings.kg_embedding_model,
            )
            logger.info(
                "Using embedding provider: %s with model: %s",
                settings.kg_embedding_provider,
                settings.kg_embedding_model,
            )
        except Exception as e:
            # Deliberately non-fatal: fall back to a builder without embeddings
            logger.warning("Failed to resolve embedding client from config: %s", e)

    return cls(
        graph_store=graph_store,
        entity_extractor=entity_extractor,
        relation_extractor=relation_extractor,
        schema=schema,
        enable_deduplication=enable_deduplication,
        enable_linking=enable_linking,
        enable_validation=enable_validation,
        progress_callback=progress_callback,
        embedding_client=embedding_client,
    )
234
+
235
+ async def build_from_text(
236
+ self,
237
+ text: str,
238
+ source: Optional[str] = None,
239
+ metadata: Optional[Dict[str, Any]] = None,
240
+ ) -> BuildResult:
241
+ """
242
+ Build knowledge graph from text
243
+
244
+ Args:
245
+ text: Input text to process
246
+ source: Optional source identifier (document name, URL, etc.)
247
+ metadata: Optional metadata to attach to entities/relations
248
+
249
+ Returns:
250
+ BuildResult with statistics and errors
251
+ """
252
+ result = BuildResult(start_time=datetime.now())
253
+
254
+ try:
255
+ self._report_progress("Starting entity extraction", 0.1)
256
+
257
+ # Step 1: Extract entities
258
+ entities = await self.entity_extractor.extract_entities(text)
259
+
260
+ if not entities:
261
+ result.warnings.append("No entities extracted from text")
262
+ return self._finalize_result(result)
263
+
264
+ self._report_progress(f"Extracted {len(entities)} entities", 0.2)
265
+
266
+ # Step 2: Deduplicate entities (within this text)
267
+ if self.enable_deduplication and self.entity_deduplicator:
268
+ original_count = len(entities)
269
+ entities = await self.entity_deduplicator.deduplicate(entities)
270
+ result.entities_deduplicated = original_count - len(entities)
271
+ self._report_progress(f"Deduplicated to {len(entities)} entities", 0.3)
272
+
273
+ # Step 3: Link entities to existing graph
274
+ linked_entities = []
275
+ new_entities = []
276
+
277
+ if self.enable_linking and self.entity_linker:
278
+ self._report_progress("Linking entities to graph", 0.4)
279
+ link_results = await self.entity_linker.link_entities(entities)
280
+
281
+ for link_result in link_results:
282
+ if link_result.linked:
283
+ linked_entities.append(link_result.existing_entity)
284
+ result.entities_linked += 1
285
+ else:
286
+ new_entities.append(link_result.new_entity)
287
+ else:
288
+ new_entities = entities
289
+
290
+ # Combine linked and new entities for relation extraction
291
+ all_entities_with_none = linked_entities + new_entities
292
+ # Filter out None values for relation extraction
293
+ all_entities = [e for e in all_entities_with_none if e is not None]
294
+
295
+ # Step 4: Extract relations
296
+ if len(all_entities) >= 2:
297
+ self._report_progress(
298
+ f"Extracting relations from {len(all_entities)} entities",
299
+ 0.5,
300
+ )
301
+ relations = await self.relation_extractor.extract_relations(text, all_entities)
302
+ self._report_progress(f"Extracted {len(relations)} relations", 0.6)
303
+ else:
304
+ relations = []
305
+ result.warnings.append("Not enough entities for relation extraction")
306
+
307
+ # Step 5: Validate relations
308
+ valid_relations = relations
309
+ if self.enable_validation and self.relation_validator and relations:
310
+ self._report_progress("Validating relations", 0.7)
311
+ valid_relations = self.relation_validator.filter_valid_relations(relations, all_entities)
312
+ invalid_count = len(relations) - len(valid_relations)
313
+ if invalid_count > 0:
314
+ result.warnings.append(f"{invalid_count} relations failed validation")
315
+
316
+ # Step 6: Deduplicate relations
317
+ if self.enable_deduplication and self.relation_deduplicator and valid_relations:
318
+ original_count = len(valid_relations)
319
+ valid_relations = await self.relation_deduplicator.deduplicate(valid_relations)
320
+ result.relations_deduplicated = original_count - len(valid_relations)
321
+ self._report_progress(f"Deduplicated to {len(valid_relations)} relations", 0.8)
322
+
323
+ # Step 7: Generate embeddings for entities
324
+ if self.embedding_client and new_entities:
325
+ self._report_progress("Generating embeddings for entities", 0.85)
326
+ await self._generate_embeddings_for_entities(new_entities)
327
+
328
+ # Step 8: Store in graph
329
+ self._report_progress("Storing entities and relations in graph", 0.9)
330
+
331
+ # Add provenance metadata
332
+ if source or metadata:
333
+ provenance = {"source": source} if source else {}
334
+ if metadata:
335
+ provenance.update(metadata)
336
+
337
+ # Add provenance to entities
338
+ for entity in new_entities:
339
+ if not entity.properties:
340
+ entity.properties = {}
341
+ entity.properties["_provenance"] = provenance
342
+
343
+ # Add provenance to relations
344
+ for relation in valid_relations:
345
+ if not relation.properties:
346
+ relation.properties = {}
347
+ relation.properties["_provenance"] = provenance
348
+
349
+ # Store entities
350
+ for entity in new_entities:
351
+ await self.graph_store.add_entity(entity)
352
+ result.entities_added += 1
353
+
354
+ # Store relations
355
+ for relation in valid_relations:
356
+ await self.graph_store.add_relation(relation)
357
+ result.relations_added += 1
358
+
359
+ self._report_progress("Build complete", 1.0)
360
+
361
+ except Exception as e:
362
+ result.success = False
363
+ result.errors.append(f"Build failed: {str(e)}")
364
+
365
+ return self._finalize_result(result)
366
+
367
async def build_batch(
    self,
    texts: List[str],
    sources: Optional[List[str]] = None,
    parallel: bool = True,
    max_parallel: int = 5,
) -> List[BuildResult]:
    """
    Build graph from multiple texts in batch

    Every text is handed to ``build_from_text`` together with its source
    identifier. Results are returned in the same order as ``texts``.

    Args:
        texts: List of texts to process
        sources: Optional list of source identifiers (same length as texts).
            When omitted or empty, identifiers ``text_0``, ``text_1``, ... are
            generated.
        parallel: Process in parallel (default: True)
        max_parallel: Maximum parallel tasks (default: 5). Only used when
            ``parallel`` is True; must be at least 1 in that case.

    Returns:
        List of BuildResult objects (one per text)

    Raises:
        ValueError: If ``sources`` is given with a different length than
            ``texts``, or if ``parallel`` is True and ``max_parallel`` < 1.
    """
    if sources and len(sources) != len(texts):
        raise ValueError("sources list must match texts list length")

    if not sources:
        sources = [f"text_{i}" for i in range(len(texts))]

    if not parallel:
        # Process sequentially
        sequential_results: List[BuildResult] = []
        for text, source in zip(texts, sources):
            sequential_results.append(await self.build_from_text(text, source))
        return sequential_results

    # A semaphore created with 0 permits can never be acquired, so every task
    # would wait forever; reject that up front instead of hanging.
    if max_parallel < 1:
        raise ValueError("max_parallel must be at least 1")

    # Process in parallel with semaphore for concurrency control
    semaphore = asyncio.Semaphore(max_parallel)

    async def process_one(text, source):
        async with semaphore:
            return await self.build_from_text(text, source)

    outcomes = await asyncio.gather(
        *(process_one(text, source) for text, source in zip(texts, sources)),
        return_exceptions=True,
    )

    # Convert anything that was raised into a failed BuildResult so callers
    # always receive exactly one BuildResult per text. BaseException is checked
    # (not just Exception) because gather() also returns CancelledError
    # instances, which do not derive from Exception.
    batch_results: List[BuildResult] = []
    for outcome in outcomes:
        if isinstance(outcome, BaseException):
            error_result = BuildResult(success=False)
            # Some exceptions stringify to "", so fall back to the class name.
            error_result.errors.append(str(outcome) or type(outcome).__name__)
            batch_results.append(error_result)
        else:
            batch_results.append(outcome)

    return batch_results
418
+
419
+ def _report_progress(self, message: str, progress: float):
420
+ """
421
+ Report progress via callback
422
+
423
+ Args:
424
+ message: Progress message
425
+ progress: Progress percentage (0.0-1.0)
426
+ """
427
+ if self.progress_callback:
428
+ try:
429
+ self.progress_callback(message, progress)
430
+ except Exception as e:
431
+ # Don't let callback errors break the pipeline
432
+ print(f"Warning: Progress callback error: {e}")
433
+
434
+ def _finalize_result(self, result: BuildResult) -> BuildResult:
435
+ """
436
+ Finalize build result with timing information
437
+
438
+ Args:
439
+ result: BuildResult to finalize
440
+
441
+ Returns:
442
+ Finalized BuildResult
443
+ """
444
+ result.end_time = datetime.now()
445
+ if result.start_time:
446
+ result.duration_seconds = (result.end_time - result.start_time).total_seconds()
447
+ return result
448
+
449
+ async def _generate_embeddings_for_entities(
450
+ self, entities: List[Any], model: Optional[str] = None
451
+ ) -> None:
452
+ """
453
+ Generate embeddings for entities using the configured embedding client
454
+
455
+ Args:
456
+ entities: List of entities to generate embeddings for
457
+ model: Optional model name for embedding generation
458
+
459
+ Note:
460
+ This method modifies entities in-place by setting their embedding attribute.
461
+ If no embedding client is configured, entities will not have embeddings.
462
+ """
463
+ if not self.embedding_client or not entities:
464
+ return
465
+
466
+ try:
467
+ # Prepare texts for embedding (use entity name or string representation)
468
+ texts = []
469
+ for entity in entities:
470
+ # Try to get a meaningful text representation
471
+ name = entity.properties.get("name") if entity.properties else None
472
+ if name:
473
+ text = f"{entity.entity_type}: {name}"
474
+ else:
475
+ text = f"{entity.entity_type}: {entity.id}"
476
+ texts.append(text)
477
+
478
+ # Generate embeddings
479
+ embeddings = await self.embedding_client.get_embeddings(texts, model=model)
480
+
481
+ # Assign embeddings to entities
482
+ for entity, embedding in zip(entities, embeddings):
483
+ entity.embedding = embedding
484
+
485
+ logger.debug(f"Generated embeddings for {len(entities)} entities")
486
+
487
+ except NotImplementedError:
488
+ logger.debug("Embedding client does not support get_embeddings()")
489
+ except Exception as e:
490
+ logger.warning(f"Failed to generate embeddings: {e}")