aiecs 1.0.1__py3-none-any.whl → 1.7.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic. Click here for more details.

Files changed (340) hide show
  1. aiecs/__init__.py +13 -16
  2. aiecs/__main__.py +7 -7
  3. aiecs/aiecs_client.py +269 -75
  4. aiecs/application/executors/operation_executor.py +79 -54
  5. aiecs/application/knowledge_graph/__init__.py +7 -0
  6. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  7. aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
  8. aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
  11. aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
  12. aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
  13. aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
  14. aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
  15. aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
  16. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  17. aiecs/application/knowledge_graph/extractors/base.py +98 -0
  18. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
  19. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
  20. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
  21. aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
  22. aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
  23. aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
  24. aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
  25. aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
  26. aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
  27. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
  28. aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
  29. aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
  30. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
  31. aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
  32. aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
  33. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
  34. aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
  35. aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
  36. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  37. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
  38. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
  39. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  40. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
  41. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  42. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  43. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
  44. aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
  45. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
  46. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  47. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  48. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
  49. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
  50. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
  51. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
  52. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
  53. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
  54. aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
  55. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
  56. aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
  57. aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
  58. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
  59. aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
  60. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  61. aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
  62. aiecs/application/knowledge_graph/search/reranker.py +293 -0
  63. aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
  64. aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
  65. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  66. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
  67. aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
  68. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  69. aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
  70. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  71. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
  72. aiecs/common/__init__.py +9 -0
  73. aiecs/common/knowledge_graph/__init__.py +17 -0
  74. aiecs/common/knowledge_graph/runnable.py +471 -0
  75. aiecs/config/__init__.py +20 -5
  76. aiecs/config/config.py +762 -31
  77. aiecs/config/graph_config.py +131 -0
  78. aiecs/config/tool_config.py +435 -0
  79. aiecs/core/__init__.py +29 -13
  80. aiecs/core/interface/__init__.py +2 -2
  81. aiecs/core/interface/execution_interface.py +22 -22
  82. aiecs/core/interface/storage_interface.py +37 -88
  83. aiecs/core/registry/__init__.py +31 -0
  84. aiecs/core/registry/service_registry.py +92 -0
  85. aiecs/domain/__init__.py +270 -1
  86. aiecs/domain/agent/__init__.py +191 -0
  87. aiecs/domain/agent/base_agent.py +3949 -0
  88. aiecs/domain/agent/exceptions.py +99 -0
  89. aiecs/domain/agent/graph_aware_mixin.py +569 -0
  90. aiecs/domain/agent/hybrid_agent.py +1731 -0
  91. aiecs/domain/agent/integration/__init__.py +29 -0
  92. aiecs/domain/agent/integration/context_compressor.py +216 -0
  93. aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
  94. aiecs/domain/agent/integration/protocols.py +281 -0
  95. aiecs/domain/agent/integration/retry_policy.py +218 -0
  96. aiecs/domain/agent/integration/role_config.py +213 -0
  97. aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
  98. aiecs/domain/agent/lifecycle.py +291 -0
  99. aiecs/domain/agent/llm_agent.py +692 -0
  100. aiecs/domain/agent/memory/__init__.py +12 -0
  101. aiecs/domain/agent/memory/conversation.py +1124 -0
  102. aiecs/domain/agent/migration/__init__.py +14 -0
  103. aiecs/domain/agent/migration/conversion.py +163 -0
  104. aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
  105. aiecs/domain/agent/models.py +894 -0
  106. aiecs/domain/agent/observability.py +479 -0
  107. aiecs/domain/agent/persistence.py +449 -0
  108. aiecs/domain/agent/prompts/__init__.py +29 -0
  109. aiecs/domain/agent/prompts/builder.py +159 -0
  110. aiecs/domain/agent/prompts/formatters.py +187 -0
  111. aiecs/domain/agent/prompts/template.py +255 -0
  112. aiecs/domain/agent/registry.py +253 -0
  113. aiecs/domain/agent/tool_agent.py +444 -0
  114. aiecs/domain/agent/tools/__init__.py +15 -0
  115. aiecs/domain/agent/tools/schema_generator.py +377 -0
  116. aiecs/domain/community/__init__.py +155 -0
  117. aiecs/domain/community/agent_adapter.py +469 -0
  118. aiecs/domain/community/analytics.py +432 -0
  119. aiecs/domain/community/collaborative_workflow.py +648 -0
  120. aiecs/domain/community/communication_hub.py +634 -0
  121. aiecs/domain/community/community_builder.py +320 -0
  122. aiecs/domain/community/community_integration.py +796 -0
  123. aiecs/domain/community/community_manager.py +803 -0
  124. aiecs/domain/community/decision_engine.py +849 -0
  125. aiecs/domain/community/exceptions.py +231 -0
  126. aiecs/domain/community/models/__init__.py +33 -0
  127. aiecs/domain/community/models/community_models.py +234 -0
  128. aiecs/domain/community/resource_manager.py +461 -0
  129. aiecs/domain/community/shared_context_manager.py +589 -0
  130. aiecs/domain/context/__init__.py +40 -10
  131. aiecs/domain/context/context_engine.py +1910 -0
  132. aiecs/domain/context/conversation_models.py +87 -53
  133. aiecs/domain/context/graph_memory.py +582 -0
  134. aiecs/domain/execution/model.py +12 -4
  135. aiecs/domain/knowledge_graph/__init__.py +19 -0
  136. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  137. aiecs/domain/knowledge_graph/models/entity.py +148 -0
  138. aiecs/domain/knowledge_graph/models/evidence.py +178 -0
  139. aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
  140. aiecs/domain/knowledge_graph/models/path.py +171 -0
  141. aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
  142. aiecs/domain/knowledge_graph/models/query.py +261 -0
  143. aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
  144. aiecs/domain/knowledge_graph/models/relation.py +202 -0
  145. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  146. aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
  147. aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
  148. aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
  149. aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
  150. aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
  151. aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
  152. aiecs/domain/task/dsl_processor.py +172 -56
  153. aiecs/domain/task/model.py +20 -8
  154. aiecs/domain/task/task_context.py +27 -24
  155. aiecs/infrastructure/__init__.py +0 -2
  156. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  157. aiecs/infrastructure/graph_storage/base.py +837 -0
  158. aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
  159. aiecs/infrastructure/graph_storage/cache.py +424 -0
  160. aiecs/infrastructure/graph_storage/distributed.py +223 -0
  161. aiecs/infrastructure/graph_storage/error_handling.py +380 -0
  162. aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
  163. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  164. aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
  165. aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
  166. aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
  167. aiecs/infrastructure/graph_storage/metrics.py +344 -0
  168. aiecs/infrastructure/graph_storage/migration.py +400 -0
  169. aiecs/infrastructure/graph_storage/pagination.py +483 -0
  170. aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
  171. aiecs/infrastructure/graph_storage/postgres.py +1563 -0
  172. aiecs/infrastructure/graph_storage/property_storage.py +353 -0
  173. aiecs/infrastructure/graph_storage/protocols.py +76 -0
  174. aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
  175. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  176. aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
  177. aiecs/infrastructure/graph_storage/streaming.py +487 -0
  178. aiecs/infrastructure/graph_storage/tenant.py +412 -0
  179. aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
  180. aiecs/infrastructure/messaging/websocket_manager.py +51 -35
  181. aiecs/infrastructure/monitoring/__init__.py +22 -0
  182. aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
  183. aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
  184. aiecs/infrastructure/monitoring/structured_logger.py +3 -7
  185. aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
  186. aiecs/infrastructure/persistence/__init__.py +14 -1
  187. aiecs/infrastructure/persistence/context_engine_client.py +184 -0
  188. aiecs/infrastructure/persistence/database_manager.py +67 -43
  189. aiecs/infrastructure/persistence/file_storage.py +180 -103
  190. aiecs/infrastructure/persistence/redis_client.py +74 -21
  191. aiecs/llm/__init__.py +73 -25
  192. aiecs/llm/callbacks/__init__.py +11 -0
  193. aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
  194. aiecs/llm/client_factory.py +230 -37
  195. aiecs/llm/client_resolver.py +155 -0
  196. aiecs/llm/clients/__init__.py +38 -0
  197. aiecs/llm/clients/base_client.py +328 -0
  198. aiecs/llm/clients/google_function_calling_mixin.py +415 -0
  199. aiecs/llm/clients/googleai_client.py +314 -0
  200. aiecs/llm/clients/openai_client.py +158 -0
  201. aiecs/llm/clients/openai_compatible_mixin.py +367 -0
  202. aiecs/llm/clients/vertex_client.py +1186 -0
  203. aiecs/llm/clients/xai_client.py +201 -0
  204. aiecs/llm/config/__init__.py +51 -0
  205. aiecs/llm/config/config_loader.py +272 -0
  206. aiecs/llm/config/config_validator.py +206 -0
  207. aiecs/llm/config/model_config.py +143 -0
  208. aiecs/llm/protocols.py +149 -0
  209. aiecs/llm/utils/__init__.py +10 -0
  210. aiecs/llm/utils/validate_config.py +89 -0
  211. aiecs/main.py +140 -121
  212. aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
  213. aiecs/scripts/aid/__init__.py +19 -0
  214. aiecs/scripts/aid/module_checker.py +499 -0
  215. aiecs/scripts/aid/version_manager.py +235 -0
  216. aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
  217. aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
  218. aiecs/scripts/dependance_check/__init__.py +15 -0
  219. aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
  220. aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
  221. aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
  222. aiecs/scripts/dependance_patch/__init__.py +7 -0
  223. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  224. aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
  225. aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
  226. aiecs/scripts/knowledge_graph/__init__.py +3 -0
  227. aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
  228. aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
  229. aiecs/scripts/tools_develop/README.md +671 -0
  230. aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
  231. aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
  232. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  233. aiecs/scripts/tools_develop/__init__.py +21 -0
  234. aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
  235. aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
  236. aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
  237. aiecs/scripts/tools_develop/schema_coverage.py +511 -0
  238. aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
  239. aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
  240. aiecs/scripts/tools_develop/verify_tools.py +352 -0
  241. aiecs/tasks/__init__.py +0 -1
  242. aiecs/tasks/worker.py +115 -47
  243. aiecs/tools/__init__.py +194 -72
  244. aiecs/tools/apisource/__init__.py +99 -0
  245. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  246. aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
  247. aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
  248. aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
  249. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  250. aiecs/tools/apisource/monitoring/metrics.py +330 -0
  251. aiecs/tools/apisource/providers/__init__.py +112 -0
  252. aiecs/tools/apisource/providers/base.py +671 -0
  253. aiecs/tools/apisource/providers/census.py +397 -0
  254. aiecs/tools/apisource/providers/fred.py +535 -0
  255. aiecs/tools/apisource/providers/newsapi.py +409 -0
  256. aiecs/tools/apisource/providers/worldbank.py +352 -0
  257. aiecs/tools/apisource/reliability/__init__.py +12 -0
  258. aiecs/tools/apisource/reliability/error_handler.py +363 -0
  259. aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
  260. aiecs/tools/apisource/tool.py +832 -0
  261. aiecs/tools/apisource/utils/__init__.py +9 -0
  262. aiecs/tools/apisource/utils/validators.py +334 -0
  263. aiecs/tools/base_tool.py +415 -21
  264. aiecs/tools/docs/__init__.py +121 -0
  265. aiecs/tools/docs/ai_document_orchestrator.py +607 -0
  266. aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
  267. aiecs/tools/docs/content_insertion_tool.py +1320 -0
  268. aiecs/tools/docs/document_creator_tool.py +1464 -0
  269. aiecs/tools/docs/document_layout_tool.py +1160 -0
  270. aiecs/tools/docs/document_parser_tool.py +1016 -0
  271. aiecs/tools/docs/document_writer_tool.py +2008 -0
  272. aiecs/tools/knowledge_graph/__init__.py +17 -0
  273. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
  274. aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
  275. aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
  276. aiecs/tools/langchain_adapter.py +300 -138
  277. aiecs/tools/schema_generator.py +455 -0
  278. aiecs/tools/search_tool/__init__.py +100 -0
  279. aiecs/tools/search_tool/analyzers.py +581 -0
  280. aiecs/tools/search_tool/cache.py +264 -0
  281. aiecs/tools/search_tool/constants.py +128 -0
  282. aiecs/tools/search_tool/context.py +224 -0
  283. aiecs/tools/search_tool/core.py +778 -0
  284. aiecs/tools/search_tool/deduplicator.py +119 -0
  285. aiecs/tools/search_tool/error_handler.py +242 -0
  286. aiecs/tools/search_tool/metrics.py +343 -0
  287. aiecs/tools/search_tool/rate_limiter.py +172 -0
  288. aiecs/tools/search_tool/schemas.py +275 -0
  289. aiecs/tools/statistics/__init__.py +80 -0
  290. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
  291. aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
  292. aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
  293. aiecs/tools/statistics/data_loader_tool.py +555 -0
  294. aiecs/tools/statistics/data_profiler_tool.py +638 -0
  295. aiecs/tools/statistics/data_transformer_tool.py +580 -0
  296. aiecs/tools/statistics/data_visualizer_tool.py +498 -0
  297. aiecs/tools/statistics/model_trainer_tool.py +507 -0
  298. aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
  299. aiecs/tools/task_tools/__init__.py +49 -36
  300. aiecs/tools/task_tools/chart_tool.py +200 -184
  301. aiecs/tools/task_tools/classfire_tool.py +268 -267
  302. aiecs/tools/task_tools/image_tool.py +220 -141
  303. aiecs/tools/task_tools/office_tool.py +226 -146
  304. aiecs/tools/task_tools/pandas_tool.py +477 -121
  305. aiecs/tools/task_tools/report_tool.py +390 -142
  306. aiecs/tools/task_tools/research_tool.py +149 -79
  307. aiecs/tools/task_tools/scraper_tool.py +339 -145
  308. aiecs/tools/task_tools/stats_tool.py +448 -209
  309. aiecs/tools/temp_file_manager.py +26 -24
  310. aiecs/tools/tool_executor/__init__.py +18 -16
  311. aiecs/tools/tool_executor/tool_executor.py +364 -52
  312. aiecs/utils/LLM_output_structor.py +74 -48
  313. aiecs/utils/__init__.py +14 -3
  314. aiecs/utils/base_callback.py +0 -3
  315. aiecs/utils/cache_provider.py +696 -0
  316. aiecs/utils/execution_utils.py +50 -31
  317. aiecs/utils/prompt_loader.py +1 -0
  318. aiecs/utils/token_usage_repository.py +37 -11
  319. aiecs/ws/socket_server.py +14 -4
  320. {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/METADATA +52 -15
  321. aiecs-1.7.17.dist-info/RECORD +337 -0
  322. aiecs-1.7.17.dist-info/entry_points.txt +13 -0
  323. aiecs/config/registry.py +0 -19
  324. aiecs/domain/context/content_engine.py +0 -982
  325. aiecs/llm/base_client.py +0 -99
  326. aiecs/llm/openai_client.py +0 -125
  327. aiecs/llm/vertex_client.py +0 -186
  328. aiecs/llm/xai_client.py +0 -184
  329. aiecs/scripts/dependency_checker.py +0 -857
  330. aiecs/scripts/quick_dependency_check.py +0 -269
  331. aiecs/tools/task_tools/search_api.py +0 -7
  332. aiecs-1.0.1.dist-info/RECORD +0 -90
  333. aiecs-1.0.1.dist-info/entry_points.txt +0 -7
  334. /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
  335. /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
  336. /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
  337. /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
  338. {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/WHEEL +0 -0
  339. {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/licenses/LICENSE +0 -0
  340. {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,293 @@
1
+ """
2
+ Data Reshaping for Knowledge Graph Import
3
+
4
+ Provides utilities to convert wide format data to normalized graph structures
5
+ and vice versa, enabling efficient import of datasets with many columns.
6
+ """
7
+
8
+ from typing import List, Optional, Dict, Any, Union
9
+ from dataclasses import dataclass
10
+ import logging
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+ # Check for pandas availability
15
+ try:
16
+ import pandas as pd
17
+ PANDAS_AVAILABLE = True
18
+ except ImportError:
19
+ PANDAS_AVAILABLE = False
20
+
21
+
22
@dataclass
class ReshapeResult:
    """
    Result of data reshaping operation

    Attributes:
        data: Reshaped DataFrame
        original_shape: Original (rows, cols) shape
        new_shape: New (rows, cols) shape
        id_columns: Columns used as identifiers
        variable_column: Name of variable column (for melt), otherwise None
        value_column: Name of value column (for melt), otherwise None
        warnings: List of warnings; omitting it (or passing None) yields a
            fresh empty list for this instance
    """
    data: 'pd.DataFrame'
    original_shape: tuple
    new_shape: tuple
    id_columns: List[str]
    variable_column: Optional[str] = None
    value_column: Optional[str] = None
    # None is only a construction-time sentinel (a mutable default cannot be
    # shared between instances); __post_init__ replaces it with a new list.
    warnings: Optional[List[str]] = None

    def __post_init__(self) -> None:
        """Replace a missing warnings list with a new, instance-private list."""
        if self.warnings is None:
            self.warnings = []
47
+
48
+
49
class DataReshaping:
    """
    Utility class for reshaping structured data

    Provides methods to convert between wide and long formats,
    enabling normalized graph structures from wide datasets.

    Every method is a static method; the class is used purely as a namespace.
    """

    @staticmethod
    def melt(
        df: 'pd.DataFrame',
        id_vars: List[str],
        value_vars: Optional[List[str]] = None,
        var_name: str = 'variable',
        value_name: str = 'value',
        dropna: bool = True,
    ) -> ReshapeResult:
        """
        Convert wide format to long format (melt operation)

        Transforms data from wide format (many columns) to long format
        (fewer columns, more rows), which is ideal for normalized graph structures.

        Args:
            df: DataFrame to reshape
            id_vars: Columns to use as identifier variables
            value_vars: Columns to unpivot (default: all columns except id_vars)
            var_name: Name for the variable column (default: 'variable')
            value_name: Name for the value column (default: 'value')
            dropna: Whether to drop rows with missing values (default: True)

        Returns:
            ReshapeResult with reshaped data and metadata

        Raises:
            ImportError: If pandas is not installed

        Example:
            ```python
            # Wide format: sample_id, option1, option2, option3
            # Long format: sample_id, variable, value

            result = DataReshaping.melt(
                df,
                id_vars=['sample_id'],
                value_vars=['option1', 'option2', 'option3'],
                var_name='option_name',
                value_name='option_value'
            )
            ```
        """
        if not PANDAS_AVAILABLE:
            raise ImportError("pandas is required for data reshaping")

        original_shape = df.shape
        warnings = []

        # If value_vars not specified, use all columns except id_vars
        if value_vars is None:
            value_vars = [col for col in df.columns if col not in id_vars]
            warnings.append(f"Auto-detected {len(value_vars)} value columns")

        # Perform melt operation
        melted = pd.melt(
            df,
            id_vars=id_vars,
            value_vars=value_vars,
            var_name=var_name,
            value_name=value_name,
        )

        # Drop NA values if requested; only the value column is inspected,
        # so missing identifiers do not cause a row to be dropped here.
        if dropna:
            rows_before = len(melted)
            melted = melted.dropna(subset=[value_name])
            rows_dropped = rows_before - len(melted)
            if rows_dropped > 0:
                warnings.append(f"Dropped {rows_dropped} rows with missing values")

        return ReshapeResult(
            data=melted,
            original_shape=original_shape,
            new_shape=melted.shape,
            id_columns=id_vars,
            variable_column=var_name,
            value_column=value_name,
            warnings=warnings,
        )

    @staticmethod
    def pivot(
        df: 'pd.DataFrame',
        index: Union[str, List[str]],
        columns: str,
        values: str,
        aggfunc: str = 'first',
        fill_value: Optional[Any] = None,
    ) -> ReshapeResult:
        """
        Convert long format to wide format (pivot operation)

        Transforms data from long format to wide format, creating columns
        from unique values in the specified column.

        Args:
            df: DataFrame to reshape
            index: Column(s) to use as index (identifier)
            columns: Column whose unique values become new columns
            values: Column containing values to populate the new columns
            aggfunc: Aggregation function if multiple values per group (default: 'first')
            fill_value: Value to use for missing data (default: None)

        Returns:
            ReshapeResult with reshaped data and metadata

        Raises:
            ImportError: If pandas is not installed
            ValueError: If the underlying pivot fails; the original exception
                is attached as ``__cause__``

        Example:
            ```python
            # Long format: sample_id, option_name, option_value
            # Wide format: sample_id, option1, option2, option3

            result = DataReshaping.pivot(
                df,
                index='sample_id',
                columns='option_name',
                values='option_value'
            )
            ```
        """
        if not PANDAS_AVAILABLE:
            raise ImportError("pandas is required for data reshaping")

        original_shape = df.shape
        warnings = []

        # Perform pivot operation
        try:
            pivoted = df.pivot_table(
                index=index,
                columns=columns,
                values=values,
                aggfunc=aggfunc,
                fill_value=fill_value,
            )

            # Reset index to make it a regular column
            pivoted = pivoted.reset_index()

            # Flatten column names if multi-level
            if isinstance(pivoted.columns, pd.MultiIndex):
                pivoted.columns = ['_'.join(map(str, col)).strip('_') for col in pivoted.columns.values]
                warnings.append("Flattened multi-level column names")

        except Exception as e:
            # Callers get one exception type for any pivot failure; chain the
            # original so the pandas traceback is kept as the explicit cause.
            raise ValueError(f"Pivot operation failed: {e}") from e

        # Determine id_columns: always report them as a list
        if isinstance(index, str):
            id_columns = [index]
        else:
            id_columns = list(index)

        return ReshapeResult(
            data=pivoted,
            original_shape=original_shape,
            new_shape=pivoted.shape,
            id_columns=id_columns,
            warnings=warnings,
        )

    @staticmethod
    def detect_wide_format(
        df: 'pd.DataFrame',
        threshold_columns: int = 50,
    ) -> bool:
        """
        Detect if DataFrame is in wide format

        Wide format is characterized by many columns relative to rows.

        Args:
            df: DataFrame to analyze
            threshold_columns: Minimum number of columns to consider wide (default: 50)

        Returns:
            True if DataFrame appears to be in wide format; always False
            when pandas is not installed
        """
        if not PANDAS_AVAILABLE:
            return False

        num_cols = len(df.columns)
        num_rows = len(df)

        # Wide format indicators:
        # 1. Many columns (>= threshold)
        # 2. At least 20 columns AND more columns than half the row count
        return num_cols >= threshold_columns or (num_cols >= 20 and num_cols > num_rows * 0.5)

    @staticmethod
    def suggest_melt_config(
        df: 'pd.DataFrame',
        id_column_patterns: Optional[List[str]] = None,
    ) -> Dict[str, Any]:
        """
        Suggest melt configuration for wide format data

        Analyzes DataFrame structure to suggest appropriate id_vars and value_vars.
        A column is treated as an identifier when any pattern occurs as a
        case-insensitive substring of its label; labels that are not strings
        (e.g. integer headers) are compared through their ``str()`` form.

        Args:
            df: DataFrame to analyze
            id_column_patterns: Patterns to identify ID columns
                (default: ['id', 'key', 'name', 'sample', 'subject'])

        Returns:
            Dictionary with suggested melt configuration: 'id_vars',
            'value_vars', 'var_name', 'value_name' and 'confidence'

        Raises:
            ImportError: If pandas is not installed
        """
        if not PANDAS_AVAILABLE:
            raise ImportError("pandas is required for data reshaping")

        if id_column_patterns is None:
            id_column_patterns = ['id', 'key', 'name', 'sample', 'subject']

        # Column labels are lower-cased before matching, so the patterns must
        # be lower-cased as well or an upper-case pattern could never match.
        patterns = [str(pattern).lower() for pattern in id_column_patterns]

        # Identify potential ID columns
        id_vars = []
        for col in df.columns:
            # str() guards against non-string labels such as integer headers
            col_lower = str(col).lower()
            if any(pattern in col_lower for pattern in patterns):
                id_vars.append(col)

        # If no ID columns found, use first column
        if not id_vars and len(df.columns) > 0:
            id_vars = [df.columns[0]]

        # Value columns are all other columns
        id_lookup = set(id_vars)
        value_vars = [col for col in df.columns if col not in id_lookup]

        return {
            'id_vars': id_vars,
            'value_vars': value_vars,
            'var_name': 'variable',
            'value_name': 'value',
            # NOTE(review): this is evaluated after the first-column fallback,
            # so 0.5 is only reported for a DataFrame with no columns; confirm
            # whether the fallback case was meant to report the lower value.
            'confidence': 0.8 if id_vars else 0.5,
        }
293
+
@@ -0,0 +1,369 @@
1
+ """
2
+ Document Graph Builder
3
+
4
+ Builds knowledge graphs from documents (PDF, DOCX, TXT, etc.).
5
+ """
6
+
7
+ import asyncio
8
+ from pathlib import Path
9
+ from typing import List, Optional, Dict, Any, Union
10
+ from dataclasses import dataclass, field
11
+
12
+ from aiecs.application.knowledge_graph.builder.graph_builder import (
13
+ GraphBuilder,
14
+ BuildResult,
15
+ )
16
+ from aiecs.application.knowledge_graph.builder.text_chunker import TextChunker
17
+ from aiecs.tools.docs.document_parser_tool import (
18
+ DocumentParserTool,
19
+ ParsingStrategy,
20
+ OutputFormat,
21
+ )
22
+
23
+
24
@dataclass
class DocumentBuildResult:
    """
    Outcome of turning a single document into graph content

    Holds document-level status together with the per-chunk BuildResult
    objects; the entity and relation totals are derived from those chunk
    results on demand.
    """

    # Path of the source document
    document_path: str
    # Document type label (e.g. the file extension, or "unknown")
    document_type: str
    # Number of chunks the document was split into
    total_chunks: int = 0
    # Number of chunks for which a result was recorded
    chunks_processed: int = 0
    # One BuildResult per processed chunk
    chunk_results: List[BuildResult] = field(default_factory=list)
    # Overall status flag for the document
    success: bool = True
    # Human-readable error messages collected while processing
    errors: List[str] = field(default_factory=list)

    @property
    def total_entities_added(self) -> int:
        """Sum of entities_added over every chunk result"""
        entity_count = 0
        for chunk_result in self.chunk_results:
            entity_count += chunk_result.entities_added
        return entity_count

    @property
    def total_relations_added(self) -> int:
        """Sum of relations_added over every chunk result"""
        relation_count = 0
        for chunk_result in self.chunk_results:
            relation_count += chunk_result.relations_added
        return relation_count
49
+
50
+
51
class DocumentGraphBuilder:
    """
    Build knowledge graphs from documents

    Supports multiple document formats:
    - PDF
    - DOCX (Microsoft Word)
    - TXT (Plain text)
    - And more via AIECS DocumentParserTool

    For large documents, automatically chunks text into manageable pieces.

    Example:
    ```python
    builder = DocumentGraphBuilder(
        graph_builder=graph_builder,
        chunk_size=1000
    )

    result = await builder.build_from_document("research_paper.pdf")

    print(f"Processed {result.total_chunks} chunks")
    print(f"Added {result.total_entities_added} entities")
    print(f"Added {result.total_relations_added} relations")
    ```
    """

    def __init__(
        self,
        graph_builder: GraphBuilder,
        chunk_size: int = 2000,
        chunk_overlap: int = 200,
        enable_chunking: bool = True,
        parallel_chunks: bool = True,
        max_parallel_chunks: int = 3,
    ):
        """
        Initialize document graph builder

        Args:
            graph_builder: GraphBuilder instance for text processing
            chunk_size: Size of text chunks (in characters)
            chunk_overlap: Overlap between chunks
            enable_chunking: Whether to chunk large documents
            parallel_chunks: Process chunks in parallel
            max_parallel_chunks: Maximum parallel chunk processing
                (values below 1 are treated as 1 when chunks are processed)
        """
        self.graph_builder = graph_builder
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        self.enable_chunking = enable_chunking
        self.parallel_chunks = parallel_chunks
        self.max_parallel_chunks = max_parallel_chunks

        # Initialize document parser (will read config from environment
        # variables)
        self.document_parser = DocumentParserTool()

        # Initialize text chunker
        self.text_chunker = TextChunker(
            chunk_size=chunk_size,
            overlap=chunk_overlap,
            respect_sentences=True,
        )

    async def build_from_document(
        self,
        document_path: Union[str, Path],
        metadata: Optional[Dict[str, Any]] = None,
    ) -> DocumentBuildResult:
        """
        Build knowledge graph from a document

        Args:
            document_path: Path to document file
            metadata: Optional metadata to attach to extracted entities/relations

        Returns:
            DocumentBuildResult with statistics. Failures are reported through
            ``success`` and ``errors`` rather than raised.
        """
        document_path = str(document_path)
        result = DocumentBuildResult(document_path=document_path, document_type="unknown")

        try:
            # Step 1: Parse document to text
            text = await self._parse_document(document_path)

            if not text or not text.strip():
                result.success = False
                result.errors.append("Document parsing returned empty text")
                return result

            # Determine document type
            result.document_type = Path(document_path).suffix[1:].lower()  # Remove leading dot

            # Step 2: Chunk text if needed
            if self.enable_chunking and len(text) > self.chunk_size:
                chunks = self.text_chunker.chunk_text(text, metadata={"document": document_path})
                result.total_chunks = len(chunks)
            else:
                # Single chunk (small document)
                from aiecs.application.knowledge_graph.builder.text_chunker import (
                    TextChunk,
                )

                chunks = [
                    TextChunk(
                        text=text,
                        start_char=0,
                        end_char=len(text),
                        chunk_index=0,
                        metadata={"document": document_path},
                    )
                ]
                result.total_chunks = 1

            # Step 3: Process each chunk
            if self.parallel_chunks and len(chunks) > 1:
                # Process chunks in parallel
                chunk_results = await self._process_chunks_parallel(chunks, document_path, metadata)
            else:
                # Process chunks sequentially
                chunk_results = await self._process_chunks_sequential(chunks, document_path, metadata)

            result.chunk_results = chunk_results
            result.chunks_processed = len(chunk_results)

            # Check if all chunks succeeded
            failed_chunks = [r for r in chunk_results if not r.success]
            if failed_chunks:
                result.errors.append(f"{len(failed_chunks)} chunks failed processing")
                # Surface the per-chunk messages so the cause of a failure is
                # visible on the document result, not only on chunk_results.
                for failed in failed_chunks:
                    result.errors.extend(failed.errors)

            result.success = len(failed_chunks) < len(chunks)  # At least some chunks succeeded

        except Exception as e:
            result.success = False
            result.errors.append(f"Document processing failed: {str(e)}")

        return result

    async def build_from_documents(
        self,
        document_paths: List[Union[str, Path]],
        parallel: bool = True,
        max_parallel: int = 3,
    ) -> List[DocumentBuildResult]:
        """
        Build knowledge graph from multiple documents

        Args:
            document_paths: List of document paths
            parallel: Process documents in parallel
            max_parallel: Maximum parallel documents (values below 1 are
                treated as 1)

        Returns:
            List of DocumentBuildResult objects, in the same order as
            ``document_paths``
        """
        if parallel:
            # A semaphore initialised with 0 would block every task forever and
            # a negative value raises ValueError, so enforce a floor of 1.
            semaphore = asyncio.Semaphore(max(1, max_parallel))

            async def process_one(doc_path):
                async with semaphore:
                    return await self.build_from_document(doc_path)

            tasks = [process_one(doc_path) for doc_path in document_paths]
            gather_results = await asyncio.gather(*tasks, return_exceptions=True)

            # Handle exceptions - convert all to DocumentBuildResult
            results: List[DocumentBuildResult] = []
            for i, result in enumerate(gather_results):
                if isinstance(result, Exception):
                    error_result = DocumentBuildResult(
                        document_path=str(document_paths[i]),
                        document_type="unknown",
                        success=False,
                    )
                    error_result.errors.append(str(result))
                    results.append(error_result)
                elif isinstance(result, DocumentBuildResult):
                    results.append(result)
                else:
                    # Fallback for unexpected types
                    error_result = DocumentBuildResult(
                        document_path=str(document_paths[i]),
                        document_type="unknown",
                        success=False,
                    )
                    error_result.errors.append(f"Unexpected result type: {type(result)}")
                    results.append(error_result)

            return results
        else:
            # Sequential processing
            results = []
            for doc_path in document_paths:
                result = await self.build_from_document(doc_path)
                results.append(result)
            return results

    async def _parse_document(self, document_path: str) -> str:
        """
        Parse document to text using AIECS document parser

        If the parser raises, the file is read as UTF-8 plain text instead.

        Args:
            document_path: Path to document

        Returns:
            Extracted text content ("" when the parser returns no usable text)

        Raises:
            RuntimeError: If both the parser and the plain-text fallback fail
        """
        try:
            # Use document parser tool
            parse_result = self.document_parser.parse_document(
                source=document_path,
                strategy=ParsingStrategy.TEXT_ONLY,
                output_format=OutputFormat.TEXT,
            )
        except Exception as parse_error:
            # Fallback: try reading as plain text
            try:
                with open(document_path, "r", encoding="utf-8") as f:
                    return f.read()
            except Exception as fallback_error:
                # Report both failures; the parser error is usually the more
                # informative one and used to be dropped silently.
                raise RuntimeError(
                    f"Failed to parse document: {str(fallback_error)} "
                    f"(parser error: {str(parse_error)})"
                ) from fallback_error

        if isinstance(parse_result, dict):
            # Normalise a missing or None "content" entry to an empty string
            return parse_result.get("content", "") or ""
        if isinstance(parse_result, str):
            return parse_result
        return ""

    async def _build_chunk(
        self,
        chunk,
        document_path: str,
        metadata: Optional[Dict[str, Any]],
    ) -> BuildResult:
        """
        Run the graph builder on one chunk

        Args:
            chunk: TextChunk object to process
            document_path: Source document path
            metadata: Optional metadata; its entries override the generated
                chunk metadata on key collisions

        Returns:
            Whatever ``graph_builder.build_from_text`` returns for the chunk
        """
        chunk_metadata = {
            "document": document_path,
            "chunk_index": chunk.chunk_index,
            "chunk_start": chunk.start_char,
            "chunk_end": chunk.end_char,
        }
        if metadata:
            chunk_metadata.update(metadata)

        source = f"{document_path}#chunk{chunk.chunk_index}"
        return await self.graph_builder.build_from_text(text=chunk.text, source=source, metadata=chunk_metadata)

    async def _process_chunks_parallel(
        self,
        chunks: List,
        document_path: str,
        metadata: Optional[Dict[str, Any]],
    ) -> List[BuildResult]:
        """
        Process chunks in parallel

        Args:
            chunks: List of TextChunk objects
            document_path: Source document path
            metadata: Optional metadata

        Returns:
            List of BuildResult objects, one per chunk and in chunk order.
            A chunk that raised is represented by a failed BuildResult.
        """
        # Floor of 1: Semaphore(0) would never let any chunk start.
        semaphore = asyncio.Semaphore(max(1, self.max_parallel_chunks))

        async def process_chunk(chunk):
            async with semaphore:
                return await self._build_chunk(chunk, document_path, metadata)

        tasks = [process_chunk(chunk) for chunk in chunks]
        gather_results = await asyncio.gather(*tasks, return_exceptions=True)

        # Handle exceptions - convert all to BuildResult
        results: List[BuildResult] = []
        for i, result in enumerate(gather_results):
            if isinstance(result, Exception):
                error_result = BuildResult(success=False)
                error_result.errors.append(f"Chunk {i} failed: {str(result)}")
                results.append(error_result)
            elif isinstance(result, BuildResult):
                results.append(result)
            else:
                # Fallback for unexpected types
                error_result = BuildResult(success=False)
                error_result.errors.append(f"Unexpected result type: {type(result)}")
                results.append(error_result)

        return results

    async def _process_chunks_sequential(
        self,
        chunks: List,
        document_path: str,
        metadata: Optional[Dict[str, Any]],
    ) -> List[BuildResult]:
        """
        Process chunks sequentially

        Args:
            chunks: List of TextChunk objects
            document_path: Source document path
            metadata: Optional metadata

        Returns:
            List of BuildResult objects, one per chunk and in chunk order.
            A chunk that raised is represented by a failed BuildResult.
        """
        results: List[BuildResult] = []

        for i, chunk in enumerate(chunks):
            try:
                result = await self._build_chunk(chunk, document_path, metadata)
            except Exception as e:
                # Mirror the parallel path: one bad chunk must not discard the
                # results already collected for the other chunks.
                result = BuildResult(success=False)
                result.errors.append(f"Chunk {i} failed: {str(e)}")
            results.append(result)

        return results