aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic. Click here for more details.

Files changed (340)
  1. aiecs/__init__.py +13 -16
  2. aiecs/__main__.py +7 -7
  3. aiecs/aiecs_client.py +269 -75
  4. aiecs/application/executors/operation_executor.py +79 -54
  5. aiecs/application/knowledge_graph/__init__.py +7 -0
  6. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  7. aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
  8. aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
  11. aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
  12. aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
  13. aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
  14. aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
  15. aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
  16. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  17. aiecs/application/knowledge_graph/extractors/base.py +98 -0
  18. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
  19. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
  20. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
  21. aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
  22. aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
  23. aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
  24. aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
  25. aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
  26. aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
  27. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
  28. aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
  29. aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
  30. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
  31. aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
  32. aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
  33. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
  34. aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
  35. aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
  36. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  37. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
  38. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
  39. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  40. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
  41. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  42. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  43. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
  44. aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
  45. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
  46. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  47. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  48. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
  49. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
  50. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
  51. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
  52. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
  53. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
  54. aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
  55. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
  56. aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
  57. aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
  58. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
  59. aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
  60. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  61. aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
  62. aiecs/application/knowledge_graph/search/reranker.py +293 -0
  63. aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
  64. aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
  65. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  66. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
  67. aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
  68. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  69. aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
  70. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  71. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
  72. aiecs/common/__init__.py +9 -0
  73. aiecs/common/knowledge_graph/__init__.py +17 -0
  74. aiecs/common/knowledge_graph/runnable.py +471 -0
  75. aiecs/config/__init__.py +20 -5
  76. aiecs/config/config.py +762 -31
  77. aiecs/config/graph_config.py +131 -0
  78. aiecs/config/tool_config.py +399 -0
  79. aiecs/core/__init__.py +29 -13
  80. aiecs/core/interface/__init__.py +2 -2
  81. aiecs/core/interface/execution_interface.py +22 -22
  82. aiecs/core/interface/storage_interface.py +37 -88
  83. aiecs/core/registry/__init__.py +31 -0
  84. aiecs/core/registry/service_registry.py +92 -0
  85. aiecs/domain/__init__.py +270 -1
  86. aiecs/domain/agent/__init__.py +191 -0
  87. aiecs/domain/agent/base_agent.py +3870 -0
  88. aiecs/domain/agent/exceptions.py +99 -0
  89. aiecs/domain/agent/graph_aware_mixin.py +569 -0
  90. aiecs/domain/agent/hybrid_agent.py +1435 -0
  91. aiecs/domain/agent/integration/__init__.py +29 -0
  92. aiecs/domain/agent/integration/context_compressor.py +216 -0
  93. aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
  94. aiecs/domain/agent/integration/protocols.py +281 -0
  95. aiecs/domain/agent/integration/retry_policy.py +218 -0
  96. aiecs/domain/agent/integration/role_config.py +213 -0
  97. aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
  98. aiecs/domain/agent/lifecycle.py +291 -0
  99. aiecs/domain/agent/llm_agent.py +692 -0
  100. aiecs/domain/agent/memory/__init__.py +12 -0
  101. aiecs/domain/agent/memory/conversation.py +1124 -0
  102. aiecs/domain/agent/migration/__init__.py +14 -0
  103. aiecs/domain/agent/migration/conversion.py +163 -0
  104. aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
  105. aiecs/domain/agent/models.py +884 -0
  106. aiecs/domain/agent/observability.py +479 -0
  107. aiecs/domain/agent/persistence.py +449 -0
  108. aiecs/domain/agent/prompts/__init__.py +29 -0
  109. aiecs/domain/agent/prompts/builder.py +159 -0
  110. aiecs/domain/agent/prompts/formatters.py +187 -0
  111. aiecs/domain/agent/prompts/template.py +255 -0
  112. aiecs/domain/agent/registry.py +253 -0
  113. aiecs/domain/agent/tool_agent.py +444 -0
  114. aiecs/domain/agent/tools/__init__.py +15 -0
  115. aiecs/domain/agent/tools/schema_generator.py +364 -0
  116. aiecs/domain/community/__init__.py +155 -0
  117. aiecs/domain/community/agent_adapter.py +469 -0
  118. aiecs/domain/community/analytics.py +432 -0
  119. aiecs/domain/community/collaborative_workflow.py +648 -0
  120. aiecs/domain/community/communication_hub.py +634 -0
  121. aiecs/domain/community/community_builder.py +320 -0
  122. aiecs/domain/community/community_integration.py +796 -0
  123. aiecs/domain/community/community_manager.py +803 -0
  124. aiecs/domain/community/decision_engine.py +849 -0
  125. aiecs/domain/community/exceptions.py +231 -0
  126. aiecs/domain/community/models/__init__.py +33 -0
  127. aiecs/domain/community/models/community_models.py +234 -0
  128. aiecs/domain/community/resource_manager.py +461 -0
  129. aiecs/domain/community/shared_context_manager.py +589 -0
  130. aiecs/domain/context/__init__.py +40 -10
  131. aiecs/domain/context/context_engine.py +1910 -0
  132. aiecs/domain/context/conversation_models.py +87 -53
  133. aiecs/domain/context/graph_memory.py +582 -0
  134. aiecs/domain/execution/model.py +12 -4
  135. aiecs/domain/knowledge_graph/__init__.py +19 -0
  136. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  137. aiecs/domain/knowledge_graph/models/entity.py +148 -0
  138. aiecs/domain/knowledge_graph/models/evidence.py +178 -0
  139. aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
  140. aiecs/domain/knowledge_graph/models/path.py +171 -0
  141. aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
  142. aiecs/domain/knowledge_graph/models/query.py +261 -0
  143. aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
  144. aiecs/domain/knowledge_graph/models/relation.py +202 -0
  145. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  146. aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
  147. aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
  148. aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
  149. aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
  150. aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
  151. aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
  152. aiecs/domain/task/dsl_processor.py +172 -56
  153. aiecs/domain/task/model.py +20 -8
  154. aiecs/domain/task/task_context.py +27 -24
  155. aiecs/infrastructure/__init__.py +0 -2
  156. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  157. aiecs/infrastructure/graph_storage/base.py +837 -0
  158. aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
  159. aiecs/infrastructure/graph_storage/cache.py +424 -0
  160. aiecs/infrastructure/graph_storage/distributed.py +223 -0
  161. aiecs/infrastructure/graph_storage/error_handling.py +380 -0
  162. aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
  163. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  164. aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
  165. aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
  166. aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
  167. aiecs/infrastructure/graph_storage/metrics.py +344 -0
  168. aiecs/infrastructure/graph_storage/migration.py +400 -0
  169. aiecs/infrastructure/graph_storage/pagination.py +483 -0
  170. aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
  171. aiecs/infrastructure/graph_storage/postgres.py +1563 -0
  172. aiecs/infrastructure/graph_storage/property_storage.py +353 -0
  173. aiecs/infrastructure/graph_storage/protocols.py +76 -0
  174. aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
  175. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  176. aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
  177. aiecs/infrastructure/graph_storage/streaming.py +487 -0
  178. aiecs/infrastructure/graph_storage/tenant.py +412 -0
  179. aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
  180. aiecs/infrastructure/messaging/websocket_manager.py +51 -35
  181. aiecs/infrastructure/monitoring/__init__.py +22 -0
  182. aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
  183. aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
  184. aiecs/infrastructure/monitoring/structured_logger.py +3 -7
  185. aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
  186. aiecs/infrastructure/persistence/__init__.py +14 -1
  187. aiecs/infrastructure/persistence/context_engine_client.py +184 -0
  188. aiecs/infrastructure/persistence/database_manager.py +67 -43
  189. aiecs/infrastructure/persistence/file_storage.py +180 -103
  190. aiecs/infrastructure/persistence/redis_client.py +74 -21
  191. aiecs/llm/__init__.py +73 -25
  192. aiecs/llm/callbacks/__init__.py +11 -0
  193. aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
  194. aiecs/llm/client_factory.py +224 -36
  195. aiecs/llm/client_resolver.py +155 -0
  196. aiecs/llm/clients/__init__.py +38 -0
  197. aiecs/llm/clients/base_client.py +324 -0
  198. aiecs/llm/clients/google_function_calling_mixin.py +457 -0
  199. aiecs/llm/clients/googleai_client.py +241 -0
  200. aiecs/llm/clients/openai_client.py +158 -0
  201. aiecs/llm/clients/openai_compatible_mixin.py +367 -0
  202. aiecs/llm/clients/vertex_client.py +897 -0
  203. aiecs/llm/clients/xai_client.py +201 -0
  204. aiecs/llm/config/__init__.py +51 -0
  205. aiecs/llm/config/config_loader.py +272 -0
  206. aiecs/llm/config/config_validator.py +206 -0
  207. aiecs/llm/config/model_config.py +143 -0
  208. aiecs/llm/protocols.py +149 -0
  209. aiecs/llm/utils/__init__.py +10 -0
  210. aiecs/llm/utils/validate_config.py +89 -0
  211. aiecs/main.py +140 -121
  212. aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
  213. aiecs/scripts/aid/__init__.py +19 -0
  214. aiecs/scripts/aid/module_checker.py +499 -0
  215. aiecs/scripts/aid/version_manager.py +235 -0
  216. aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
  217. aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
  218. aiecs/scripts/dependance_check/__init__.py +15 -0
  219. aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
  220. aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
  221. aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
  222. aiecs/scripts/dependance_patch/__init__.py +7 -0
  223. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  224. aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
  225. aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
  226. aiecs/scripts/knowledge_graph/__init__.py +3 -0
  227. aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
  228. aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
  229. aiecs/scripts/tools_develop/README.md +671 -0
  230. aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
  231. aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
  232. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  233. aiecs/scripts/tools_develop/__init__.py +21 -0
  234. aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
  235. aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
  236. aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
  237. aiecs/scripts/tools_develop/schema_coverage.py +511 -0
  238. aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
  239. aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
  240. aiecs/scripts/tools_develop/verify_tools.py +352 -0
  241. aiecs/tasks/__init__.py +0 -1
  242. aiecs/tasks/worker.py +115 -47
  243. aiecs/tools/__init__.py +194 -72
  244. aiecs/tools/apisource/__init__.py +99 -0
  245. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  246. aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
  247. aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
  248. aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
  249. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  250. aiecs/tools/apisource/monitoring/metrics.py +330 -0
  251. aiecs/tools/apisource/providers/__init__.py +112 -0
  252. aiecs/tools/apisource/providers/base.py +671 -0
  253. aiecs/tools/apisource/providers/census.py +397 -0
  254. aiecs/tools/apisource/providers/fred.py +535 -0
  255. aiecs/tools/apisource/providers/newsapi.py +409 -0
  256. aiecs/tools/apisource/providers/worldbank.py +352 -0
  257. aiecs/tools/apisource/reliability/__init__.py +12 -0
  258. aiecs/tools/apisource/reliability/error_handler.py +363 -0
  259. aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
  260. aiecs/tools/apisource/tool.py +832 -0
  261. aiecs/tools/apisource/utils/__init__.py +9 -0
  262. aiecs/tools/apisource/utils/validators.py +334 -0
  263. aiecs/tools/base_tool.py +415 -21
  264. aiecs/tools/docs/__init__.py +121 -0
  265. aiecs/tools/docs/ai_document_orchestrator.py +607 -0
  266. aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
  267. aiecs/tools/docs/content_insertion_tool.py +1320 -0
  268. aiecs/tools/docs/document_creator_tool.py +1323 -0
  269. aiecs/tools/docs/document_layout_tool.py +1160 -0
  270. aiecs/tools/docs/document_parser_tool.py +1011 -0
  271. aiecs/tools/docs/document_writer_tool.py +1829 -0
  272. aiecs/tools/knowledge_graph/__init__.py +17 -0
  273. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
  274. aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
  275. aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
  276. aiecs/tools/langchain_adapter.py +300 -138
  277. aiecs/tools/schema_generator.py +455 -0
  278. aiecs/tools/search_tool/__init__.py +100 -0
  279. aiecs/tools/search_tool/analyzers.py +581 -0
  280. aiecs/tools/search_tool/cache.py +264 -0
  281. aiecs/tools/search_tool/constants.py +128 -0
  282. aiecs/tools/search_tool/context.py +224 -0
  283. aiecs/tools/search_tool/core.py +778 -0
  284. aiecs/tools/search_tool/deduplicator.py +119 -0
  285. aiecs/tools/search_tool/error_handler.py +242 -0
  286. aiecs/tools/search_tool/metrics.py +343 -0
  287. aiecs/tools/search_tool/rate_limiter.py +172 -0
  288. aiecs/tools/search_tool/schemas.py +275 -0
  289. aiecs/tools/statistics/__init__.py +80 -0
  290. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
  291. aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
  292. aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
  293. aiecs/tools/statistics/data_loader_tool.py +555 -0
  294. aiecs/tools/statistics/data_profiler_tool.py +638 -0
  295. aiecs/tools/statistics/data_transformer_tool.py +580 -0
  296. aiecs/tools/statistics/data_visualizer_tool.py +498 -0
  297. aiecs/tools/statistics/model_trainer_tool.py +507 -0
  298. aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
  299. aiecs/tools/task_tools/__init__.py +49 -36
  300. aiecs/tools/task_tools/chart_tool.py +200 -184
  301. aiecs/tools/task_tools/classfire_tool.py +268 -267
  302. aiecs/tools/task_tools/image_tool.py +175 -131
  303. aiecs/tools/task_tools/office_tool.py +226 -146
  304. aiecs/tools/task_tools/pandas_tool.py +477 -121
  305. aiecs/tools/task_tools/report_tool.py +390 -142
  306. aiecs/tools/task_tools/research_tool.py +149 -79
  307. aiecs/tools/task_tools/scraper_tool.py +339 -145
  308. aiecs/tools/task_tools/stats_tool.py +448 -209
  309. aiecs/tools/temp_file_manager.py +26 -24
  310. aiecs/tools/tool_executor/__init__.py +18 -16
  311. aiecs/tools/tool_executor/tool_executor.py +364 -52
  312. aiecs/utils/LLM_output_structor.py +74 -48
  313. aiecs/utils/__init__.py +14 -3
  314. aiecs/utils/base_callback.py +0 -3
  315. aiecs/utils/cache_provider.py +696 -0
  316. aiecs/utils/execution_utils.py +50 -31
  317. aiecs/utils/prompt_loader.py +1 -0
  318. aiecs/utils/token_usage_repository.py +37 -11
  319. aiecs/ws/socket_server.py +14 -4
  320. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
  321. aiecs-1.7.6.dist-info/RECORD +337 -0
  322. aiecs-1.7.6.dist-info/entry_points.txt +13 -0
  323. aiecs/config/registry.py +0 -19
  324. aiecs/domain/context/content_engine.py +0 -982
  325. aiecs/llm/base_client.py +0 -99
  326. aiecs/llm/openai_client.py +0 -125
  327. aiecs/llm/vertex_client.py +0 -186
  328. aiecs/llm/xai_client.py +0 -184
  329. aiecs/scripts/dependency_checker.py +0 -857
  330. aiecs/scripts/quick_dependency_check.py +0 -269
  331. aiecs/tools/task_tools/search_api.py +0 -7
  332. aiecs-1.0.1.dist-info/RECORD +0 -90
  333. aiecs-1.0.1.dist-info/entry_points.txt +0 -7
  334. /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
  335. /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
  336. /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
  337. /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
  338. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
  339. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
  340. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,507 @@
1
+ """
2
+ Model Trainer Tool - AutoML and machine learning model training
3
+
4
+ This tool provides AutoML capabilities with:
5
+ - Automatic model selection for classification and regression
6
+ - Hyperparameter tuning
7
+ - Model evaluation and comparison
8
+ - Feature importance analysis
9
+ - Model explanation support
10
+ """
11
+
12
+ import logging
13
+ from typing import Dict, Any, List, Optional, Union
14
+ from enum import Enum
15
+
16
+ import pandas as pd # type: ignore[import-untyped]
17
+ import numpy as np
18
+ from sklearn.model_selection import train_test_split, cross_val_score # type: ignore[import-untyped]
19
+ from sklearn.metrics import ( # type: ignore[import-untyped]
20
+ accuracy_score,
21
+ precision_score,
22
+ recall_score,
23
+ f1_score,
24
+ r2_score,
25
+ mean_squared_error,
26
+ )
27
+ from sklearn.ensemble import ( # type: ignore[import-untyped]
28
+ RandomForestClassifier,
29
+ RandomForestRegressor,
30
+ GradientBoostingClassifier,
31
+ GradientBoostingRegressor,
32
+ )
33
+ from sklearn.linear_model import LogisticRegression, LinearRegression # type: ignore[import-untyped]
34
+ from sklearn.preprocessing import LabelEncoder # type: ignore[import-untyped]
35
+ from pydantic import BaseModel, Field
36
+ from pydantic_settings import BaseSettings, SettingsConfigDict
37
+
38
+ from aiecs.tools.base_tool import BaseTool
39
+ from aiecs.tools import register_tool
40
+
41
+
42
class ModelType(str, Enum):
    """Supported model types"""

    # Members also subclass ``str``, so each one compares equal to its raw
    # value (``ModelType.AUTO == "auto"``) and can be built from it
    # (``ModelType("auto")``).

    # Linear models.
    LOGISTIC_REGRESSION = "logistic_regression"
    LINEAR_REGRESSION = "linear_regression"

    # Random-forest ensembles.
    RANDOM_FOREST_CLASSIFIER = "random_forest_classifier"
    RANDOM_FOREST_REGRESSOR = "random_forest_regressor"

    # Gradient-boosting ensembles.
    GRADIENT_BOOSTING_CLASSIFIER = "gradient_boosting_classifier"
    GRADIENT_BOOSTING_REGRESSOR = "gradient_boosting_regressor"

    # Placeholder: train_model swaps this for a concrete member before
    # building the estimator.
    AUTO = "auto"
52
+
53
+
54
class TaskType(str, Enum):
    """Machine learning task types"""

    # ``str`` mix-in: members compare equal to their raw values and can be
    # rebuilt from them, as evaluate_model does with
    # ``TaskType(model_info["task_type"])``.

    CLASSIFICATION = "classification"
    REGRESSION = "regression"
    # NOTE(review): no code visible in this part of the file produces or
    # handles CLUSTERING - confirm it is supported before relying on it.
    CLUSTERING = "clustering"
60
+
61
+
62
class ModelTrainerError(Exception):
    """Root of the model-trainer exception hierarchy.

    Catch this type to handle every error this module defines.
    """
64
+
65
+
66
class TrainingError(ModelTrainerError):
    """Signals that a training, model-selection, or evaluation step failed.

    The public methods of ModelTrainerTool raise this instead of leaking the
    underlying library exception.
    """
68
+
69
+
70
+ @register_tool("model_trainer")
71
+ class ModelTrainerTool(BaseTool):
72
+ """
73
+ AutoML tool that can:
74
+ 1. Train multiple model types
75
+ 2. Perform hyperparameter tuning
76
+ 3. Evaluate and compare models
77
+ 4. Generate feature importance
78
+ 5. Provide model explanations
79
+ """
80
+
81
+ # Configuration schema
82
class Config(BaseSettings):
    """Configuration for the model trainer tool

    Automatically reads from environment variables with MODEL_TRAINER_ prefix.
    Example: MODEL_TRAINER_TEST_SIZE -> test_size
    """

    model_config = SettingsConfigDict(env_prefix="MODEL_TRAINER_")

    # Passed to train_test_split by train_model.
    test_size: float = Field(
        default=0.2,
        description="Proportion of data to use for testing",
    )
    random_state: int = Field(
        default=42,
        description="Random state for reproducibility",
    )

    # NOTE(review): the three settings below are not read by any method
    # visible in this part of the file (train_model takes its fold count
    # from its own ``cross_validation`` argument) - confirm where they are
    # consumed.
    cv_folds: int = Field(
        default=5,
        description="Number of cross-validation folds",
    )
    enable_hyperparameter_tuning: bool = Field(
        default=False,
        description="Whether to enable hyperparameter tuning",
    )
    max_tuning_iterations: int = Field(
        default=20,
        description="Maximum number of hyperparameter tuning iterations",
    )
102
+
103
def __init__(self, config: Optional[Dict[str, Any]] = None, **kwargs):
    """Set up configuration, logging, and the trained-model registry.

    Per the original author's notes, BaseTool resolves configuration from,
    in decreasing priority:
    1. Explicit config dict
    2. YAML config files (config/tools/model_trainer.yaml)
    3. Environment variables (via dotenv from .env files)
    4. Tool defaults

    Args:
        config: Optional configuration overrides
        **kwargs: Additional arguments passed to BaseTool (e.g., tool_name)
    """
    super().__init__(config, **kwargs)

    # BaseTool is expected to leave the resolved settings object in
    # self._config_obj; fall back to the class defaults when it is falsy.
    self.config = self._config_obj or self.Config()

    module_logger = logging.getLogger(__name__)
    self.logger = module_logger
    if not module_logger.handlers:
        # First tool instance in this process: attach a stream handler so
        # messages are emitted even without application logging setup.
        stream_handler = logging.StreamHandler()
        stream_handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
        module_logger.addHandler(stream_handler)
        # NOTE(review): the extracted source lost its indentation; setLevel
        # is assumed to belong to this branch - confirm against the
        # original file.
        module_logger.setLevel(logging.INFO)

    self._init_external_tools()

    # Registry filled by train_model: model_id -> model plus its metadata.
    self.trained_models: Dict[str, Any] = {}
131
+
132
+ def _init_external_tools(self):
133
+ """Initialize external task tools"""
134
+ self.external_tools = {}
135
+
136
+ # Schema definitions
137
class Train_modelSchema(BaseModel):
    """Schema for train_model operation"""

    # Fields mirror train_model's parameters, name for name and default for
    # default. ``data`` omits the DataFrame alternative that the method
    # signature also accepts.
    data: Union[Dict[str, Any], List[Dict[str, Any]]] = Field(
        description="Training data",
    )
    target: str = Field(
        description="Target column name",
    )
    model_type: ModelType = Field(
        default=ModelType.AUTO,
        description="Model type to train",
    )
    auto_tune: bool = Field(
        default=False,
        description="Enable hyperparameter tuning",
    )
    cross_validation: int = Field(
        default=5,
        description="Number of CV folds",
    )
145
+
146
class Auto_select_modelSchema(BaseModel):
    """Schema for auto_select_model operation"""

    # Fields mirror auto_select_model's parameters.
    data: Union[Dict[str, Any], List[Dict[str, Any]]] = Field(
        description="Data for model selection",
    )
    target: str = Field(
        description="Target column name",
    )
    # None makes auto_select_model determine the task from the target column.
    task_type: Optional[TaskType] = Field(
        default=None,
        description="Task type",
    )
152
+
153
class Evaluate_modelSchema(BaseModel):
    """Schema for evaluate_model operation"""

    # Fields mirror evaluate_model's parameters. ``model_id`` is the
    # identifier returned by train_model.
    model_id: str = Field(
        description="ID of trained model",
    )
    test_data: Union[Dict[str, Any], List[Dict[str, Any]]] = Field(
        description="Test data",
    )
    target: str = Field(
        description="Target column name",
    )
159
+
160
class Tune_hyperparametersSchema(BaseModel):
    """Schema for tune_hyperparameters operation"""

    # Fields mirror tune_hyperparameters' parameters. ``model_type`` has no
    # default here, so it must be supplied.
    data: Union[Dict[str, Any], List[Dict[str, Any]]] = Field(
        description="Training data",
    )
    target: str = Field(
        description="Target column name",
    )
    model_type: ModelType = Field(
        description="Model type to tune",
    )
166
+
167
def train_model(
    self,
    data: Union[Dict[str, Any], List[Dict[str, Any]], pd.DataFrame],
    target: str,
    model_type: ModelType = ModelType.AUTO,
    auto_tune: bool = False,
    cross_validation: int = 5,
) -> Dict[str, Any]:
    """
    Train a model, score it on a held-out split, and register it by ID.

    Args:
        data: Training data; converted with ``self._to_dataframe``.
        target: Target column name; must be a column of the converted data.
        model_type: Type of model to train (auto-selected if AUTO)
        auto_tune: Enable hyperparameter tuning.
            NOTE(review): this flag is not read anywhere in this method -
            confirm whether tuning is meant to be wired in here.
        cross_validation: Number of cross-validation folds

    Returns:
        Dict containing:
        - model_id: Identifier under which the model is stored in
          ``self.trained_models``
        - model_type: Type of model trained
        - task_type: Task type determined from the target column
        - performance: Metrics computed on the held-out split
        - feature_importance: Feature importance scores
        - cross_validation_scores: Per-fold scores with their mean and std
        - training_samples / test_samples: Sizes of the two splits

    Raises:
        TrainingError: If any step fails. The underlying exception is
            chained as ``__cause__`` so its traceback is preserved.
    """
    try:
        df = self._to_dataframe(data)

        # Fail early with a readable message; otherwise pandas reports a
        # missing column as an opaque "not found in axis" KeyError.
        if target not in df.columns:
            raise ValueError(f"Target column '{target}' not found in data")

        # Separate features and target
        X = df.drop(columns=[target])
        y = df[target]

        # Determine task type and model
        task_type = self._determine_task_type(y)

        if model_type == ModelType.AUTO:
            model_type = self._auto_select_model_type(task_type)
            self.logger.info(f"Auto-selected model type: {model_type.value}")

        # Prepare data
        X_processed, feature_names = self._preprocess_features(X)

        # Encode class labels for classification; the fitted encoder is
        # stored with the model so evaluate_model can reuse it.
        if task_type == TaskType.CLASSIFICATION:
            label_encoder = LabelEncoder()
            y = label_encoder.fit_transform(y)
        else:
            label_encoder = None

        # Split data
        X_train, X_test, y_train, y_test = train_test_split(
            X_processed,
            y,
            test_size=self.config.test_size,
            random_state=self.config.random_state,
        )

        # Create and train model
        model = self._create_model(model_type)
        model.fit(X_train, y_train)

        # Score on the held-out split
        y_pred = model.predict(X_test)
        performance = self._calculate_metrics(y_test, y_pred, task_type)

        # Cross-validation runs over the full dataset, not just X_train
        cv_scores = cross_val_score(model, X_processed, y, cv=cross_validation)

        # Feature importance
        feature_importance = self._get_feature_importance(model, feature_names)

        # Store model
        model_id = f"model_{len(self.trained_models) + 1}"
        self.trained_models[model_id] = {
            "model": model,
            "model_type": model_type.value,
            "task_type": task_type.value,
            "feature_names": feature_names,
            "label_encoder": label_encoder,
        }

        return {
            "model_id": model_id,
            "model_type": model_type.value,
            "task_type": task_type.value,
            "performance": performance,
            "feature_importance": feature_importance,
            "cross_validation_scores": {
                "scores": cv_scores.tolist(),
                "mean": float(cv_scores.mean()),
                "std": float(cv_scores.std()),
            },
            "training_samples": len(X_train),
            "test_samples": len(X_test),
        }

    except Exception as e:
        self.logger.error(f"Error training model: {e}")
        # Chain the cause so the original traceback is not lost.
        raise TrainingError(f"Model training failed: {e}") from e
269
+
270
def auto_select_model(
    self,
    data: Union[Dict[str, Any], List[Dict[str, Any]], pd.DataFrame],
    target: str,
    task_type: Optional[TaskType] = None,
) -> Dict[str, Any]:
    """
    Recommend a model type for the given data without training anything.

    Args:
        data: Data for model selection; converted with ``self._to_dataframe``.
        target: Target column name; must be a column of the converted data.
        task_type: Optional task type (determined from the target column
            if None)

    Returns:
        Dict containing:
        - recommended_model: Value of the selected ModelType
        - task_type: Value of the task type used
        - reasoning: Output of ``self._explain_model_selection``
        - confidence: Always the literal "high" (hard-coded, not computed)

    Raises:
        TrainingError: If any step fails. The underlying exception is
            chained as ``__cause__`` so its traceback is preserved.
    """
    try:
        df = self._to_dataframe(data)

        # Fail early with a readable message; a bare KeyError from the
        # column lookup would stringify to just the quoted column name.
        if target not in df.columns:
            raise ValueError(f"Target column '{target}' not found in data")

        y = df[target]

        # Determine task type
        if task_type is None:
            task_type = self._determine_task_type(y)

        # Select model
        model_type = self._auto_select_model_type(task_type)

        # Provide reasoning
        reasoning = self._explain_model_selection(df, y, task_type, model_type)

        return {
            "recommended_model": model_type.value,
            "task_type": task_type.value,
            "reasoning": reasoning,
            "confidence": "high",
        }

    except Exception as e:
        self.logger.error(f"Error in auto model selection: {e}")
        # Chain the cause so the original traceback is not lost.
        raise TrainingError(f"Model selection failed: {e}") from e
311
+
312
def evaluate_model(
    self,
    model_id: str,
    test_data: Union[Dict[str, Any], List[Dict[str, Any]], pd.DataFrame],
    target: str,
) -> Dict[str, Any]:
    """
    Evaluate a previously trained model on held-out data.

    Args:
        model_id: Key of the model in ``self.trained_models``
        test_data: Test data containing the feature columns and ``target``
        target: Target column name

    Returns:
        Dict with ``model_id``, ``performance`` (metric name -> value) and
        ``test_samples`` (number of evaluated rows)

    Raises:
        TrainingError: If the model is unknown or evaluation fails. The
            original exception is attached as ``__cause__``.
    """
    try:
        if model_id not in self.trained_models:
            raise TrainingError(f"Model {model_id} not found")

        df = self._to_dataframe(test_data)
        X_test = df.drop(columns=[target])
        y_test = df[target]

        model_info = self.trained_models[model_id]
        model = model_info["model"]
        task_type = TaskType(model_info["task_type"])

        # The model receives a bare array, so column ORDER is the only link
        # between test features and training features. When every stored
        # training feature is available, select them in the stored order;
        # this also drops extra columns that would otherwise cause a shape
        # mismatch. If names cannot be matched, fall back to the data as-is.
        feature_names = model_info.get("feature_names")
        if feature_names and set(feature_names).issubset(X_test.columns):
            X_test = X_test[list(feature_names)]

        # Preprocess features
        # NOTE(review): _preprocess_features fits fresh label encoders on
        # every call, so categorical codes here may not match the ones used
        # at training time - confirm against train_model.
        X_processed, _ = self._preprocess_features(X_test)

        # Handle label encoding for classification
        label_encoder = model_info["label_encoder"]
        if label_encoder is not None:
            y_test = label_encoder.transform(y_test)

        # Make predictions
        y_pred = model.predict(X_processed)

        # Calculate metrics
        performance = self._calculate_metrics(y_test, y_pred, task_type)

        return {
            "model_id": model_id,
            "performance": performance,
            "test_samples": len(X_test),
        }

    except Exception as e:
        self.logger.error(f"Error evaluating model: {e}")
        raise TrainingError(f"Model evaluation failed: {e}") from e
363
+
364
def tune_hyperparameters(
    self,
    data: Union[Dict[str, Any], List[Dict[str, Any]], pd.DataFrame],
    target: str,
    model_type: ModelType,
) -> Dict[str, Any]:
    """
    Placeholder for hyperparameter tuning of the given model type.

    No search is performed: the model is trained once through
    ``train_model`` with ``auto_tune=False`` and the training result is
    returned with an extra ``tuning_note`` entry.

    Args:
        data: Training data
        target: Target column name
        model_type: Model type to train

    Returns:
        The ``train_model`` result dict plus the key ``tuning_note``

    Raises:
        TrainingError: If training fails
    """
    try:
        # A real implementation (e.g. a grid search) would go here.
        self.logger.info("Hyperparameter tuning is a placeholder - train with default params")

        outcome = self.train_model(data, target, model_type, auto_tune=False)
        outcome["tuning_note"] = "Using default parameters - full tuning not implemented"
        return outcome

    except Exception as e:
        self.logger.error(f"Error tuning hyperparameters: {e}")
        raise TrainingError(f"Hyperparameter tuning failed: {e}")
394
+
395
+ # Internal helper methods
396
+
397
+ def _to_dataframe(self, data: Union[Dict, List, pd.DataFrame]) -> pd.DataFrame:
398
+ """Convert data to DataFrame"""
399
+ if isinstance(data, pd.DataFrame):
400
+ return data
401
+ elif isinstance(data, list):
402
+ return pd.DataFrame(data)
403
+ elif isinstance(data, dict):
404
+ return pd.DataFrame([data])
405
+ else:
406
+ raise TrainingError(f"Unsupported data type: {type(data)}")
407
+
408
def _determine_task_type(self, y: pd.Series) -> TaskType:
    """
    Infer the task type from the target column.

    Rules:
        * object, string, categorical and boolean targets -> classification
        * integer targets with fewer than 10 distinct values -> classification
        * everything else -> regression

    Dtypes are tested with ``pandas.api.types`` predicates rather than by
    comparing against dtype-name strings, so pandas string dtypes, the
    nullable ``boolean`` dtype and every integer width (including nullable
    and unsigned integers) are recognised, not just ``int64``/``int32``.

    Args:
        y: Target values

    Returns:
        TaskType.CLASSIFICATION or TaskType.REGRESSION
    """
    dtype = y.dtype
    dtype_checks = pd.api.types

    label_like = (
        dtype_checks.is_bool_dtype(dtype)
        or dtype_checks.is_object_dtype(dtype)
        or dtype_checks.is_string_dtype(dtype)
        or isinstance(dtype, pd.CategoricalDtype)
    )
    if label_like:
        return TaskType.CLASSIFICATION

    # Low-cardinality integers are treated as class labels.
    if dtype_checks.is_integer_dtype(dtype) and y.nunique() < 10:
        return TaskType.CLASSIFICATION

    return TaskType.REGRESSION
416
+
417
def _auto_select_model_type(self, task_type: TaskType) -> ModelType:
    """Return the default model for a task: a random forest classifier for
    classification, a random forest regressor for anything else."""
    wants_classifier = task_type == TaskType.CLASSIFICATION
    return (
        ModelType.RANDOM_FOREST_CLASSIFIER
        if wants_classifier
        else ModelType.RANDOM_FOREST_REGRESSOR
    )
423
+
424
def _create_model(self, model_type: ModelType):
    """
    Build an unfitted estimator for ``model_type``.

    Every estimator except LinearRegression is seeded with
    ``self.config.random_state``. Raises TrainingError for a model type
    that has no factory.
    """
    # Ordered (type, factory) pairs. Factories are lambdas so that neither
    # the estimator nor self.config is touched until a match is found.
    factories = [
        (
            ModelType.LOGISTIC_REGRESSION,
            lambda: LogisticRegression(random_state=self.config.random_state, max_iter=1000),
        ),
        (
            ModelType.LINEAR_REGRESSION,
            lambda: LinearRegression(),
        ),
        (
            ModelType.RANDOM_FOREST_CLASSIFIER,
            lambda: RandomForestClassifier(random_state=self.config.random_state, n_estimators=100),
        ),
        (
            ModelType.RANDOM_FOREST_REGRESSOR,
            lambda: RandomForestRegressor(random_state=self.config.random_state, n_estimators=100),
        ),
        (
            ModelType.GRADIENT_BOOSTING_CLASSIFIER,
            lambda: GradientBoostingClassifier(random_state=self.config.random_state),
        ),
        (
            ModelType.GRADIENT_BOOSTING_REGRESSOR,
            lambda: GradientBoostingRegressor(random_state=self.config.random_state),
        ),
    ]

    for candidate, build in factories:
        if model_type == candidate:
            return build()

    raise TrainingError(f"Unsupported model type: {model_type}")
440
+
441
+ def _preprocess_features(self, X: pd.DataFrame) -> tuple:
442
+ """Preprocess features for training"""
443
+ X_processed = X.copy()
444
+
445
+ # Handle categorical variables with label encoding
446
+ for col in X_processed.select_dtypes(include=["object", "category"]).columns:
447
+ le = LabelEncoder()
448
+ X_processed[col] = le.fit_transform(X_processed[col].astype(str))
449
+
450
+ # Handle missing values
451
+ X_processed = X_processed.fillna(X_processed.mean(numeric_only=True))
452
+
453
+ feature_names = X_processed.columns.tolist()
454
+
455
+ return X_processed.values, feature_names
456
+
457
def _calculate_metrics(self, y_true, y_pred, task_type: TaskType) -> Dict[str, float]:
    """
    Compute performance metrics for a set of predictions.

    Args:
        y_true: Ground-truth target values (any array-like)
        y_pred: Predicted target values (any array-like)
        task_type: Selects the metric family

    Returns:
        For classification: ``accuracy`` plus weighted ``precision``,
        ``recall`` and ``f1_score`` (undefined scores are reported as 0).
        Otherwise: ``r2_score``, ``mse``, ``rmse`` and ``mae``.
    """
    if task_type == TaskType.CLASSIFICATION:
        return {
            "accuracy": float(accuracy_score(y_true, y_pred)),
            "precision": float(precision_score(y_true, y_pred, average="weighted", zero_division=0)),
            "recall": float(recall_score(y_true, y_pred, average="weighted", zero_division=0)),
            "f1_score": float(f1_score(y_true, y_pred, average="weighted", zero_division=0)),
        }

    mse = mean_squared_error(y_true, y_pred)

    # Coerce to flat arrays before subtracting: plain lists do not support
    # "-", and two Series would be aligned on their index labels rather
    # than compared position by position.
    truth = np.asarray(y_true).ravel()
    predicted = np.asarray(y_pred).ravel()
    mae = np.mean(np.abs(truth - predicted))

    return {
        "r2_score": float(r2_score(y_true, y_pred)),
        "mse": float(mse),
        "rmse": float(np.sqrt(mse)),
        "mae": float(mae),
    }
474
+
475
+ def _get_feature_importance(self, model, feature_names: List[str]) -> Dict[str, float]:
476
+ """Extract feature importance from model"""
477
+ if hasattr(model, "feature_importances_"):
478
+ importance = model.feature_importances_
479
+ return {name: float(imp) for name, imp in zip(feature_names, importance)}
480
+ elif hasattr(model, "coef_"):
481
+ importance = np.abs(model.coef_).flatten()
482
+ return {name: float(imp) for name, imp in zip(feature_names, importance)}
483
+ else:
484
+ return {}
485
+
486
def _explain_model_selection(
    self,
    df: pd.DataFrame,
    y: pd.Series,
    task_type: TaskType,
    model_type: ModelType,
) -> str:
    """
    Build a short, human-readable justification for a model choice.

    The text lists the task type and the dataset size (rows, and columns
    minus one for the target), and adds a sentence when a random forest
    was chosen. ``y`` is accepted but not used. Parts are joined with "; ".
    """
    sample_count = len(df)
    feature_count = len(df.columns) - 1  # every column except the target

    parts = [
        f"Task type: {task_type.value}",
        f"Dataset size: {sample_count} samples, {feature_count} features",
    ]

    forest_types = (
        ModelType.RANDOM_FOREST_CLASSIFIER,
        ModelType.RANDOM_FOREST_REGRESSOR,
    )
    if model_type in forest_types:
        parts.append("Random Forest selected for robust performance and feature importance")

    return "; ".join(parts)