aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of aiecs might be problematic.

Files changed (340)
  1. aiecs/__init__.py +13 -16
  2. aiecs/__main__.py +7 -7
  3. aiecs/aiecs_client.py +269 -75
  4. aiecs/application/executors/operation_executor.py +79 -54
  5. aiecs/application/knowledge_graph/__init__.py +7 -0
  6. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  7. aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
  8. aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
  11. aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
  12. aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
  13. aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
  14. aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
  15. aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
  16. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  17. aiecs/application/knowledge_graph/extractors/base.py +98 -0
  18. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
  19. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
  20. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
  21. aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
  22. aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
  23. aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
  24. aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
  25. aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
  26. aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
  27. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
  28. aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
  29. aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
  30. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
  31. aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
  32. aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
  33. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
  34. aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
  35. aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
  36. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  37. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
  38. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
  39. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  40. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
  41. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  42. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  43. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
  44. aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
  45. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
  46. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  47. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  48. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
  49. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
  50. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
  51. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
  52. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
  53. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
  54. aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
  55. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
  56. aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
  57. aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
  58. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
  59. aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
  60. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  61. aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
  62. aiecs/application/knowledge_graph/search/reranker.py +293 -0
  63. aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
  64. aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
  65. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  66. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
  67. aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
  68. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  69. aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
  70. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  71. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
  72. aiecs/common/__init__.py +9 -0
  73. aiecs/common/knowledge_graph/__init__.py +17 -0
  74. aiecs/common/knowledge_graph/runnable.py +471 -0
  75. aiecs/config/__init__.py +20 -5
  76. aiecs/config/config.py +762 -31
  77. aiecs/config/graph_config.py +131 -0
  78. aiecs/config/tool_config.py +399 -0
  79. aiecs/core/__init__.py +29 -13
  80. aiecs/core/interface/__init__.py +2 -2
  81. aiecs/core/interface/execution_interface.py +22 -22
  82. aiecs/core/interface/storage_interface.py +37 -88
  83. aiecs/core/registry/__init__.py +31 -0
  84. aiecs/core/registry/service_registry.py +92 -0
  85. aiecs/domain/__init__.py +270 -1
  86. aiecs/domain/agent/__init__.py +191 -0
  87. aiecs/domain/agent/base_agent.py +3870 -0
  88. aiecs/domain/agent/exceptions.py +99 -0
  89. aiecs/domain/agent/graph_aware_mixin.py +569 -0
  90. aiecs/domain/agent/hybrid_agent.py +1435 -0
  91. aiecs/domain/agent/integration/__init__.py +29 -0
  92. aiecs/domain/agent/integration/context_compressor.py +216 -0
  93. aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
  94. aiecs/domain/agent/integration/protocols.py +281 -0
  95. aiecs/domain/agent/integration/retry_policy.py +218 -0
  96. aiecs/domain/agent/integration/role_config.py +213 -0
  97. aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
  98. aiecs/domain/agent/lifecycle.py +291 -0
  99. aiecs/domain/agent/llm_agent.py +692 -0
  100. aiecs/domain/agent/memory/__init__.py +12 -0
  101. aiecs/domain/agent/memory/conversation.py +1124 -0
  102. aiecs/domain/agent/migration/__init__.py +14 -0
  103. aiecs/domain/agent/migration/conversion.py +163 -0
  104. aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
  105. aiecs/domain/agent/models.py +884 -0
  106. aiecs/domain/agent/observability.py +479 -0
  107. aiecs/domain/agent/persistence.py +449 -0
  108. aiecs/domain/agent/prompts/__init__.py +29 -0
  109. aiecs/domain/agent/prompts/builder.py +159 -0
  110. aiecs/domain/agent/prompts/formatters.py +187 -0
  111. aiecs/domain/agent/prompts/template.py +255 -0
  112. aiecs/domain/agent/registry.py +253 -0
  113. aiecs/domain/agent/tool_agent.py +444 -0
  114. aiecs/domain/agent/tools/__init__.py +15 -0
  115. aiecs/domain/agent/tools/schema_generator.py +364 -0
  116. aiecs/domain/community/__init__.py +155 -0
  117. aiecs/domain/community/agent_adapter.py +469 -0
  118. aiecs/domain/community/analytics.py +432 -0
  119. aiecs/domain/community/collaborative_workflow.py +648 -0
  120. aiecs/domain/community/communication_hub.py +634 -0
  121. aiecs/domain/community/community_builder.py +320 -0
  122. aiecs/domain/community/community_integration.py +796 -0
  123. aiecs/domain/community/community_manager.py +803 -0
  124. aiecs/domain/community/decision_engine.py +849 -0
  125. aiecs/domain/community/exceptions.py +231 -0
  126. aiecs/domain/community/models/__init__.py +33 -0
  127. aiecs/domain/community/models/community_models.py +234 -0
  128. aiecs/domain/community/resource_manager.py +461 -0
  129. aiecs/domain/community/shared_context_manager.py +589 -0
  130. aiecs/domain/context/__init__.py +40 -10
  131. aiecs/domain/context/context_engine.py +1910 -0
  132. aiecs/domain/context/conversation_models.py +87 -53
  133. aiecs/domain/context/graph_memory.py +582 -0
  134. aiecs/domain/execution/model.py +12 -4
  135. aiecs/domain/knowledge_graph/__init__.py +19 -0
  136. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  137. aiecs/domain/knowledge_graph/models/entity.py +148 -0
  138. aiecs/domain/knowledge_graph/models/evidence.py +178 -0
  139. aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
  140. aiecs/domain/knowledge_graph/models/path.py +171 -0
  141. aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
  142. aiecs/domain/knowledge_graph/models/query.py +261 -0
  143. aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
  144. aiecs/domain/knowledge_graph/models/relation.py +202 -0
  145. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  146. aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
  147. aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
  148. aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
  149. aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
  150. aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
  151. aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
  152. aiecs/domain/task/dsl_processor.py +172 -56
  153. aiecs/domain/task/model.py +20 -8
  154. aiecs/domain/task/task_context.py +27 -24
  155. aiecs/infrastructure/__init__.py +0 -2
  156. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  157. aiecs/infrastructure/graph_storage/base.py +837 -0
  158. aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
  159. aiecs/infrastructure/graph_storage/cache.py +424 -0
  160. aiecs/infrastructure/graph_storage/distributed.py +223 -0
  161. aiecs/infrastructure/graph_storage/error_handling.py +380 -0
  162. aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
  163. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  164. aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
  165. aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
  166. aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
  167. aiecs/infrastructure/graph_storage/metrics.py +344 -0
  168. aiecs/infrastructure/graph_storage/migration.py +400 -0
  169. aiecs/infrastructure/graph_storage/pagination.py +483 -0
  170. aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
  171. aiecs/infrastructure/graph_storage/postgres.py +1563 -0
  172. aiecs/infrastructure/graph_storage/property_storage.py +353 -0
  173. aiecs/infrastructure/graph_storage/protocols.py +76 -0
  174. aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
  175. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  176. aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
  177. aiecs/infrastructure/graph_storage/streaming.py +487 -0
  178. aiecs/infrastructure/graph_storage/tenant.py +412 -0
  179. aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
  180. aiecs/infrastructure/messaging/websocket_manager.py +51 -35
  181. aiecs/infrastructure/monitoring/__init__.py +22 -0
  182. aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
  183. aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
  184. aiecs/infrastructure/monitoring/structured_logger.py +3 -7
  185. aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
  186. aiecs/infrastructure/persistence/__init__.py +14 -1
  187. aiecs/infrastructure/persistence/context_engine_client.py +184 -0
  188. aiecs/infrastructure/persistence/database_manager.py +67 -43
  189. aiecs/infrastructure/persistence/file_storage.py +180 -103
  190. aiecs/infrastructure/persistence/redis_client.py +74 -21
  191. aiecs/llm/__init__.py +73 -25
  192. aiecs/llm/callbacks/__init__.py +11 -0
  193. aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
  194. aiecs/llm/client_factory.py +224 -36
  195. aiecs/llm/client_resolver.py +155 -0
  196. aiecs/llm/clients/__init__.py +38 -0
  197. aiecs/llm/clients/base_client.py +324 -0
  198. aiecs/llm/clients/google_function_calling_mixin.py +457 -0
  199. aiecs/llm/clients/googleai_client.py +241 -0
  200. aiecs/llm/clients/openai_client.py +158 -0
  201. aiecs/llm/clients/openai_compatible_mixin.py +367 -0
  202. aiecs/llm/clients/vertex_client.py +897 -0
  203. aiecs/llm/clients/xai_client.py +201 -0
  204. aiecs/llm/config/__init__.py +51 -0
  205. aiecs/llm/config/config_loader.py +272 -0
  206. aiecs/llm/config/config_validator.py +206 -0
  207. aiecs/llm/config/model_config.py +143 -0
  208. aiecs/llm/protocols.py +149 -0
  209. aiecs/llm/utils/__init__.py +10 -0
  210. aiecs/llm/utils/validate_config.py +89 -0
  211. aiecs/main.py +140 -121
  212. aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
  213. aiecs/scripts/aid/__init__.py +19 -0
  214. aiecs/scripts/aid/module_checker.py +499 -0
  215. aiecs/scripts/aid/version_manager.py +235 -0
  216. aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
  217. aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
  218. aiecs/scripts/dependance_check/__init__.py +15 -0
  219. aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
  220. aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
  221. aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
  222. aiecs/scripts/dependance_patch/__init__.py +7 -0
  223. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  224. aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
  225. aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
  226. aiecs/scripts/knowledge_graph/__init__.py +3 -0
  227. aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
  228. aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
  229. aiecs/scripts/tools_develop/README.md +671 -0
  230. aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
  231. aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
  232. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  233. aiecs/scripts/tools_develop/__init__.py +21 -0
  234. aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
  235. aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
  236. aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
  237. aiecs/scripts/tools_develop/schema_coverage.py +511 -0
  238. aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
  239. aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
  240. aiecs/scripts/tools_develop/verify_tools.py +352 -0
  241. aiecs/tasks/__init__.py +0 -1
  242. aiecs/tasks/worker.py +115 -47
  243. aiecs/tools/__init__.py +194 -72
  244. aiecs/tools/apisource/__init__.py +99 -0
  245. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  246. aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
  247. aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
  248. aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
  249. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  250. aiecs/tools/apisource/monitoring/metrics.py +330 -0
  251. aiecs/tools/apisource/providers/__init__.py +112 -0
  252. aiecs/tools/apisource/providers/base.py +671 -0
  253. aiecs/tools/apisource/providers/census.py +397 -0
  254. aiecs/tools/apisource/providers/fred.py +535 -0
  255. aiecs/tools/apisource/providers/newsapi.py +409 -0
  256. aiecs/tools/apisource/providers/worldbank.py +352 -0
  257. aiecs/tools/apisource/reliability/__init__.py +12 -0
  258. aiecs/tools/apisource/reliability/error_handler.py +363 -0
  259. aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
  260. aiecs/tools/apisource/tool.py +832 -0
  261. aiecs/tools/apisource/utils/__init__.py +9 -0
  262. aiecs/tools/apisource/utils/validators.py +334 -0
  263. aiecs/tools/base_tool.py +415 -21
  264. aiecs/tools/docs/__init__.py +121 -0
  265. aiecs/tools/docs/ai_document_orchestrator.py +607 -0
  266. aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
  267. aiecs/tools/docs/content_insertion_tool.py +1320 -0
  268. aiecs/tools/docs/document_creator_tool.py +1323 -0
  269. aiecs/tools/docs/document_layout_tool.py +1160 -0
  270. aiecs/tools/docs/document_parser_tool.py +1011 -0
  271. aiecs/tools/docs/document_writer_tool.py +1829 -0
  272. aiecs/tools/knowledge_graph/__init__.py +17 -0
  273. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
  274. aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
  275. aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
  276. aiecs/tools/langchain_adapter.py +300 -138
  277. aiecs/tools/schema_generator.py +455 -0
  278. aiecs/tools/search_tool/__init__.py +100 -0
  279. aiecs/tools/search_tool/analyzers.py +581 -0
  280. aiecs/tools/search_tool/cache.py +264 -0
  281. aiecs/tools/search_tool/constants.py +128 -0
  282. aiecs/tools/search_tool/context.py +224 -0
  283. aiecs/tools/search_tool/core.py +778 -0
  284. aiecs/tools/search_tool/deduplicator.py +119 -0
  285. aiecs/tools/search_tool/error_handler.py +242 -0
  286. aiecs/tools/search_tool/metrics.py +343 -0
  287. aiecs/tools/search_tool/rate_limiter.py +172 -0
  288. aiecs/tools/search_tool/schemas.py +275 -0
  289. aiecs/tools/statistics/__init__.py +80 -0
  290. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
  291. aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
  292. aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
  293. aiecs/tools/statistics/data_loader_tool.py +555 -0
  294. aiecs/tools/statistics/data_profiler_tool.py +638 -0
  295. aiecs/tools/statistics/data_transformer_tool.py +580 -0
  296. aiecs/tools/statistics/data_visualizer_tool.py +498 -0
  297. aiecs/tools/statistics/model_trainer_tool.py +507 -0
  298. aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
  299. aiecs/tools/task_tools/__init__.py +49 -36
  300. aiecs/tools/task_tools/chart_tool.py +200 -184
  301. aiecs/tools/task_tools/classfire_tool.py +268 -267
  302. aiecs/tools/task_tools/image_tool.py +175 -131
  303. aiecs/tools/task_tools/office_tool.py +226 -146
  304. aiecs/tools/task_tools/pandas_tool.py +477 -121
  305. aiecs/tools/task_tools/report_tool.py +390 -142
  306. aiecs/tools/task_tools/research_tool.py +149 -79
  307. aiecs/tools/task_tools/scraper_tool.py +339 -145
  308. aiecs/tools/task_tools/stats_tool.py +448 -209
  309. aiecs/tools/temp_file_manager.py +26 -24
  310. aiecs/tools/tool_executor/__init__.py +18 -16
  311. aiecs/tools/tool_executor/tool_executor.py +364 -52
  312. aiecs/utils/LLM_output_structor.py +74 -48
  313. aiecs/utils/__init__.py +14 -3
  314. aiecs/utils/base_callback.py +0 -3
  315. aiecs/utils/cache_provider.py +696 -0
  316. aiecs/utils/execution_utils.py +50 -31
  317. aiecs/utils/prompt_loader.py +1 -0
  318. aiecs/utils/token_usage_repository.py +37 -11
  319. aiecs/ws/socket_server.py +14 -4
  320. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
  321. aiecs-1.7.6.dist-info/RECORD +337 -0
  322. aiecs-1.7.6.dist-info/entry_points.txt +13 -0
  323. aiecs/config/registry.py +0 -19
  324. aiecs/domain/context/content_engine.py +0 -982
  325. aiecs/llm/base_client.py +0 -99
  326. aiecs/llm/openai_client.py +0 -125
  327. aiecs/llm/vertex_client.py +0 -186
  328. aiecs/llm/xai_client.py +0 -184
  329. aiecs/scripts/dependency_checker.py +0 -857
  330. aiecs/scripts/quick_dependency_check.py +0 -269
  331. aiecs/tools/task_tools/search_api.py +0 -7
  332. aiecs-1.0.1.dist-info/RECORD +0 -90
  333. aiecs-1.0.1.dist-info/entry_points.txt +0 -7
  334. /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
  335. /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
  336. /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
  337. /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
  338. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
  339. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
  340. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
aiecs/tools/statistics/data_transformer_tool.py (new file)
@@ -0,0 +1,580 @@
+"""
+Data Transformer Tool - Data cleaning, transformation, and feature engineering
+
+This tool provides comprehensive data transformation capabilities with:
+- Data cleaning and preprocessing
+- Feature engineering and encoding
+- Normalization and standardization
+- Transformation pipelines
+- Missing value handling
+"""
+
+import logging
+from typing import Dict, Any, List, Optional, Union
+from enum import Enum
+
+import pandas as pd  # type: ignore[import-untyped]
+import numpy as np
+from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder  # type: ignore[import-untyped]
+from pydantic import BaseModel, Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+from aiecs.tools.base_tool import BaseTool
+from aiecs.tools import register_tool
+
+
+class TransformationType(str, Enum):
+    """Types of transformations"""
+
+    # Cleaning operations
+    REMOVE_DUPLICATES = "remove_duplicates"
+    FILL_MISSING = "fill_missing"
+    REMOVE_OUTLIERS = "remove_outliers"
+
+    # Transformation operations
+    NORMALIZE = "normalize"
+    STANDARDIZE = "standardize"
+    LOG_TRANSFORM = "log_transform"
+    BOX_COX = "box_cox"
+
+    # Encoding operations
+    ONE_HOT_ENCODE = "one_hot_encode"
+    LABEL_ENCODE = "label_encode"
+    TARGET_ENCODE = "target_encode"
+
+    # Feature engineering
+    POLYNOMIAL_FEATURES = "polynomial_features"
+    INTERACTION_FEATURES = "interaction_features"
+    BINNING = "binning"
+    AGGREGATION = "aggregation"
+
+
+class MissingValueStrategy(str, Enum):
+    """Strategies for handling missing values"""
+
+    DROP = "drop"
+    MEAN = "mean"
+    MEDIAN = "median"
+    MODE = "mode"
+    FORWARD_FILL = "forward_fill"
+    BACKWARD_FILL = "backward_fill"
+    INTERPOLATE = "interpolate"
+    CONSTANT = "constant"
+
+
+class DataTransformerError(Exception):
+    """Base exception for DataTransformer errors"""
+
+
+class TransformationError(DataTransformerError):
+    """Raised when transformation fails"""
+
+
+@register_tool("data_transformer")
+class DataTransformerTool(BaseTool):
+    """
+    Advanced data transformation tool that can:
+    1. Clean and preprocess data
+    2. Engineer features
+    3. Transform and normalize data
+    4. Build transformation pipelines
+
+    Integrates with pandas_tool for core operations.
+    """
+
+    # Configuration schema
+    class Config(BaseSettings):
+        """Configuration for the data transformer tool
+
+        Automatically reads from environment variables with DATA_TRANSFORMER_ prefix.
+        Example: DATA_TRANSFORMER_OUTLIER_STD_THRESHOLD -> outlier_std_threshold
+        """
+
+        model_config = SettingsConfigDict(env_prefix="DATA_TRANSFORMER_")
+
+        outlier_std_threshold: float = Field(
+            default=3.0,
+            description="Standard deviation threshold for outlier detection",
+        )
+        default_missing_strategy: str = Field(
+            default="mean",
+            description="Default strategy for handling missing values",
+        )
+        enable_pipeline_caching: bool = Field(
+            default=True,
+            description="Whether to enable transformation pipeline caching",
+        )
+        max_one_hot_categories: int = Field(
+            default=10,
+            description="Maximum number of categories for one-hot encoding",
+        )
+
+    def __init__(self, config: Optional[Dict[str, Any]] = None, **kwargs):
+        """
+        Initialize DataTransformerTool with settings.
+
+        Configuration is automatically loaded by BaseTool from:
+        1. Explicit config dict (highest priority)
+        2. YAML config files (config/tools/data_transformer.yaml)
+        3. Environment variables (via dotenv from .env files)
+        4. Tool defaults (lowest priority)
+
+        Args:
+            config: Optional configuration overrides
+            **kwargs: Additional arguments passed to BaseTool (e.g., tool_name)
+        """
+        super().__init__(config, **kwargs)
+
+        # Configuration is automatically loaded by BaseTool into self._config_obj
+        # Access config via self._config_obj (BaseSettings instance)
+        self.config = self._config_obj if self._config_obj else self.Config()
+
+        self.logger = logging.getLogger(__name__)
+        if not self.logger.handlers:
+            handler = logging.StreamHandler()
+            handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
+            self.logger.addHandler(handler)
+        self.logger.setLevel(logging.INFO)
+
+        # Initialize external tools
+        self._init_external_tools()
+
+        # Initialize transformation pipeline cache
+        self.pipeline_cache: Dict[str, Any] = {}
+
+    def _init_external_tools(self):
+        """Initialize external task tools"""
+        self.external_tools = {}
+
+        # Initialize PandasTool for data operations
+        try:
+            from aiecs.tools.task_tools.pandas_tool import PandasTool
+
+            self.external_tools["pandas"] = PandasTool()
+            self.logger.info("PandasTool initialized successfully")
+        except ImportError:
+            self.logger.warning("PandasTool not available")
+            self.external_tools["pandas"] = None
+
+    # Schema definitions
+    class TransformDataSchema(BaseModel):
+        """Schema for transform_data operation"""
+
+        data: Union[Dict[str, Any], List[Dict[str, Any]]] = Field(description="Data to transform")
+        transformations: List[Dict[str, Any]] = Field(description="List of transformation steps")
+        enable_validation: bool = Field(default=True, description="Validate transformations")
+
+    class AutoTransformSchema(BaseModel):
+        """Schema for auto_transform operation"""
+
+        data: Union[Dict[str, Any], List[Dict[str, Any]]] = Field(description="Data to transform")
+        target_column: Optional[str] = Field(default=None, description="Target column name")
+        task_type: Optional[str] = Field(default=None, description="Task type: classification or regression")
+
+    class HandleMissingValuesSchema(BaseModel):
+        """Schema for handle_missing_values operation"""
+
+        data: Union[Dict[str, Any], List[Dict[str, Any]]] = Field(description="Data with missing values")
+        strategy: MissingValueStrategy = Field(
+            default=MissingValueStrategy.MEAN,
+            description="Strategy for handling missing values",
+        )
+        columns: Optional[List[str]] = Field(default=None, description="Specific columns to handle")
+        fill_value: Optional[Any] = Field(default=None, description="Value for constant strategy")
+
+    class EncodeFeaturesSchema(BaseModel):
+        """Schema for encode_features operation"""
+
+        data: Union[Dict[str, Any], List[Dict[str, Any]]] = Field(description="Data to encode")
+        columns: List[str] = Field(description="Columns to encode")
+        method: str = Field(default="one_hot", description="Encoding method: one_hot or label")
+
+    def transform_data(
+        self,
+        data: Union[Dict[str, Any], List[Dict[str, Any]], pd.DataFrame],
+        transformations: List[Dict[str, Any]],
+        validate: bool = True,
+    ) -> Dict[str, Any]:
+        """
+        Apply transformation pipeline to data.
+
+        Args:
+            data: Data to transform
+            transformations: List of transformation steps, each containing:
+                - type: TransformationType
+                - columns: List of columns (optional)
+                - params: Additional parameters
+            validate: Whether to validate transformations
+
+        Returns:
+            Dict containing:
+                - transformed_data: Transformed DataFrame
+                - transformation_log: Log of applied transformations
+                - quality_improvement: Quality metrics comparison
+
+        Raises:
+            TransformationError: If transformation fails
+        """
+        try:
+            df = self._to_dataframe(data)
+            original_df = df.copy()
+
+            transformation_log = []
+
+            for i, transform in enumerate(transformations):
+                trans_type = transform.get("type")
+                if not isinstance(trans_type, str):
+                    raise ValueError(f"Invalid transformation type: {trans_type}, expected string")
+                columns = transform.get("columns")
+                params = transform.get("params", {})
+
+                self.logger.info(f"Applying transformation {i+1}/{len(transformations)}: {trans_type}")
+
+                # Apply transformation
+                df = self._apply_single_transformation(df, trans_type, columns, params)
+
+                transformation_log.append(
+                    {
+                        "step": i + 1,
+                        "type": trans_type,
+                        "columns": columns,
+                        "params": params,
+                        "status": "success",
+                    }
+                )
+
+            # Calculate quality improvement
+            quality_improvement = self._calculate_quality_improvement(original_df, df)
+
+            return {
+                "transformed_data": df,
+                "transformation_log": transformation_log,
+                "quality_improvement": quality_improvement,
+                "original_shape": original_df.shape,
+                "new_shape": df.shape,
+            }
+
+        except Exception as e:
+            self.logger.error(f"Error in transformation pipeline: {e}")
+            raise TransformationError(f"Transformation failed: {e}")
+
+    def auto_transform(
+        self,
+        data: Union[Dict[str, Any], List[Dict[str, Any]], pd.DataFrame],
+        target_column: Optional[str] = None,
+        task_type: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """
+        Automatically determine and apply optimal transformations.
+
+        Args:
+            data: Data to transform
+            target_column: Target column for ML tasks
+            task_type: Type of task (classification or regression)
+
+        Returns:
+            Dict containing transformed data and applied transformations
+        """
+        try:
+            df = self._to_dataframe(data)
+
+            # Determine transformations needed
+            transformations = self._determine_transformations(df, target_column, task_type)
+
+            # Apply transformations
+            result = self.transform_data(df, transformations, validate=True)
+            result["auto_detected_transformations"] = transformations
+
+            return result
+
+        except Exception as e:
+            self.logger.error(f"Error in auto transform: {e}")
+            raise TransformationError(f"Auto transform failed: {e}")
+
+    def handle_missing_values(
+        self,
+        data: Union[Dict[str, Any], List[Dict[str, Any]], pd.DataFrame],
+        strategy: MissingValueStrategy = MissingValueStrategy.MEAN,
+        columns: Optional[List[str]] = None,
+        fill_value: Optional[Any] = None,
+    ) -> Dict[str, Any]:
+        """
+        Handle missing values in data.
+
+        Args:
+            data: Data with missing values
+            strategy: Strategy for handling missing values
+            columns: Specific columns to handle (None for all)
+            fill_value: Value for constant strategy
+
+        Returns:
+            Dict containing data with handled missing values
+        """
+        try:
+            df = self._to_dataframe(data)
+            original_missing = df.isnull().sum().sum()
+
+            # Select columns to handle
+            cols_to_handle = columns if columns else df.columns.tolist()
+
+            # Apply strategy
+            if strategy == MissingValueStrategy.DROP:
+                df = df.dropna(subset=cols_to_handle)
+            elif strategy == MissingValueStrategy.MEAN:
+                for col in cols_to_handle:
+                    if df[col].dtype in ["int64", "float64"]:
+                        df[col].fillna(df[col].mean(), inplace=True)
+            elif strategy == MissingValueStrategy.MEDIAN:
+                for col in cols_to_handle:
+                    if df[col].dtype in ["int64", "float64"]:
+                        df[col].fillna(df[col].median(), inplace=True)
+            elif strategy == MissingValueStrategy.MODE:
+                for col in cols_to_handle:
+                    if not df[col].mode().empty:
+                        df[col].fillna(df[col].mode()[0], inplace=True)
+            elif strategy == MissingValueStrategy.FORWARD_FILL:
+                df[cols_to_handle] = df[cols_to_handle].fillna(method="ffill")
+            elif strategy == MissingValueStrategy.BACKWARD_FILL:
+                df[cols_to_handle] = df[cols_to_handle].fillna(method="bfill")
+            elif strategy == MissingValueStrategy.INTERPOLATE:
+                for col in cols_to_handle:
+                    if df[col].dtype in ["int64", "float64"]:
+                        df[col] = df[col].interpolate()
+            elif strategy == MissingValueStrategy.CONSTANT:
+                df[cols_to_handle] = df[cols_to_handle].fillna(fill_value)
+
+            final_missing = df.isnull().sum().sum()
+
+            return {
+                "data": df,
+                "original_missing": int(original_missing),
+                "final_missing": int(final_missing),
+                "missing_handled": int(original_missing - final_missing),
+                "strategy": strategy.value,
+            }
+
+        except Exception as e:
+            self.logger.error(f"Error handling missing values: {e}")
+            raise TransformationError(f"Failed to handle missing values: {e}")
+
+    def encode_features(
+        self,
+        data: Union[Dict[str, Any], List[Dict[str, Any]], pd.DataFrame],
+        columns: List[str],
+        method: str = "one_hot",
+    ) -> Dict[str, Any]:
+        """
+        Encode categorical features.
+
+        Args:
+            data: Data to encode
+            columns: Columns to encode
+            method: Encoding method (one_hot or label)
+
+        Returns:
+            Dict containing encoded data
+        """
+        try:
+            df = self._to_dataframe(data)
+
+            if method == "one_hot":
+                # One-hot encoding
+                df_encoded = pd.get_dummies(df, columns=columns, prefix=columns)
+                encoding_info: Dict[str, Any] = {
+                    "method": "one_hot",
+                    "original_columns": columns,
+                    "new_columns": [col for col in df_encoded.columns if col not in df.columns],
+                }
+            elif method == "label":
+                # Label encoding
+                df_encoded = df.copy()
+                encoders: Dict[str, Any] = {}
+                for col in columns:
+                    le = LabelEncoder()
+                    df_encoded[col] = le.fit_transform(df[col].astype(str))
+                    encoders[col] = le
+                encoding_info = {
+                    "method": "label",
+                    "columns": columns,
+                    "encoders": encoders,
+                }
+            else:
+                raise TransformationError(f"Unsupported encoding method: {method}")
+
+            return {
+                "data": df_encoded,
+                "encoding_info": encoding_info,
+                "original_shape": df.shape,
+                "new_shape": df_encoded.shape,
+            }
+
+        except Exception as e:
+            self.logger.error(f"Error encoding features: {e}")
+            raise TransformationError(f"Feature encoding failed: {e}")
+
+    # Internal helper methods
+
+    def _to_dataframe(self, data: Union[Dict, List, pd.DataFrame]) -> pd.DataFrame:
+        """Convert data to DataFrame"""
+        if isinstance(data, pd.DataFrame):
+            return data
+        elif isinstance(data, list):
+            return pd.DataFrame(data)
+        elif isinstance(data, dict):
+            return pd.DataFrame([data])
+        else:
+            raise TransformationError(f"Unsupported data type: {type(data)}")
+
+    def _apply_single_transformation(
+        self,
+        df: pd.DataFrame,
+        trans_type: str,
+        columns: Optional[List[str]],
+        params: Dict[str, Any],
+    ) -> pd.DataFrame:
+        """Apply a single transformation"""
+        if trans_type == TransformationType.REMOVE_DUPLICATES.value:
+            return df.drop_duplicates()
+
+        elif trans_type == TransformationType.FILL_MISSING.value:
+            strategy = params.get("strategy", "mean")
+            for col in columns or df.columns:
+                if df[col].isnull().any():
+                    if strategy == "mean" and df[col].dtype in [
+                        "int64",
+                        "float64",
+                    ]:
+                        df[col].fillna(df[col].mean(), inplace=True)
+                    elif strategy == "median" and df[col].dtype in [
+                        "int64",
+                        "float64",
+                    ]:
+                        df[col].fillna(df[col].median(), inplace=True)
+                    elif strategy == "mode":
+                        if not df[col].mode().empty:
+                            df[col].fillna(df[col].mode()[0], inplace=True)
+            return df
+
+        elif trans_type == TransformationType.REMOVE_OUTLIERS.value:
+            for col in columns or df.select_dtypes(include=[np.number]).columns:
+                if df[col].std() > 0:
+                    z_scores = np.abs((df[col] - df[col].mean()) / df[col].std())
+                    df = df[z_scores < self.config.outlier_std_threshold]
+            return df
+
+        elif trans_type == TransformationType.STANDARDIZE.value:
+            scaler = StandardScaler()
+            cols = columns or df.select_dtypes(include=[np.number]).columns.tolist()
+            df[cols] = scaler.fit_transform(df[cols])
+            return df
+
+        elif trans_type == TransformationType.NORMALIZE.value:
+            scaler = MinMaxScaler()
+            cols = columns or df.select_dtypes(include=[np.number]).columns.tolist()
+            df[cols] = scaler.fit_transform(df[cols])
+            return df
+
+        elif trans_type == TransformationType.LOG_TRANSFORM.value:
+            cols = columns or df.select_dtypes(include=[np.number]).columns.tolist()
+            for col in cols:
+                if (df[col] > 0).all():
+                    df[col] = np.log(df[col])
+            return df
+
+        elif trans_type == TransformationType.ONE_HOT_ENCODE.value:
+            cols = columns or df.select_dtypes(include=["object"]).columns.tolist()
+            return pd.get_dummies(df, columns=cols)
+
+        elif trans_type == TransformationType.LABEL_ENCODE.value:
+            cols = columns or df.select_dtypes(include=["object"]).columns.tolist()
+            for col in cols:
+                le = LabelEncoder()
+                df[col] = le.fit_transform(df[col].astype(str))
+            return df
+
+        else:
+            self.logger.warning(f"Transformation type {trans_type} not implemented, skipping")
+            return df
+
+    def _determine_transformations(
+        self,
+        df: pd.DataFrame,
+        target_column: Optional[str],
+        task_type: Optional[str],
+    ) -> List[Dict[str, Any]]:
+        """Determine transformations needed for data"""
+        transformations: List[Dict[str, Any]] = []
+
+        # Remove duplicates if present
+        if df.duplicated().sum() > 0:
+            transformations.append(
+                {
+                    "type": TransformationType.REMOVE_DUPLICATES.value,
+                    "columns": None,
+                    "params": {},
+                }
+            )
+
+        # Handle missing values
+        if df.isnull().sum().sum() > 0:
+            transformations.append(
+                {
+                    "type": TransformationType.FILL_MISSING.value,
+                    "columns": None,
+                    "params": {"strategy": "mean"},
+                }
+            )
+
+        # Encode categorical variables
+        categorical_cols = df.select_dtypes(include=["object"]).columns.tolist()
+        if target_column and target_column in categorical_cols:
+            categorical_cols.remove(target_column)
+
+        if len(categorical_cols) > 0:
+            # Use label encoding if too many categories, otherwise one-hot
+            for col in categorical_cols:
+                if df[col].nunique() > self.config.max_one_hot_categories:
+                    transformations.append(
+                        {
+                            "type": TransformationType.LABEL_ENCODE.value,
+                            "columns": [col],
+                            "params": {},
+                        }
+                    )
+                else:
+                    transformations.append(
+                        {
+                            "type": TransformationType.ONE_HOT_ENCODE.value,
+                            "columns": [col],
+                            "params": {},
+                        }
+                    )
+
+        # Standardize numeric features
+        numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
+        if target_column and target_column in numeric_cols:
+            numeric_cols.remove(target_column)
+
+        if len(numeric_cols) > 0:
+            transformations.append(
+                {
+                    "type": TransformationType.STANDARDIZE.value,
+                    "columns": numeric_cols,
+                    "params": {},
+                }
+            )
+
+        return transformations
+
+    def _calculate_quality_improvement(self, original_df: pd.DataFrame, transformed_df: pd.DataFrame) -> Dict[str, Any]:
+        """Calculate quality improvement metrics"""
+        return {
+            "missing_before": int(original_df.isnull().sum().sum()),
+            "missing_after": int(transformed_df.isnull().sum().sum()),
+            "duplicates_before": int(original_df.duplicated().sum()),
+            "duplicates_after": int(transformed_df.duplicated().sum()),
+            "rows_before": len(original_df),
+            "rows_after": len(transformed_df),
+            "columns_before": len(original_df.columns),
+            "columns_after": len(transformed_df.columns),
+        }
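
For reviewers who want to sanity-check the new module, the following is a minimal usage sketch based only on what the diff above shows (the DataTransformerTool constructor with an explicit config dict, handle_missing_values, and transform_data). Treat it as an illustration rather than documented API usage: how BaseTool maps the config dict onto the Config settings, and whether the tool is normally called directly rather than resolved through the aiecs tool registry via register_tool("data_transformer"), is not shown in this diff. The sample records are invented for the example.

# Hypothetical review sketch -- not part of the packaged file above.
from aiecs.tools.statistics.data_transformer_tool import (
    DataTransformerTool,
    MissingValueStrategy,
)

# Explicit config dict; the docstring above lists it as the highest-priority
# source, and the key mirrors the Config field shown in the diff.
tool = DataTransformerTool(config={"outlier_std_threshold": 2.5})

records = [
    {"age": 34, "income": 72000.0, "segment": "a"},
    {"age": None, "income": 61000.0, "segment": "b"},
    {"age": 29, "income": None, "segment": "a"},
]

# Fill numeric gaps with column means, then standardize and one-hot encode.
filled = tool.handle_missing_values(records, strategy=MissingValueStrategy.MEAN)
result = tool.transform_data(
    filled["data"],
    transformations=[
        {"type": "standardize", "columns": ["age", "income"], "params": {}},
        {"type": "one_hot_encode", "columns": ["segment"], "params": {}},
    ],
)
print(result["transformation_log"])
print(result["transformed_data"].head())

The keys read back here ("data", "transformed_data", "transformation_log") come straight from the return dictionaries in the diff; anything beyond that, such as how results are serialized when the tool runs inside the aiecs executor, is outside what this diff shows.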