aiecs 1.0.1__py3-none-any.whl → 1.7.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic. Click here for more details.

Files changed (340) hide show
  1. aiecs/__init__.py +13 -16
  2. aiecs/__main__.py +7 -7
  3. aiecs/aiecs_client.py +269 -75
  4. aiecs/application/executors/operation_executor.py +79 -54
  5. aiecs/application/knowledge_graph/__init__.py +7 -0
  6. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  7. aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
  8. aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
  11. aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
  12. aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
  13. aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
  14. aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
  15. aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
  16. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  17. aiecs/application/knowledge_graph/extractors/base.py +98 -0
  18. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
  19. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
  20. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
  21. aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
  22. aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
  23. aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
  24. aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
  25. aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
  26. aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
  27. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
  28. aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
  29. aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
  30. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
  31. aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
  32. aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
  33. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
  34. aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
  35. aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
  36. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  37. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
  38. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
  39. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  40. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
  41. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  42. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  43. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
  44. aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
  45. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
  46. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  47. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  48. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
  49. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
  50. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
  51. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
  52. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
  53. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
  54. aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
  55. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
  56. aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
  57. aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
  58. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
  59. aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
  60. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  61. aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
  62. aiecs/application/knowledge_graph/search/reranker.py +293 -0
  63. aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
  64. aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
  65. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  66. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
  67. aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
  68. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  69. aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
  70. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  71. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
  72. aiecs/common/__init__.py +9 -0
  73. aiecs/common/knowledge_graph/__init__.py +17 -0
  74. aiecs/common/knowledge_graph/runnable.py +471 -0
  75. aiecs/config/__init__.py +20 -5
  76. aiecs/config/config.py +762 -31
  77. aiecs/config/graph_config.py +131 -0
  78. aiecs/config/tool_config.py +435 -0
  79. aiecs/core/__init__.py +29 -13
  80. aiecs/core/interface/__init__.py +2 -2
  81. aiecs/core/interface/execution_interface.py +22 -22
  82. aiecs/core/interface/storage_interface.py +37 -88
  83. aiecs/core/registry/__init__.py +31 -0
  84. aiecs/core/registry/service_registry.py +92 -0
  85. aiecs/domain/__init__.py +270 -1
  86. aiecs/domain/agent/__init__.py +191 -0
  87. aiecs/domain/agent/base_agent.py +3949 -0
  88. aiecs/domain/agent/exceptions.py +99 -0
  89. aiecs/domain/agent/graph_aware_mixin.py +569 -0
  90. aiecs/domain/agent/hybrid_agent.py +1731 -0
  91. aiecs/domain/agent/integration/__init__.py +29 -0
  92. aiecs/domain/agent/integration/context_compressor.py +216 -0
  93. aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
  94. aiecs/domain/agent/integration/protocols.py +281 -0
  95. aiecs/domain/agent/integration/retry_policy.py +218 -0
  96. aiecs/domain/agent/integration/role_config.py +213 -0
  97. aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
  98. aiecs/domain/agent/lifecycle.py +291 -0
  99. aiecs/domain/agent/llm_agent.py +692 -0
  100. aiecs/domain/agent/memory/__init__.py +12 -0
  101. aiecs/domain/agent/memory/conversation.py +1124 -0
  102. aiecs/domain/agent/migration/__init__.py +14 -0
  103. aiecs/domain/agent/migration/conversion.py +163 -0
  104. aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
  105. aiecs/domain/agent/models.py +894 -0
  106. aiecs/domain/agent/observability.py +479 -0
  107. aiecs/domain/agent/persistence.py +449 -0
  108. aiecs/domain/agent/prompts/__init__.py +29 -0
  109. aiecs/domain/agent/prompts/builder.py +159 -0
  110. aiecs/domain/agent/prompts/formatters.py +187 -0
  111. aiecs/domain/agent/prompts/template.py +255 -0
  112. aiecs/domain/agent/registry.py +253 -0
  113. aiecs/domain/agent/tool_agent.py +444 -0
  114. aiecs/domain/agent/tools/__init__.py +15 -0
  115. aiecs/domain/agent/tools/schema_generator.py +377 -0
  116. aiecs/domain/community/__init__.py +155 -0
  117. aiecs/domain/community/agent_adapter.py +469 -0
  118. aiecs/domain/community/analytics.py +432 -0
  119. aiecs/domain/community/collaborative_workflow.py +648 -0
  120. aiecs/domain/community/communication_hub.py +634 -0
  121. aiecs/domain/community/community_builder.py +320 -0
  122. aiecs/domain/community/community_integration.py +796 -0
  123. aiecs/domain/community/community_manager.py +803 -0
  124. aiecs/domain/community/decision_engine.py +849 -0
  125. aiecs/domain/community/exceptions.py +231 -0
  126. aiecs/domain/community/models/__init__.py +33 -0
  127. aiecs/domain/community/models/community_models.py +234 -0
  128. aiecs/domain/community/resource_manager.py +461 -0
  129. aiecs/domain/community/shared_context_manager.py +589 -0
  130. aiecs/domain/context/__init__.py +40 -10
  131. aiecs/domain/context/context_engine.py +1910 -0
  132. aiecs/domain/context/conversation_models.py +87 -53
  133. aiecs/domain/context/graph_memory.py +582 -0
  134. aiecs/domain/execution/model.py +12 -4
  135. aiecs/domain/knowledge_graph/__init__.py +19 -0
  136. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  137. aiecs/domain/knowledge_graph/models/entity.py +148 -0
  138. aiecs/domain/knowledge_graph/models/evidence.py +178 -0
  139. aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
  140. aiecs/domain/knowledge_graph/models/path.py +171 -0
  141. aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
  142. aiecs/domain/knowledge_graph/models/query.py +261 -0
  143. aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
  144. aiecs/domain/knowledge_graph/models/relation.py +202 -0
  145. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  146. aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
  147. aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
  148. aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
  149. aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
  150. aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
  151. aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
  152. aiecs/domain/task/dsl_processor.py +172 -56
  153. aiecs/domain/task/model.py +20 -8
  154. aiecs/domain/task/task_context.py +27 -24
  155. aiecs/infrastructure/__init__.py +0 -2
  156. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  157. aiecs/infrastructure/graph_storage/base.py +837 -0
  158. aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
  159. aiecs/infrastructure/graph_storage/cache.py +424 -0
  160. aiecs/infrastructure/graph_storage/distributed.py +223 -0
  161. aiecs/infrastructure/graph_storage/error_handling.py +380 -0
  162. aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
  163. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  164. aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
  165. aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
  166. aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
  167. aiecs/infrastructure/graph_storage/metrics.py +344 -0
  168. aiecs/infrastructure/graph_storage/migration.py +400 -0
  169. aiecs/infrastructure/graph_storage/pagination.py +483 -0
  170. aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
  171. aiecs/infrastructure/graph_storage/postgres.py +1563 -0
  172. aiecs/infrastructure/graph_storage/property_storage.py +353 -0
  173. aiecs/infrastructure/graph_storage/protocols.py +76 -0
  174. aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
  175. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  176. aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
  177. aiecs/infrastructure/graph_storage/streaming.py +487 -0
  178. aiecs/infrastructure/graph_storage/tenant.py +412 -0
  179. aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
  180. aiecs/infrastructure/messaging/websocket_manager.py +51 -35
  181. aiecs/infrastructure/monitoring/__init__.py +22 -0
  182. aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
  183. aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
  184. aiecs/infrastructure/monitoring/structured_logger.py +3 -7
  185. aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
  186. aiecs/infrastructure/persistence/__init__.py +14 -1
  187. aiecs/infrastructure/persistence/context_engine_client.py +184 -0
  188. aiecs/infrastructure/persistence/database_manager.py +67 -43
  189. aiecs/infrastructure/persistence/file_storage.py +180 -103
  190. aiecs/infrastructure/persistence/redis_client.py +74 -21
  191. aiecs/llm/__init__.py +73 -25
  192. aiecs/llm/callbacks/__init__.py +11 -0
  193. aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
  194. aiecs/llm/client_factory.py +230 -37
  195. aiecs/llm/client_resolver.py +155 -0
  196. aiecs/llm/clients/__init__.py +38 -0
  197. aiecs/llm/clients/base_client.py +328 -0
  198. aiecs/llm/clients/google_function_calling_mixin.py +415 -0
  199. aiecs/llm/clients/googleai_client.py +314 -0
  200. aiecs/llm/clients/openai_client.py +158 -0
  201. aiecs/llm/clients/openai_compatible_mixin.py +367 -0
  202. aiecs/llm/clients/vertex_client.py +1186 -0
  203. aiecs/llm/clients/xai_client.py +201 -0
  204. aiecs/llm/config/__init__.py +51 -0
  205. aiecs/llm/config/config_loader.py +272 -0
  206. aiecs/llm/config/config_validator.py +206 -0
  207. aiecs/llm/config/model_config.py +143 -0
  208. aiecs/llm/protocols.py +149 -0
  209. aiecs/llm/utils/__init__.py +10 -0
  210. aiecs/llm/utils/validate_config.py +89 -0
  211. aiecs/main.py +140 -121
  212. aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
  213. aiecs/scripts/aid/__init__.py +19 -0
  214. aiecs/scripts/aid/module_checker.py +499 -0
  215. aiecs/scripts/aid/version_manager.py +235 -0
  216. aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
  217. aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
  218. aiecs/scripts/dependance_check/__init__.py +15 -0
  219. aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
  220. aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
  221. aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
  222. aiecs/scripts/dependance_patch/__init__.py +7 -0
  223. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  224. aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
  225. aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
  226. aiecs/scripts/knowledge_graph/__init__.py +3 -0
  227. aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
  228. aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
  229. aiecs/scripts/tools_develop/README.md +671 -0
  230. aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
  231. aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
  232. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  233. aiecs/scripts/tools_develop/__init__.py +21 -0
  234. aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
  235. aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
  236. aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
  237. aiecs/scripts/tools_develop/schema_coverage.py +511 -0
  238. aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
  239. aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
  240. aiecs/scripts/tools_develop/verify_tools.py +352 -0
  241. aiecs/tasks/__init__.py +0 -1
  242. aiecs/tasks/worker.py +115 -47
  243. aiecs/tools/__init__.py +194 -72
  244. aiecs/tools/apisource/__init__.py +99 -0
  245. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  246. aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
  247. aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
  248. aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
  249. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  250. aiecs/tools/apisource/monitoring/metrics.py +330 -0
  251. aiecs/tools/apisource/providers/__init__.py +112 -0
  252. aiecs/tools/apisource/providers/base.py +671 -0
  253. aiecs/tools/apisource/providers/census.py +397 -0
  254. aiecs/tools/apisource/providers/fred.py +535 -0
  255. aiecs/tools/apisource/providers/newsapi.py +409 -0
  256. aiecs/tools/apisource/providers/worldbank.py +352 -0
  257. aiecs/tools/apisource/reliability/__init__.py +12 -0
  258. aiecs/tools/apisource/reliability/error_handler.py +363 -0
  259. aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
  260. aiecs/tools/apisource/tool.py +832 -0
  261. aiecs/tools/apisource/utils/__init__.py +9 -0
  262. aiecs/tools/apisource/utils/validators.py +334 -0
  263. aiecs/tools/base_tool.py +415 -21
  264. aiecs/tools/docs/__init__.py +121 -0
  265. aiecs/tools/docs/ai_document_orchestrator.py +607 -0
  266. aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
  267. aiecs/tools/docs/content_insertion_tool.py +1320 -0
  268. aiecs/tools/docs/document_creator_tool.py +1464 -0
  269. aiecs/tools/docs/document_layout_tool.py +1160 -0
  270. aiecs/tools/docs/document_parser_tool.py +1016 -0
  271. aiecs/tools/docs/document_writer_tool.py +2008 -0
  272. aiecs/tools/knowledge_graph/__init__.py +17 -0
  273. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
  274. aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
  275. aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
  276. aiecs/tools/langchain_adapter.py +300 -138
  277. aiecs/tools/schema_generator.py +455 -0
  278. aiecs/tools/search_tool/__init__.py +100 -0
  279. aiecs/tools/search_tool/analyzers.py +581 -0
  280. aiecs/tools/search_tool/cache.py +264 -0
  281. aiecs/tools/search_tool/constants.py +128 -0
  282. aiecs/tools/search_tool/context.py +224 -0
  283. aiecs/tools/search_tool/core.py +778 -0
  284. aiecs/tools/search_tool/deduplicator.py +119 -0
  285. aiecs/tools/search_tool/error_handler.py +242 -0
  286. aiecs/tools/search_tool/metrics.py +343 -0
  287. aiecs/tools/search_tool/rate_limiter.py +172 -0
  288. aiecs/tools/search_tool/schemas.py +275 -0
  289. aiecs/tools/statistics/__init__.py +80 -0
  290. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
  291. aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
  292. aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
  293. aiecs/tools/statistics/data_loader_tool.py +555 -0
  294. aiecs/tools/statistics/data_profiler_tool.py +638 -0
  295. aiecs/tools/statistics/data_transformer_tool.py +580 -0
  296. aiecs/tools/statistics/data_visualizer_tool.py +498 -0
  297. aiecs/tools/statistics/model_trainer_tool.py +507 -0
  298. aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
  299. aiecs/tools/task_tools/__init__.py +49 -36
  300. aiecs/tools/task_tools/chart_tool.py +200 -184
  301. aiecs/tools/task_tools/classfire_tool.py +268 -267
  302. aiecs/tools/task_tools/image_tool.py +220 -141
  303. aiecs/tools/task_tools/office_tool.py +226 -146
  304. aiecs/tools/task_tools/pandas_tool.py +477 -121
  305. aiecs/tools/task_tools/report_tool.py +390 -142
  306. aiecs/tools/task_tools/research_tool.py +149 -79
  307. aiecs/tools/task_tools/scraper_tool.py +339 -145
  308. aiecs/tools/task_tools/stats_tool.py +448 -209
  309. aiecs/tools/temp_file_manager.py +26 -24
  310. aiecs/tools/tool_executor/__init__.py +18 -16
  311. aiecs/tools/tool_executor/tool_executor.py +364 -52
  312. aiecs/utils/LLM_output_structor.py +74 -48
  313. aiecs/utils/__init__.py +14 -3
  314. aiecs/utils/base_callback.py +0 -3
  315. aiecs/utils/cache_provider.py +696 -0
  316. aiecs/utils/execution_utils.py +50 -31
  317. aiecs/utils/prompt_loader.py +1 -0
  318. aiecs/utils/token_usage_repository.py +37 -11
  319. aiecs/ws/socket_server.py +14 -4
  320. {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/METADATA +52 -15
  321. aiecs-1.7.17.dist-info/RECORD +337 -0
  322. aiecs-1.7.17.dist-info/entry_points.txt +13 -0
  323. aiecs/config/registry.py +0 -19
  324. aiecs/domain/context/content_engine.py +0 -982
  325. aiecs/llm/base_client.py +0 -99
  326. aiecs/llm/openai_client.py +0 -125
  327. aiecs/llm/vertex_client.py +0 -186
  328. aiecs/llm/xai_client.py +0 -184
  329. aiecs/scripts/dependency_checker.py +0 -857
  330. aiecs/scripts/quick_dependency_check.py +0 -269
  331. aiecs/tools/task_tools/search_api.py +0 -7
  332. aiecs-1.0.1.dist-info/RECORD +0 -90
  333. aiecs-1.0.1.dist-info/entry_points.txt +0 -7
  334. /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
  335. /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
  336. /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
  337. /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
  338. {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/WHEEL +0 -0
  339. {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/licenses/LICENSE +0 -0
  340. {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,638 @@
1
+ """
2
+ Data Profiler Tool - Comprehensive data profiling and quality assessment
3
+
4
+ This tool provides advanced data profiling capabilities with:
5
+ - Statistical summaries and distributions
6
+ - Data quality issue detection
7
+ - Pattern and anomaly identification
8
+ - Preprocessing recommendations
9
+ - Column-level and dataset-level analysis
10
+ """
11
+
12
+ import logging
13
+ from typing import Dict, Any, List, Optional, Union
14
+ from enum import Enum
15
+
16
+ import pandas as pd # type: ignore[import-untyped]
17
+ import numpy as np
18
+ from pydantic import BaseModel, Field
19
+ from pydantic_settings import BaseSettings, SettingsConfigDict
20
+
21
+ from aiecs.tools.base_tool import BaseTool
22
+ from aiecs.tools import register_tool
23
+
24
+
25
class ProfileLevel(str, Enum):
    """Data profiling depth levels.

    BASIC and STANDARD cover summary and column profiling; COMPREHENSIVE
    and DEEP additionally trigger correlation analysis in
    ``profile_dataset``.
    """

    BASIC = "basic"
    STANDARD = "standard"
    COMPREHENSIVE = "comprehensive"
    DEEP = "deep"
32
+
33
+
34
class DataQualityCheck(str, Enum):
    """Categories of data quality checks that can be requested.

    Passed via the ``checks`` argument of ``profile_dataset`` /
    ``detect_quality_issues`` to restrict which checks run (``None``
    means all checks).
    """

    MISSING_VALUES = "missing_values"
    DUPLICATES = "duplicates"
    OUTLIERS = "outliers"
    INCONSISTENCIES = "inconsistencies"
    DATA_TYPES = "data_types"
    DISTRIBUTIONS = "distributions"
    CORRELATIONS = "correlations"
44
+
45
+
46
class DataProfilerError(Exception):
    """Base exception for all DataProfiler errors."""
48
+
49
+
50
class ProfilingError(DataProfilerError):
    """Raised when a profiling operation fails."""
52
+
53
+
54
+ @register_tool("data_profiler")
55
+ class DataProfilerTool(BaseTool):
56
+ """
57
+ Comprehensive data profiling tool that can:
58
+ 1. Generate statistical summaries
59
+ 2. Detect data quality issues
60
+ 3. Identify patterns and anomalies
61
+ 4. Recommend preprocessing steps
62
+
63
+ Integrates with stats_tool and pandas_tool for core operations.
64
+ """
65
+
66
    # Configuration schema
    class Config(BaseSettings):
        """Configuration for the data profiler tool.

        Automatically reads from environment variables with the
        DATA_PROFILER_ prefix.
        Example: DATA_PROFILER_DEFAULT_PROFILE_LEVEL -> default_profile_level
        """

        model_config = SettingsConfigDict(env_prefix="DATA_PROFILER_")

        # Default profiling depth; presumably expected to match a
        # ProfileLevel value ("basic"/"standard"/...) — TODO confirm.
        default_profile_level: str = Field(default="standard", description="Default profiling depth level")
        # Number of standard deviations beyond which a value is treated
        # as an outlier.
        outlier_std_threshold: float = Field(
            default=3.0,
            description="Standard deviation threshold for outlier detection",
        )
        # Absolute correlation above this is reported as a strong
        # relationship.
        correlation_threshold: float = Field(
            default=0.7,
            description="Correlation threshold for identifying strong relationships",
        )
        # Missing-value ratio threshold used during quality assessment.
        missing_threshold: float = Field(
            default=0.5,
            description="Missing value threshold for quality assessment",
        )
        enable_visualizations: bool = Field(
            default=True,
            description="Whether to enable visualization generation",
        )
        # Columns with more unique values than this are not analyzed as
        # categorical.
        max_unique_values_categorical: int = Field(
            default=50,
            description="Maximum unique values for categorical analysis",
        )
97
+
98
+ def __init__(self, config: Optional[Dict[str, Any]] = None, **kwargs):
99
+ """
100
+ Initialize DataProfilerTool with settings.
101
+
102
+ Configuration is automatically loaded by BaseTool from:
103
+ 1. Explicit config dict (highest priority)
104
+ 2. YAML config files (config/tools/data_profiler.yaml)
105
+ 3. Environment variables (via dotenv from .env files)
106
+ 4. Tool defaults (lowest priority)
107
+
108
+ Args:
109
+ config: Optional configuration overrides
110
+ **kwargs: Additional arguments passed to BaseTool (e.g., tool_name)
111
+ """
112
+ super().__init__(config, **kwargs)
113
+
114
+ # Configuration is automatically loaded by BaseTool into self._config_obj
115
+ # Access config via self._config_obj (BaseSettings instance)
116
+ self.config = self._config_obj if self._config_obj else self.Config()
117
+
118
+ self.logger = logging.getLogger(__name__)
119
+ if not self.logger.handlers:
120
+ handler = logging.StreamHandler()
121
+ handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
122
+ self.logger.addHandler(handler)
123
+ self.logger.setLevel(logging.INFO)
124
+
125
+ # Initialize external tools
126
+ self._init_external_tools()
127
+
128
+ def _init_external_tools(self):
129
+ """Initialize external task tools"""
130
+ self.external_tools = {}
131
+
132
+ # Initialize StatsTool for statistical operations
133
+ try:
134
+ from aiecs.tools.task_tools.stats_tool import StatsTool
135
+
136
+ self.external_tools["stats"] = StatsTool()
137
+ self.logger.info("StatsTool initialized successfully")
138
+ except ImportError:
139
+ self.logger.warning("StatsTool not available")
140
+ self.external_tools["stats"] = None
141
+
142
+ # Initialize PandasTool for data operations
143
+ try:
144
+ from aiecs.tools.task_tools.pandas_tool import PandasTool
145
+
146
+ self.external_tools["pandas"] = PandasTool()
147
+ self.logger.info("PandasTool initialized successfully")
148
+ except ImportError:
149
+ self.logger.warning("PandasTool not available")
150
+ self.external_tools["pandas"] = None
151
+
152
    # Schema definitions
    class ProfileDatasetSchema(BaseModel):
        """Request schema for the ``profile_dataset`` operation."""

        data: Union[Dict[str, Any], List[Dict[str, Any]]] = Field(description="Data to profile")
        level: ProfileLevel = Field(default=ProfileLevel.STANDARD, description="Profiling depth level")
        checks: Optional[List[DataQualityCheck]] = Field(default=None, description="Specific quality checks to perform")
        generate_visualizations: bool = Field(default=False, description="Generate visualization data")
160
+
161
    class DetectQualityIssuesSchema(BaseModel):
        """Request schema for the ``detect_quality_issues`` operation."""

        data: Union[Dict[str, Any], List[Dict[str, Any]]] = Field(description="Data to check")
        checks: Optional[List[DataQualityCheck]] = Field(default=None, description="Specific checks to perform")
166
+
167
    class RecommendPreprocessingSchema(BaseModel):
        """Request schema for the ``recommend_preprocessing`` operation."""

        data: Union[Dict[str, Any], List[Dict[str, Any]]] = Field(description="Data to analyze")
        target_column: Optional[str] = Field(default=None, description="Target column for ML tasks")
172
+
173
+ def profile_dataset(
174
+ self,
175
+ data: Union[Dict[str, Any], List[Dict[str, Any]], pd.DataFrame],
176
+ level: ProfileLevel = ProfileLevel.STANDARD,
177
+ checks: Optional[List[DataQualityCheck]] = None,
178
+ generate_visualizations: bool = False,
179
+ ) -> Dict[str, Any]:
180
+ """
181
+ Generate comprehensive data profile.
182
+
183
+ Args:
184
+ data: Data to profile (dict, list of dicts, or DataFrame)
185
+ level: Profiling depth level
186
+ checks: Specific quality checks to perform (all if None)
187
+ generate_visualizations: Whether to generate visualization data
188
+
189
+ Returns:
190
+ Dict containing:
191
+ - summary: Dataset-level summary
192
+ - column_profiles: Column-level profiles
193
+ - quality_issues: Detected quality issues
194
+ - correlations: Correlation analysis
195
+ - recommendations: Preprocessing recommendations
196
+
197
+ Raises:
198
+ ProfilingError: If profiling fails
199
+ """
200
+ try:
201
+ # Convert to DataFrame if needed
202
+ df = self._to_dataframe(data)
203
+
204
+ self.logger.info(f"Profiling dataset with {len(df)} rows and {len(df.columns)} columns")
205
+
206
+ # Generate summary
207
+ summary = self._generate_summary(df)
208
+
209
+ # Generate column profiles
210
+ column_profiles = self._profile_columns(df, level)
211
+
212
+ # Detect quality issues
213
+ quality_issues = self._detect_quality_issues(df, checks)
214
+
215
+ # Correlation analysis (for comprehensive and deep levels)
216
+ correlations = {}
217
+ if level in [ProfileLevel.COMPREHENSIVE, ProfileLevel.DEEP]:
218
+ correlations = self._analyze_correlations(df)
219
+
220
+ # Generate recommendations
221
+ recommendations = self._generate_recommendations(df, quality_issues, level)
222
+
223
+ # Generate visualization data if requested
224
+ visualization_data = {}
225
+ if generate_visualizations:
226
+ visualization_data = self._generate_visualization_data(df)
227
+
228
+ result = {
229
+ "summary": summary,
230
+ "column_profiles": column_profiles,
231
+ "quality_issues": quality_issues,
232
+ "correlations": correlations,
233
+ "recommendations": recommendations,
234
+ "profile_level": level.value,
235
+ }
236
+
237
+ if visualization_data:
238
+ result["visualization_data"] = visualization_data
239
+
240
+ self.logger.info("Dataset profiling completed successfully")
241
+ return result
242
+
243
+ except Exception as e:
244
+ self.logger.error(f"Error profiling dataset: {e}")
245
+ raise ProfilingError(f"Failed to profile dataset: {e}")
246
+
247
+ def detect_quality_issues(
248
+ self,
249
+ data: Union[Dict[str, Any], List[Dict[str, Any]], pd.DataFrame],
250
+ checks: Optional[List[DataQualityCheck]] = None,
251
+ ) -> Dict[str, Any]:
252
+ """
253
+ Detect data quality issues.
254
+
255
+ Args:
256
+ data: Data to check
257
+ checks: Specific checks to perform (all if None)
258
+
259
+ Returns:
260
+ Dict containing detected issues by category
261
+ """
262
+ try:
263
+ df = self._to_dataframe(data)
264
+ issues = self._detect_quality_issues(df, checks)
265
+
266
+ return {
267
+ "issues": issues,
268
+ "total_issues": sum(len(v) for v in issues.values()),
269
+ "severity_counts": self._categorize_severity(issues),
270
+ }
271
+
272
+ except Exception as e:
273
+ self.logger.error(f"Error detecting quality issues: {e}")
274
+ raise ProfilingError(f"Failed to detect quality issues: {e}")
275
+
276
def recommend_preprocessing(
    self,
    data: Union[Dict[str, Any], List[Dict[str, Any]], pd.DataFrame],
    target_column: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Recommend preprocessing steps based on data analysis.

    Args:
        data: Data to analyze
        target_column: Target column for ML tasks (if applicable)

    Returns:
        Dict containing recommended preprocessing steps

    Raises:
        ProfilingError: If analysis or recommendation generation fails.
    """
    try:
        frame = self._to_dataframe(data)

        # Base recommendations come from a full quality scan.
        issues = self._detect_quality_issues(frame, None)
        steps = self._generate_recommendations(frame, issues, ProfileLevel.COMPREHENSIVE)

        # Append task-specific advice when a valid target column is given.
        if target_column and target_column in frame.columns:
            steps = steps + self._generate_task_recommendations(frame, target_column)

        ordered = self._prioritize_recommendations(steps)

        return {
            "recommendations": ordered,
            "total_steps": len(ordered),
            "estimated_impact": "medium",  # Placeholder for impact estimation
        }

    except Exception as e:
        self.logger.error(f"Error generating recommendations: {e}")
        raise ProfilingError(f"Failed to generate recommendations: {e}")
317
+
318
+ # Internal helper methods
319
+
320
+ def _to_dataframe(self, data: Union[Dict, List, pd.DataFrame]) -> pd.DataFrame:
321
+ """Convert data to DataFrame"""
322
+ if isinstance(data, pd.DataFrame):
323
+ return data
324
+ elif isinstance(data, list):
325
+ return pd.DataFrame(data)
326
+ elif isinstance(data, dict):
327
+ return pd.DataFrame([data])
328
+ else:
329
+ raise ProfilingError(f"Unsupported data type: {type(data)}")
330
+
331
+ def _generate_summary(self, df: pd.DataFrame) -> Dict[str, Any]:
332
+ """Generate dataset-level summary"""
333
+ numeric_cols = df.select_dtypes(include=[np.number]).columns
334
+ categorical_cols = df.select_dtypes(include=["object", "category"]).columns
335
+
336
+ return {
337
+ "rows": len(df),
338
+ "columns": len(df.columns),
339
+ "numeric_columns": len(numeric_cols),
340
+ "categorical_columns": len(categorical_cols),
341
+ "memory_usage_mb": df.memory_usage(deep=True).sum() / (1024 * 1024),
342
+ "missing_cells": df.isnull().sum().sum(),
343
+ "missing_percentage": ((df.isnull().sum().sum() / (len(df) * len(df.columns)) * 100) if len(df) > 0 else 0),
344
+ "duplicate_rows": df.duplicated().sum(),
345
+ "duplicate_percentage": ((df.duplicated().sum() / len(df) * 100) if len(df) > 0 else 0),
346
+ }
347
+
348
def _profile_columns(self, df: pd.DataFrame, level: ProfileLevel) -> Dict[str, Dict[str, Any]]:
    """Generate column-level profiles.

    Every column gets missing/unique statistics plus type-specific stats
    from the numeric or categorical profiler.

    Args:
        df: DataFrame to profile.
        level: Profiling depth, forwarded to the per-type profilers.

    Returns:
        Mapping of column name -> profile dict.
    """
    profiles = {}
    n_rows = len(df)

    for col in df.columns:
        # Cast numpy scalars to builtins so profiles serialize cleanly.
        missing = int(df[col].isnull().sum())
        unique = int(df[col].nunique())
        profile = {
            "name": col,
            "dtype": str(df[col].dtype),
            "missing_count": missing,
            "missing_percentage": (missing / n_rows * 100) if n_rows > 0 else 0,
            "unique_count": unique,
            "unique_percentage": (unique / n_rows * 100) if n_rows > 0 else 0,
        }

        # is_numeric_dtype also covers int32/float32/nullable numeric
        # columns, which the previous ["int64", "float64"] string check
        # silently routed to the categorical profiler. Bools stay
        # categorical to preserve the original behavior for them.
        if pd.api.types.is_numeric_dtype(df[col]) and not pd.api.types.is_bool_dtype(df[col]):
            profile.update(self._profile_numeric_column(df[col], level))
        else:
            profile.update(self._profile_categorical_column(df[col], level))

        profiles[col] = profile

    return profiles
371
+
372
def _profile_numeric_column(self, series: pd.Series, level: ProfileLevel) -> Dict[str, Any]:
    """Profile a numeric column.

    Basic location/spread stats are always computed; quartiles, shape
    statistics and z-score outlier counts are added only at the
    COMPREHENSIVE and DEEP levels.
    """
    has_data = not series.empty

    profile = {
        "type": "numeric",
        "min": float(series.min()) if has_data else None,
        "max": float(series.max()) if has_data else None,
        "mean": float(series.mean()) if has_data else None,
        "median": float(series.median()) if has_data else None,
        "std": float(series.std()) if has_data else None,
    }

    if level in (ProfileLevel.COMPREHENSIVE, ProfileLevel.DEEP):
        profile["q25"] = float(series.quantile(0.25)) if has_data else None
        profile["q75"] = float(series.quantile(0.75)) if has_data else None
        profile["skewness"] = float(series.skew()) if has_data else None
        profile["kurtosis"] = float(series.kurt()) if has_data else None

        # Flag values beyond the configured z-score threshold; skipped for
        # constant columns (std == 0) where z-scores are undefined.
        if has_data and series.std() > 0:
            deviations = np.abs((series - series.mean()) / series.std())
            flagged = (deviations > self.config.outlier_std_threshold).sum()
            profile["outlier_count"] = int(flagged)
            profile["outlier_percentage"] = float(flagged / len(series) * 100)

    return profile
401
+
402
def _profile_categorical_column(self, series: pd.Series, level: ProfileLevel) -> Dict[str, Any]:
    """Profile a categorical column.

    Reports cardinality and the modal value; at COMPREHENSIVE/DEEP levels
    also the ten most frequent categories with their counts.
    """
    counts = series.value_counts()
    has_values = not counts.empty

    profile = {
        "type": "categorical",
        "unique_values": int(series.nunique()),
        "most_common": str(counts.index[0]) if has_values else None,
        "most_common_count": int(counts.iloc[0]) if has_values else None,
    }

    if level in (ProfileLevel.COMPREHENSIVE, ProfileLevel.DEEP):
        # Keys/values are stringified/cast so the profile serializes cleanly.
        top = counts.head(min(10, len(counts)))
        profile["top_categories"] = {str(cat): int(cnt) for cat, cnt in top.items()}

    return profile
419
+
420
def _detect_quality_issues(self, df: pd.DataFrame, checks: Optional[List[DataQualityCheck]]) -> Dict[str, List[Dict[str, Any]]]:
    """Detect data quality issues.

    Runs the requested checks (all of them when ``checks`` is None) and
    groups findings under fixed category keys; categories with no findings
    stay as empty lists.
    """
    issues: Dict[str, List[Dict[str, Any]]] = {
        key: []
        for key in (
            "missing_values",
            "duplicates",
            "outliers",
            "inconsistencies",
            "data_types",
            "distributions",
            "correlations",
        )
    }

    active = list(DataQualityCheck) if checks is None else checks
    n_rows = len(df)

    # Per-column missing-value scan.
    if DataQualityCheck.MISSING_VALUES in active:
        for col in df.columns:
            missing_pct = (df[col].isnull().sum() / n_rows * 100) if n_rows > 0 else 0
            if missing_pct > 0:
                severity = "high" if missing_pct > self.config.missing_threshold * 100 else "medium"
                issues["missing_values"].append(
                    {
                        "column": col,
                        "missing_percentage": missing_pct,
                        "severity": severity,
                    }
                )

    # Whole-row duplicate scan.
    if DataQualityCheck.DUPLICATES in active:
        dup_count = df.duplicated().sum()
        if dup_count > 0:
            issues["duplicates"].append(
                {
                    "type": "row_duplicates",
                    "count": int(dup_count),
                    "percentage": float(dup_count / n_rows * 100) if n_rows > 0 else 0,
                    "severity": "medium",
                }
            )

    # Z-score outlier scan over numeric columns; constant columns
    # (std == 0) are skipped since z-scores are undefined for them.
    if DataQualityCheck.OUTLIERS in active:
        for col in df.select_dtypes(include=[np.number]).columns:
            if df[col].std() > 0:
                z_scores = np.abs((df[col] - df[col].mean()) / df[col].std())
                outlier_count = (z_scores > self.config.outlier_std_threshold).sum()
                if outlier_count > 0:
                    issues["outliers"].append(
                        {
                            "column": col,
                            "count": int(outlier_count),
                            "percentage": float(outlier_count / n_rows * 100),
                            "severity": "low",
                        }
                    )

    return issues
480
+
481
+ def _analyze_correlations(self, df: pd.DataFrame) -> Dict[str, Any]:
482
+ """Analyze correlations between numeric columns"""
483
+ numeric_df = df.select_dtypes(include=[np.number])
484
+
485
+ if numeric_df.shape[1] < 2:
486
+ return {"message": "Insufficient numeric columns for correlation analysis"}
487
+
488
+ corr_matrix = numeric_df.corr()
489
+
490
+ # Find high correlations
491
+ high_corr_pairs = []
492
+ for i in range(len(corr_matrix.columns)):
493
+ for j in range(i + 1, len(corr_matrix.columns)):
494
+ corr_value = corr_matrix.iloc[i, j]
495
+ if abs(corr_value) > self.config.correlation_threshold:
496
+ high_corr_pairs.append(
497
+ {
498
+ "column1": corr_matrix.columns[i],
499
+ "column2": corr_matrix.columns[j],
500
+ "correlation": float(corr_value),
501
+ }
502
+ )
503
+
504
+ return {
505
+ "correlation_matrix": corr_matrix.to_dict(),
506
+ "high_correlations": high_corr_pairs,
507
+ "num_high_correlations": len(high_corr_pairs),
508
+ }
509
+
510
def _generate_recommendations(
    self,
    df: pd.DataFrame,
    quality_issues: Dict[str, List],
    level: ProfileLevel,
) -> List[Dict[str, Any]]:
    """Generate preprocessing recommendations from detected quality issues.

    Mapping of issue to action:
    - missing values: drop rows (<5%), impute (<50%), else consider
      dropping the column
    - duplicates: remove duplicate rows
    - outliers: winsorize/cap when more than 5% of values are flagged

    Args:
        df: Data being profiled (used to pick an imputation method).
        quality_issues: Output of ``_detect_quality_issues``.
        level: Profiling level (currently unused here; reserved for
            level-specific advice).

    Returns:
        List of recommendation dicts with action/reason/priority keys.
    """
    recommendations = []

    # Missing value recommendations, keyed on the size of the gap.
    for issue in quality_issues.get("missing_values", []):
        pct = issue["missing_percentage"]
        if pct < 5:
            recommendations.append(
                {
                    "action": "drop_missing_rows",
                    "column": issue["column"],
                    "reason": f"Low missing percentage ({pct:.2f}%)",
                    "priority": "medium",
                }
            )
        elif pct < 50:
            # is_numeric_dtype covers int32/float32/nullable numeric dtypes,
            # which the previous "int64"/"float64" string check missed and
            # would have imputed with the mode instead of the mean. Bools
            # keep mode imputation.
            col_series = df[issue["column"]]
            use_mean = pd.api.types.is_numeric_dtype(col_series) and not pd.api.types.is_bool_dtype(col_series)
            recommendations.append(
                {
                    "action": "impute_missing",
                    "column": issue["column"],
                    "method": "mean" if use_mean else "mode",
                    "reason": f"Moderate missing percentage ({pct:.2f}%)",
                    "priority": "high",
                }
            )
        else:
            recommendations.append(
                {
                    "action": "consider_dropping_column",
                    "column": issue["column"],
                    "reason": f"High missing percentage ({pct:.2f}%)",
                    "priority": "high",
                }
            )

    # Duplicate recommendations.
    if quality_issues.get("duplicates"):
        recommendations.append(
            {
                "action": "remove_duplicates",
                "reason": f"{quality_issues['duplicates'][0]['count']} duplicate rows found",
                "priority": "high",
            }
        )

    # Outlier recommendations (only when the share is significant).
    for issue in quality_issues.get("outliers", []):
        if issue["percentage"] > 5:
            recommendations.append(
                {
                    "action": "handle_outliers",
                    "column": issue["column"],
                    "method": "winsorize or cap",
                    "reason": f"Significant outliers detected ({issue['percentage']:.2f}%)",
                    "priority": "medium",
                }
            )

    return recommendations
575
+
576
+ def _generate_task_recommendations(self, df: pd.DataFrame, target_column: str) -> List[Dict[str, Any]]:
577
+ """Generate task-specific recommendations"""
578
+ recommendations = []
579
+
580
+ # Check if target is numeric or categorical
581
+ if df[target_column].dtype in ["int64", "float64"]:
582
+ task_type = "regression"
583
+ else:
584
+ task_type = "classification"
585
+
586
+ recommendations.append(
587
+ {
588
+ "action": "task_identified",
589
+ "task_type": task_type,
590
+ "target_column": target_column,
591
+ "reason": f"Based on target column type: {df[target_column].dtype}",
592
+ "priority": "info",
593
+ }
594
+ )
595
+
596
+ return recommendations
597
+
598
+ def _prioritize_recommendations(self, recommendations: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
599
+ """Prioritize recommendations by importance"""
600
+ priority_order = {"high": 0, "medium": 1, "low": 2, "info": 3}
601
+ return sorted(
602
+ recommendations,
603
+ key=lambda x: priority_order.get(x.get("priority", "low"), 2),
604
+ )
605
+
606
+ def _categorize_severity(self, issues: Dict[str, List]) -> Dict[str, int]:
607
+ """Categorize issues by severity"""
608
+ severity_counts = {"high": 0, "medium": 0, "low": 0}
609
+
610
+ for issue_list in issues.values():
611
+ for issue in issue_list:
612
+ severity = issue.get("severity", "low")
613
+ severity_counts[severity] = severity_counts.get(severity, 0) + 1
614
+
615
+ return severity_counts
616
+
617
+ def _generate_visualization_data(self, df: pd.DataFrame) -> Dict[str, Any]:
618
+ """Generate data for visualizations"""
619
+ viz_data = {}
620
+
621
+ # Numeric distributions
622
+ numeric_cols = df.select_dtypes(include=[np.number]).columns
623
+ if len(numeric_cols) > 0:
624
+ viz_data["numeric_distributions"] = {
625
+ col: {
626
+ # Sample for performance
627
+ "values": df[col].dropna().tolist()[:1000],
628
+ "bins": 30,
629
+ }
630
+ for col in numeric_cols[:5] # Limit to first 5
631
+ }
632
+
633
+ # Categorical distributions
634
+ categorical_cols = df.select_dtypes(include=["object", "category"]).columns
635
+ if len(categorical_cols) > 0:
636
+ viz_data["categorical_distributions"] = {col: df[col].value_counts().head(10).to_dict() for col in categorical_cols[:5]} # Limit to first 5
637
+
638
+ return viz_data