aiecs 1.0.1__py3-none-any.whl → 1.7.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic. Click here for more details.

Files changed (340) hide show
  1. aiecs/__init__.py +13 -16
  2. aiecs/__main__.py +7 -7
  3. aiecs/aiecs_client.py +269 -75
  4. aiecs/application/executors/operation_executor.py +79 -54
  5. aiecs/application/knowledge_graph/__init__.py +7 -0
  6. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  7. aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
  8. aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
  11. aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
  12. aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
  13. aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
  14. aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
  15. aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
  16. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  17. aiecs/application/knowledge_graph/extractors/base.py +98 -0
  18. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
  19. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
  20. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
  21. aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
  22. aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
  23. aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
  24. aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
  25. aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
  26. aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
  27. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
  28. aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
  29. aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
  30. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
  31. aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
  32. aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
  33. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
  34. aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
  35. aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
  36. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  37. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
  38. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
  39. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  40. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
  41. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  42. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  43. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
  44. aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
  45. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
  46. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  47. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  48. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
  49. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
  50. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
  51. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
  52. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
  53. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
  54. aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
  55. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
  56. aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
  57. aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
  58. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
  59. aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
  60. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  61. aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
  62. aiecs/application/knowledge_graph/search/reranker.py +293 -0
  63. aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
  64. aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
  65. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  66. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
  67. aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
  68. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  69. aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
  70. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  71. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
  72. aiecs/common/__init__.py +9 -0
  73. aiecs/common/knowledge_graph/__init__.py +17 -0
  74. aiecs/common/knowledge_graph/runnable.py +471 -0
  75. aiecs/config/__init__.py +20 -5
  76. aiecs/config/config.py +762 -31
  77. aiecs/config/graph_config.py +131 -0
  78. aiecs/config/tool_config.py +435 -0
  79. aiecs/core/__init__.py +29 -13
  80. aiecs/core/interface/__init__.py +2 -2
  81. aiecs/core/interface/execution_interface.py +22 -22
  82. aiecs/core/interface/storage_interface.py +37 -88
  83. aiecs/core/registry/__init__.py +31 -0
  84. aiecs/core/registry/service_registry.py +92 -0
  85. aiecs/domain/__init__.py +270 -1
  86. aiecs/domain/agent/__init__.py +191 -0
  87. aiecs/domain/agent/base_agent.py +3949 -0
  88. aiecs/domain/agent/exceptions.py +99 -0
  89. aiecs/domain/agent/graph_aware_mixin.py +569 -0
  90. aiecs/domain/agent/hybrid_agent.py +1731 -0
  91. aiecs/domain/agent/integration/__init__.py +29 -0
  92. aiecs/domain/agent/integration/context_compressor.py +216 -0
  93. aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
  94. aiecs/domain/agent/integration/protocols.py +281 -0
  95. aiecs/domain/agent/integration/retry_policy.py +218 -0
  96. aiecs/domain/agent/integration/role_config.py +213 -0
  97. aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
  98. aiecs/domain/agent/lifecycle.py +291 -0
  99. aiecs/domain/agent/llm_agent.py +692 -0
  100. aiecs/domain/agent/memory/__init__.py +12 -0
  101. aiecs/domain/agent/memory/conversation.py +1124 -0
  102. aiecs/domain/agent/migration/__init__.py +14 -0
  103. aiecs/domain/agent/migration/conversion.py +163 -0
  104. aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
  105. aiecs/domain/agent/models.py +894 -0
  106. aiecs/domain/agent/observability.py +479 -0
  107. aiecs/domain/agent/persistence.py +449 -0
  108. aiecs/domain/agent/prompts/__init__.py +29 -0
  109. aiecs/domain/agent/prompts/builder.py +159 -0
  110. aiecs/domain/agent/prompts/formatters.py +187 -0
  111. aiecs/domain/agent/prompts/template.py +255 -0
  112. aiecs/domain/agent/registry.py +253 -0
  113. aiecs/domain/agent/tool_agent.py +444 -0
  114. aiecs/domain/agent/tools/__init__.py +15 -0
  115. aiecs/domain/agent/tools/schema_generator.py +377 -0
  116. aiecs/domain/community/__init__.py +155 -0
  117. aiecs/domain/community/agent_adapter.py +469 -0
  118. aiecs/domain/community/analytics.py +432 -0
  119. aiecs/domain/community/collaborative_workflow.py +648 -0
  120. aiecs/domain/community/communication_hub.py +634 -0
  121. aiecs/domain/community/community_builder.py +320 -0
  122. aiecs/domain/community/community_integration.py +796 -0
  123. aiecs/domain/community/community_manager.py +803 -0
  124. aiecs/domain/community/decision_engine.py +849 -0
  125. aiecs/domain/community/exceptions.py +231 -0
  126. aiecs/domain/community/models/__init__.py +33 -0
  127. aiecs/domain/community/models/community_models.py +234 -0
  128. aiecs/domain/community/resource_manager.py +461 -0
  129. aiecs/domain/community/shared_context_manager.py +589 -0
  130. aiecs/domain/context/__init__.py +40 -10
  131. aiecs/domain/context/context_engine.py +1910 -0
  132. aiecs/domain/context/conversation_models.py +87 -53
  133. aiecs/domain/context/graph_memory.py +582 -0
  134. aiecs/domain/execution/model.py +12 -4
  135. aiecs/domain/knowledge_graph/__init__.py +19 -0
  136. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  137. aiecs/domain/knowledge_graph/models/entity.py +148 -0
  138. aiecs/domain/knowledge_graph/models/evidence.py +178 -0
  139. aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
  140. aiecs/domain/knowledge_graph/models/path.py +171 -0
  141. aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
  142. aiecs/domain/knowledge_graph/models/query.py +261 -0
  143. aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
  144. aiecs/domain/knowledge_graph/models/relation.py +202 -0
  145. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  146. aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
  147. aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
  148. aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
  149. aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
  150. aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
  151. aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
  152. aiecs/domain/task/dsl_processor.py +172 -56
  153. aiecs/domain/task/model.py +20 -8
  154. aiecs/domain/task/task_context.py +27 -24
  155. aiecs/infrastructure/__init__.py +0 -2
  156. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  157. aiecs/infrastructure/graph_storage/base.py +837 -0
  158. aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
  159. aiecs/infrastructure/graph_storage/cache.py +424 -0
  160. aiecs/infrastructure/graph_storage/distributed.py +223 -0
  161. aiecs/infrastructure/graph_storage/error_handling.py +380 -0
  162. aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
  163. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  164. aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
  165. aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
  166. aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
  167. aiecs/infrastructure/graph_storage/metrics.py +344 -0
  168. aiecs/infrastructure/graph_storage/migration.py +400 -0
  169. aiecs/infrastructure/graph_storage/pagination.py +483 -0
  170. aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
  171. aiecs/infrastructure/graph_storage/postgres.py +1563 -0
  172. aiecs/infrastructure/graph_storage/property_storage.py +353 -0
  173. aiecs/infrastructure/graph_storage/protocols.py +76 -0
  174. aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
  175. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  176. aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
  177. aiecs/infrastructure/graph_storage/streaming.py +487 -0
  178. aiecs/infrastructure/graph_storage/tenant.py +412 -0
  179. aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
  180. aiecs/infrastructure/messaging/websocket_manager.py +51 -35
  181. aiecs/infrastructure/monitoring/__init__.py +22 -0
  182. aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
  183. aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
  184. aiecs/infrastructure/monitoring/structured_logger.py +3 -7
  185. aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
  186. aiecs/infrastructure/persistence/__init__.py +14 -1
  187. aiecs/infrastructure/persistence/context_engine_client.py +184 -0
  188. aiecs/infrastructure/persistence/database_manager.py +67 -43
  189. aiecs/infrastructure/persistence/file_storage.py +180 -103
  190. aiecs/infrastructure/persistence/redis_client.py +74 -21
  191. aiecs/llm/__init__.py +73 -25
  192. aiecs/llm/callbacks/__init__.py +11 -0
  193. aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
  194. aiecs/llm/client_factory.py +230 -37
  195. aiecs/llm/client_resolver.py +155 -0
  196. aiecs/llm/clients/__init__.py +38 -0
  197. aiecs/llm/clients/base_client.py +328 -0
  198. aiecs/llm/clients/google_function_calling_mixin.py +415 -0
  199. aiecs/llm/clients/googleai_client.py +314 -0
  200. aiecs/llm/clients/openai_client.py +158 -0
  201. aiecs/llm/clients/openai_compatible_mixin.py +367 -0
  202. aiecs/llm/clients/vertex_client.py +1186 -0
  203. aiecs/llm/clients/xai_client.py +201 -0
  204. aiecs/llm/config/__init__.py +51 -0
  205. aiecs/llm/config/config_loader.py +272 -0
  206. aiecs/llm/config/config_validator.py +206 -0
  207. aiecs/llm/config/model_config.py +143 -0
  208. aiecs/llm/protocols.py +149 -0
  209. aiecs/llm/utils/__init__.py +10 -0
  210. aiecs/llm/utils/validate_config.py +89 -0
  211. aiecs/main.py +140 -121
  212. aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
  213. aiecs/scripts/aid/__init__.py +19 -0
  214. aiecs/scripts/aid/module_checker.py +499 -0
  215. aiecs/scripts/aid/version_manager.py +235 -0
  216. aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
  217. aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
  218. aiecs/scripts/dependance_check/__init__.py +15 -0
  219. aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
  220. aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
  221. aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
  222. aiecs/scripts/dependance_patch/__init__.py +7 -0
  223. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  224. aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
  225. aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
  226. aiecs/scripts/knowledge_graph/__init__.py +3 -0
  227. aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
  228. aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
  229. aiecs/scripts/tools_develop/README.md +671 -0
  230. aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
  231. aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
  232. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  233. aiecs/scripts/tools_develop/__init__.py +21 -0
  234. aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
  235. aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
  236. aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
  237. aiecs/scripts/tools_develop/schema_coverage.py +511 -0
  238. aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
  239. aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
  240. aiecs/scripts/tools_develop/verify_tools.py +352 -0
  241. aiecs/tasks/__init__.py +0 -1
  242. aiecs/tasks/worker.py +115 -47
  243. aiecs/tools/__init__.py +194 -72
  244. aiecs/tools/apisource/__init__.py +99 -0
  245. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  246. aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
  247. aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
  248. aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
  249. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  250. aiecs/tools/apisource/monitoring/metrics.py +330 -0
  251. aiecs/tools/apisource/providers/__init__.py +112 -0
  252. aiecs/tools/apisource/providers/base.py +671 -0
  253. aiecs/tools/apisource/providers/census.py +397 -0
  254. aiecs/tools/apisource/providers/fred.py +535 -0
  255. aiecs/tools/apisource/providers/newsapi.py +409 -0
  256. aiecs/tools/apisource/providers/worldbank.py +352 -0
  257. aiecs/tools/apisource/reliability/__init__.py +12 -0
  258. aiecs/tools/apisource/reliability/error_handler.py +363 -0
  259. aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
  260. aiecs/tools/apisource/tool.py +832 -0
  261. aiecs/tools/apisource/utils/__init__.py +9 -0
  262. aiecs/tools/apisource/utils/validators.py +334 -0
  263. aiecs/tools/base_tool.py +415 -21
  264. aiecs/tools/docs/__init__.py +121 -0
  265. aiecs/tools/docs/ai_document_orchestrator.py +607 -0
  266. aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
  267. aiecs/tools/docs/content_insertion_tool.py +1320 -0
  268. aiecs/tools/docs/document_creator_tool.py +1464 -0
  269. aiecs/tools/docs/document_layout_tool.py +1160 -0
  270. aiecs/tools/docs/document_parser_tool.py +1016 -0
  271. aiecs/tools/docs/document_writer_tool.py +2008 -0
  272. aiecs/tools/knowledge_graph/__init__.py +17 -0
  273. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
  274. aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
  275. aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
  276. aiecs/tools/langchain_adapter.py +300 -138
  277. aiecs/tools/schema_generator.py +455 -0
  278. aiecs/tools/search_tool/__init__.py +100 -0
  279. aiecs/tools/search_tool/analyzers.py +581 -0
  280. aiecs/tools/search_tool/cache.py +264 -0
  281. aiecs/tools/search_tool/constants.py +128 -0
  282. aiecs/tools/search_tool/context.py +224 -0
  283. aiecs/tools/search_tool/core.py +778 -0
  284. aiecs/tools/search_tool/deduplicator.py +119 -0
  285. aiecs/tools/search_tool/error_handler.py +242 -0
  286. aiecs/tools/search_tool/metrics.py +343 -0
  287. aiecs/tools/search_tool/rate_limiter.py +172 -0
  288. aiecs/tools/search_tool/schemas.py +275 -0
  289. aiecs/tools/statistics/__init__.py +80 -0
  290. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
  291. aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
  292. aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
  293. aiecs/tools/statistics/data_loader_tool.py +555 -0
  294. aiecs/tools/statistics/data_profiler_tool.py +638 -0
  295. aiecs/tools/statistics/data_transformer_tool.py +580 -0
  296. aiecs/tools/statistics/data_visualizer_tool.py +498 -0
  297. aiecs/tools/statistics/model_trainer_tool.py +507 -0
  298. aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
  299. aiecs/tools/task_tools/__init__.py +49 -36
  300. aiecs/tools/task_tools/chart_tool.py +200 -184
  301. aiecs/tools/task_tools/classfire_tool.py +268 -267
  302. aiecs/tools/task_tools/image_tool.py +220 -141
  303. aiecs/tools/task_tools/office_tool.py +226 -146
  304. aiecs/tools/task_tools/pandas_tool.py +477 -121
  305. aiecs/tools/task_tools/report_tool.py +390 -142
  306. aiecs/tools/task_tools/research_tool.py +149 -79
  307. aiecs/tools/task_tools/scraper_tool.py +339 -145
  308. aiecs/tools/task_tools/stats_tool.py +448 -209
  309. aiecs/tools/temp_file_manager.py +26 -24
  310. aiecs/tools/tool_executor/__init__.py +18 -16
  311. aiecs/tools/tool_executor/tool_executor.py +364 -52
  312. aiecs/utils/LLM_output_structor.py +74 -48
  313. aiecs/utils/__init__.py +14 -3
  314. aiecs/utils/base_callback.py +0 -3
  315. aiecs/utils/cache_provider.py +696 -0
  316. aiecs/utils/execution_utils.py +50 -31
  317. aiecs/utils/prompt_loader.py +1 -0
  318. aiecs/utils/token_usage_repository.py +37 -11
  319. aiecs/ws/socket_server.py +14 -4
  320. {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/METADATA +52 -15
  321. aiecs-1.7.17.dist-info/RECORD +337 -0
  322. aiecs-1.7.17.dist-info/entry_points.txt +13 -0
  323. aiecs/config/registry.py +0 -19
  324. aiecs/domain/context/content_engine.py +0 -982
  325. aiecs/llm/base_client.py +0 -99
  326. aiecs/llm/openai_client.py +0 -125
  327. aiecs/llm/vertex_client.py +0 -186
  328. aiecs/llm/xai_client.py +0 -184
  329. aiecs/scripts/dependency_checker.py +0 -857
  330. aiecs/scripts/quick_dependency_check.py +0 -269
  331. aiecs/tools/task_tools/search_api.py +0 -7
  332. aiecs-1.0.1.dist-info/RECORD +0 -90
  333. aiecs-1.0.1.dist-info/entry_points.txt +0 -7
  334. /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
  335. /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
  336. /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
  337. /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
  338. {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/WHEEL +0 -0
  339. {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/licenses/LICENSE +0 -0
  340. {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,555 @@
1
+ """
2
+ Data Loader Tool - Universal data loading from multiple file formats
3
+
4
+ This tool provides comprehensive data loading capabilities with:
5
+ - Auto-detection of file formats
6
+ - Multiple loading strategies (full, streaming, chunked, lazy)
7
+ - Data quality validation on load
8
+ - Schema inference and validation
9
+ - Support for CSV, Excel, JSON, Parquet, and other formats
10
+ """
11
+
12
+ import os
13
+ import logging
14
+ from typing import Dict, Any, List, Optional, Union, Iterator
15
+ from enum import Enum
16
+ from pathlib import Path
17
+
18
+ import pandas as pd # type: ignore[import-untyped]
19
+ from pydantic import BaseModel, Field
20
+ from pydantic_settings import BaseSettings, SettingsConfigDict
21
+
22
+ from aiecs.tools.base_tool import BaseTool
23
+ from aiecs.tools import register_tool
24
+
25
+
26
class DataSourceType(str, Enum):
    """File formats a caller can name when asking the loader to read a source.

    Every member is also a ``str``, so a member compares equal to its
    lowercase value (for example ``DataSourceType.CSV == "csv"``).
    """

    # Concrete formats
    CSV = "csv"
    EXCEL = "excel"
    JSON = "json"
    PARQUET = "parquet"
    FEATHER = "feather"
    HDF5 = "hdf5"
    STATA = "stata"
    SAS = "sas"
    SPSS = "spss"

    # Sentinel: load_data / stream_data swap this for the result of
    # self._detect_format(source) before reading anything.
    AUTO = "auto"
39
+
40
+
41
class LoadStrategy(str, Enum):
    """Ways the loader can be asked to bring a source into memory.

    Members double as plain strings (``LoadStrategy.LAZY == "lazy"``).
    """

    # Strategies load_data dispatches on
    FULL_LOAD = "full_load"
    STREAMING = "streaming"
    CHUNKED = "chunked"
    LAZY = "lazy"

    # Declared, but load_data has no branch for it and rejects it with
    # DataLoaderError("Unsupported loading strategy: ...").
    INCREMENTAL = "incremental"
49
+
50
+
51
class DataLoaderError(Exception):
    """Root of the data loader's exception hierarchy.

    Catching this type also catches every more specific loader error
    defined below.
    """


class FileFormatError(DataLoaderError):
    """Signals a file format that is unsupported or could not be detected."""


class SchemaValidationError(DataLoaderError):
    """Signals that checking data against an expected schema failed."""


class DataQualityError(DataLoaderError):
    """Signals that data quality problems were found."""
65
+
66
+
67
+ @register_tool("data_loader")
68
+ class DataLoaderTool(BaseTool):
69
+ """
70
+ Universal data loading tool that can:
71
+ 1. Load data from multiple file formats
72
+ 2. Auto-detect data formats and schemas
73
+ 3. Handle large datasets with streaming
74
+ 4. Validate data quality on load
75
+
76
+ Integrates with pandas_tool for core data operations.
77
+ """
78
+
79
+ # Configuration schema
80
class Config(BaseSettings):
    """Configuration for the data loader tool

    Automatically reads from environment variables with DATA_LOADER_ prefix.
    Example: DATA_LOADER_MAX_FILE_SIZE_MB -> max_file_size_mb
    """

    model_config = SettingsConfigDict(env_prefix="DATA_LOADER_")

    # --- Size limits -----------------------------------------------------
    # load_data only logs a warning when a file is larger than this; the
    # load itself still proceeds.
    max_file_size_mb: int = Field(
        default=500,
        description="Maximum file size in megabytes",
    )
    # Used by load_data when the caller passes no chunk_size.
    default_chunk_size: int = Field(
        default=10000,
        description="Default chunk size for chunked loading",
    )
    max_memory_usage_mb: int = Field(
        default=2000,
        description="Maximum memory usage in megabytes",
    )

    # --- Feature switches ------------------------------------------------
    # load_data validates a caller-supplied schema only while this is true.
    enable_schema_inference: bool = Field(default=True, description="Whether to enable automatic schema inference")
    # load_data produces a quality report for DataFrame results only while
    # this is true.
    enable_quality_validation: bool = Field(default=True, description="Whether to enable data quality validation")

    # --- Text handling ---------------------------------------------------
    default_encoding: str = Field(default="utf-8", description="Default text encoding for file operations")
104
+
105
def __init__(self, config: Optional[Dict[str, Any]] = None, **kwargs):
    """
    Build a DataLoaderTool and prepare its settings, logger and helpers.

    BaseTool resolves the configuration from these places:
    1. Explicit config dict (highest priority)
    2. YAML config files (config/tools/data_loader.yaml)
    3. Environment variables (via dotenv from .env files)
    4. Tool defaults (lowest priority)

    Args:
        config: Optional configuration overrides
        **kwargs: Additional arguments passed to BaseTool (e.g., tool_name)
    """
    super().__init__(config, **kwargs)

    # BaseTool leaves the resolved settings object on self._config_obj;
    # when it is missing or falsy, fall back to a default Config instance.
    self.config = self._config_obj if self._config_obj else self.Config()

    module_logger = logging.getLogger(__name__)
    self.logger = module_logger
    # Attach a stream handler only once per module logger so that creating
    # several tools does not duplicate every log line.
    if not module_logger.handlers:
        stream_handler = logging.StreamHandler()
        log_format = logging.Formatter("%(asctime)s %(levelname)s %(message)s")
        stream_handler.setFormatter(log_format)
        module_logger.addHandler(stream_handler)
        # NOTE(review): the rendering this was recovered from had lost its
        # indentation; the level is assumed to be set together with the
        # handler. Confirm against the packaged file.
        module_logger.setLevel(logging.INFO)

    # Wire up the optional helper tools this loader builds on.
    self._init_external_tools()
134
+
135
def _init_external_tools(self):
    """Populate ``self.external_tools`` with the helper tools this loader uses.

    The only entry is ``"pandas"``: a ``PandasTool`` instance when it can be
    imported and constructed, otherwise ``None`` (a warning is logged).
    """
    registry = {}
    self.external_tools = registry

    # The import is done here, not at module level, so that an ImportError
    # degrades to a ``None`` entry instead of breaking the whole tool.
    try:
        from aiecs.tools.task_tools.pandas_tool import PandasTool

        registry["pandas"] = PandasTool()
    except ImportError:
        self.logger.warning("PandasTool not available")
        registry["pandas"] = None
    else:
        self.logger.info("PandasTool initialized successfully")
148
+
149
+ # Schema definitions
150
class Load_dataSchema(BaseModel):
    """Schema for load_data operation"""

    # Only required field: where to read from.
    source: str = Field(description="Path to data source file")

    # How to interpret and read the source.
    source_type: Optional[DataSourceType] = Field(
        default=DataSourceType.AUTO,
        description="Data source type",
    )
    strategy: LoadStrategy = Field(
        default=LoadStrategy.FULL_LOAD,
        description="Loading strategy",
    )

    # Optional checks applied to the loaded data.
    # NOTE(review): load_data names the matching parameter `schema`, not
    # `data_schema` - confirm how the dispatcher maps one onto the other.
    data_schema: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Expected schema for validation",
    )
    validation_rules: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Data quality validation rules",
    )

    # Optional read tuning.
    nrows: Optional[int] = Field(
        default=None,
        description="Number of rows to load",
    )
    chunk_size: Optional[int] = Field(
        default=None,
        description="Chunk size for chunked loading",
    )
    encoding: Optional[str] = Field(
        default=None,
        description="File encoding",
    )
161
+
162
class Detect_formatSchema(BaseModel):
    """Schema for detect_format operation"""

    # The single, required input: the file whose format should be detected.
    source: str = Field(
        description="Path to data source file",
    )
166
+
167
class Validate_schemaSchema(BaseModel):
    """Schema for validate_schema operation"""

    # Either one record (a dict) or a list of records.
    data: Union[Dict[str, Any], List[Dict[str, Any]]] = Field(
        description="Data to validate",
    )
    # NOTE(review): validate_schema names the matching parameter `schema`,
    # not `data_schema` - confirm how the dispatcher maps one onto the other.
    data_schema: Dict[str, Any] = Field(
        description="Expected schema",
    )
172
+
173
class Stream_dataSchema(BaseModel):
    """Schema for stream_data operation"""

    # Required: the file to stream from.
    source: str = Field(
        description="Path to data source file",
    )
    # Same default as the stream_data method's chunk_size parameter.
    chunk_size: int = Field(
        default=10000,
        description="Chunk size for streaming",
    )
    # AUTO asks stream_data to detect the format itself.
    source_type: Optional[DataSourceType] = Field(
        default=DataSourceType.AUTO,
        description="Data source type",
    )
179
+
180
def load_data(
    self,
    source: str,
    source_type: DataSourceType = DataSourceType.AUTO,
    strategy: LoadStrategy = LoadStrategy.FULL_LOAD,
    schema: Optional[Dict[str, Any]] = None,
    validation_rules: Optional[Dict[str, Any]] = None,
    nrows: Optional[int] = None,
    chunk_size: Optional[int] = None,
    encoding: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Load data from source with automatic format detection.

    Args:
        source: Path to data source file
        source_type: Type of data source. ``DataSourceType.AUTO`` or ``None``
            means "detect it from the source".
        strategy: Loading strategy to use
        schema: Expected schema for validation
        validation_rules: Data quality validation rules
        nrows: Number of rows to load (None for all); only forwarded to the
            full-load path
        chunk_size: Chunk size for chunked/streaming loading; falls back to
            the configured ``default_chunk_size``
        encoding: File encoding

    Returns:
        Dict containing:
        - data: Loaded DataFrame or data structure
        - metadata: Metadata about loaded data (plus ``schema_valid`` when a
          schema was checked)
        - quality_report: Quality assessment results (empty dict when
          quality validation is disabled or the data is not a DataFrame)
        - source, source_type, strategy: Echo of what was actually used

    Raises:
        FileFormatError: If format is unsupported. Loader errors keep their
            specific type; their message carries the same
            "Failed to load data: " prefix as every other failure.
        DataLoaderError: If the source is missing, the strategy is
            unsupported, or any other error occurs while loading. The
            original exception is attached as ``__cause__``.
    """
    # NOTE(review): Load_dataSchema calls the schema field `data_schema`
    # while this method takes `schema` - confirm the dispatcher maps them.
    try:
        # Validate source exists
        if not os.path.exists(source):
            raise DataLoaderError(f"Source file not found: {source}")

        # Detect format if auto. None is treated like AUTO because the
        # request schema declares source_type as Optional; without this a
        # None would skip detection and crash on `.value` below.
        if source_type is None or source_type == DataSourceType.AUTO:
            source_type = self._detect_format(source)

        # Oversized files are reported but still loaded.
        file_size_mb = os.path.getsize(source) / (1024 * 1024)
        if file_size_mb > self.config.max_file_size_mb:
            self.logger.warning(f"File size {file_size_mb:.2f}MB exceeds recommended limit")

        # Load data based on strategy
        if strategy == LoadStrategy.FULL_LOAD:
            data = self._load_full(source, source_type, nrows, encoding)
        elif strategy == LoadStrategy.CHUNKED:
            data = self._load_chunked(
                source,
                source_type,
                chunk_size or self.config.default_chunk_size,
                encoding,
            )
        elif strategy == LoadStrategy.STREAMING:
            data = self._load_streaming(
                source,
                source_type,
                chunk_size or self.config.default_chunk_size,
                encoding,
            )
        elif strategy == LoadStrategy.LAZY:
            data = self._load_lazy(source, source_type, encoding)
        else:
            raise DataLoaderError(f"Unsupported loading strategy: {strategy}")

        # Generate metadata
        metadata = self._generate_metadata(data, source, source_type)

        # Validate schema if provided
        if schema and self.config.enable_schema_inference:
            schema_valid = self._validate_schema_internal(data, schema)
            metadata["schema_valid"] = schema_valid

        # Quality validation only applies to DataFrame results.
        quality_report = {}
        if self.config.enable_quality_validation and isinstance(data, pd.DataFrame):
            quality_report = self._validate_quality(data, validation_rules)

        self.logger.info(f"Successfully loaded data from {source}")

        return {
            "data": data,
            "metadata": metadata,
            "quality_report": quality_report,
            "source": source,
            "source_type": source_type.value,
            "strategy": strategy.value,
        }

    except DataLoaderError as e:
        # Keep the specific loader error type (e.g. FileFormatError) so the
        # documented exceptions actually reach the caller, while preserving
        # the historical message prefix.
        self.logger.error(f"Error loading data from {source}: {e}")
        raise type(e)(f"Failed to load data: {e}") from e
    except Exception as e:
        self.logger.error(f"Error loading data from {source}: {e}")
        raise DataLoaderError(f"Failed to load data: {e}") from e
277
+
278
def detect_format(self, source: str) -> Dict[str, Any]:
    """
    Report which data format a source file appears to be.

    Args:
        source: Path to data source file

    Returns:
        Dict with the original ``source``, the ``detected_type`` value, the
        lowercased ``file_extension`` and a ``confidence`` label (always
        ``"high"``).

    Raises:
        FileFormatError: If anything goes wrong while detecting the format
    """
    try:
        fmt = self._detect_format(source)
        type_name = fmt.value
        extension = Path(source).suffix.lower()
    except Exception as e:
        self.logger.error(f"Error detecting format: {e}")
        raise FileFormatError(f"Failed to detect format: {e}")

    return {
        "source": source,
        "detected_type": type_name,
        "file_extension": extension,
        "confidence": "high",
    }
300
+
301
+ def validate_schema(
302
+ self,
303
+ data: Union[Dict[str, Any], List[Dict[str, Any]]],
304
+ schema: Dict[str, Any],
305
+ ) -> Dict[str, Any]:
306
+ """
307
+ Validate data against expected schema.
308
+
309
+ Args:
310
+ data: Data to validate
311
+ schema: Expected schema definition
312
+
313
+ Returns:
314
+ Dict containing validation results
315
+ """
316
+ try:
317
+ # Convert to DataFrame if needed
318
+ if isinstance(data, list):
319
+ df = pd.DataFrame(data)
320
+ elif isinstance(data, dict):
321
+ df = pd.DataFrame([data])
322
+ else:
323
+ df = data
324
+
325
+ is_valid = self._validate_schema_internal(df, schema)
326
+
327
+ issues = []
328
+ if not is_valid:
329
+ # Check column presence
330
+ expected_columns = set(schema.get("columns", {}).keys())
331
+ actual_columns = set(df.columns)
332
+ missing = expected_columns - actual_columns
333
+ extra = actual_columns - expected_columns
334
+
335
+ if missing:
336
+ issues.append(f"Missing columns: {missing}")
337
+ if extra:
338
+ issues.append(f"Extra columns: {extra}")
339
+
340
+ return {
341
+ "valid": is_valid,
342
+ "issues": issues,
343
+ "expected_columns": list(schema.get("columns", {}).keys()),
344
+ "actual_columns": list(df.columns),
345
+ }
346
+
347
+ except Exception as e:
348
+ self.logger.error(f"Error validating schema: {e}")
349
+ raise SchemaValidationError(f"Schema validation failed: {e}")
350
+
351
+ def stream_data(
352
+ self,
353
+ source: str,
354
+ chunk_size: int = 10000,
355
+ source_type: DataSourceType = DataSourceType.AUTO,
356
+ ) -> Dict[str, Any]:
357
+ """
358
+ Stream data in chunks for large files.
359
+
360
+ Args:
361
+ source: Path to data source file
362
+ chunk_size: Size of each chunk
363
+ source_type: Type of data source
364
+
365
+ Returns:
366
+ Dict containing streaming iterator information
367
+ """
368
+ try:
369
+ if source_type == DataSourceType.AUTO:
370
+ source_type = self._detect_format(source)
371
+
372
+ # Create iterator based on format
373
+ if source_type == DataSourceType.CSV:
374
+ iterator = pd.read_csv(source, chunksize=chunk_size)
375
+ elif source_type == DataSourceType.JSON:
376
+ iterator = pd.read_json(source, lines=True, chunksize=chunk_size)
377
+ else:
378
+ raise FileFormatError(f"Streaming not supported for format: {source_type}")
379
+
380
+ return {
381
+ "iterator": iterator,
382
+ "chunk_size": chunk_size,
383
+ "source_type": source_type.value,
384
+ "message": "Streaming iterator created successfully",
385
+ }
386
+
387
+ except Exception as e:
388
+ self.logger.error(f"Error creating stream: {e}")
389
+ raise DataLoaderError(f"Failed to create stream: {e}")
390
+
391
+ # Internal helper methods
392
+
393
+ def _detect_format(self, source: str) -> DataSourceType:
394
+ """Detect file format from extension"""
395
+ ext = Path(source).suffix.lower()
396
+
397
+ format_map = {
398
+ ".csv": DataSourceType.CSV,
399
+ ".xlsx": DataSourceType.EXCEL,
400
+ ".xls": DataSourceType.EXCEL,
401
+ ".json": DataSourceType.JSON,
402
+ ".parquet": DataSourceType.PARQUET,
403
+ ".feather": DataSourceType.FEATHER,
404
+ ".h5": DataSourceType.HDF5,
405
+ ".hdf": DataSourceType.HDF5,
406
+ ".dta": DataSourceType.STATA,
407
+ ".sas7bdat": DataSourceType.SAS,
408
+ ".sav": DataSourceType.SPSS,
409
+ }
410
+
411
+ detected = format_map.get(ext)
412
+ if not detected:
413
+ raise FileFormatError(f"Unsupported file format: {ext}")
414
+
415
+ return detected
416
+
417
+ def _load_full(
418
+ self,
419
+ source: str,
420
+ source_type: DataSourceType,
421
+ nrows: Optional[int],
422
+ encoding: Optional[str],
423
+ ) -> pd.DataFrame:
424
+ """Load entire dataset into memory"""
425
+ encoding = encoding or self.config.default_encoding
426
+
427
+ if source_type == DataSourceType.CSV:
428
+ return pd.read_csv(source, nrows=nrows, encoding=encoding)
429
+ elif source_type == DataSourceType.EXCEL:
430
+ return pd.read_excel(source, nrows=nrows)
431
+ elif source_type == DataSourceType.JSON:
432
+ return pd.read_json(source, nrows=nrows, encoding=encoding)
433
+ elif source_type == DataSourceType.PARQUET:
434
+ return pd.read_parquet(source)
435
+ elif source_type == DataSourceType.FEATHER:
436
+ return pd.read_feather(source)
437
+ elif source_type == DataSourceType.HDF5:
438
+ return pd.read_hdf(source)
439
+ elif source_type == DataSourceType.STATA:
440
+ df = pd.read_stata(source)
441
+ if nrows:
442
+ return df.head(nrows)
443
+ return df
444
+ elif source_type == DataSourceType.SAS:
445
+ return pd.read_sas(source)
446
+ elif source_type == DataSourceType.SPSS:
447
+ try:
448
+ import pyreadstat # type: ignore[import-untyped]
449
+
450
+ df, meta = pyreadstat.read_sav(source)
451
+ return df
452
+ except ImportError:
453
+ raise DataLoaderError("pyreadstat required for SPSS files")
454
+ else:
455
+ raise FileFormatError(f"Unsupported format for full load: {source_type}")
456
+
457
+ def _load_chunked(
458
+ self,
459
+ source: str,
460
+ source_type: DataSourceType,
461
+ chunk_size: int,
462
+ encoding: Optional[str],
463
+ ) -> pd.DataFrame:
464
+ """Load data in chunks and combine"""
465
+ encoding = encoding or self.config.default_encoding
466
+ chunks = []
467
+
468
+ if source_type == DataSourceType.CSV:
469
+ for chunk in pd.read_csv(source, chunksize=chunk_size, encoding=encoding):
470
+ chunks.append(chunk)
471
+ elif source_type == DataSourceType.JSON:
472
+ for chunk in pd.read_json(source, lines=True, chunksize=chunk_size, encoding=encoding):
473
+ chunks.append(chunk)
474
+ else:
475
+ raise FileFormatError(f"Chunked loading not supported for: {source_type}")
476
+
477
+ return pd.concat(chunks, ignore_index=True)
478
+
479
+ def _load_streaming(
480
+ self,
481
+ source: str,
482
+ source_type: DataSourceType,
483
+ chunk_size: int,
484
+ encoding: Optional[str],
485
+ ) -> Iterator[pd.DataFrame]:
486
+ """Create streaming iterator"""
487
+ encoding = encoding or self.config.default_encoding
488
+
489
+ if source_type == DataSourceType.CSV:
490
+ return pd.read_csv(source, chunksize=chunk_size, encoding=encoding)
491
+ elif source_type == DataSourceType.JSON:
492
+ return pd.read_json(source, lines=True, chunksize=chunk_size, encoding=encoding)
493
+ else:
494
+ raise FileFormatError(f"Streaming not supported for: {source_type}")
495
+
496
+ def _load_lazy(self, source: str, source_type: DataSourceType, encoding: Optional[str]) -> Any:
497
+ """Create lazy loading wrapper"""
498
+ # For now, return full load with warning
499
+ self.logger.warning("Lazy loading not fully implemented, using full load")
500
+ return self._load_full(source, source_type, None, encoding)
501
+
502
+ def _generate_metadata(self, data: Any, source: str, source_type: DataSourceType) -> Dict[str, Any]:
503
+ """Generate metadata about loaded data"""
504
+ if isinstance(data, pd.DataFrame):
505
+ return {
506
+ "rows": len(data),
507
+ "columns": len(data.columns),
508
+ "column_names": list(data.columns),
509
+ "dtypes": {col: str(dtype) for col, dtype in data.dtypes.items()},
510
+ "memory_usage_mb": data.memory_usage(deep=True).sum() / (1024 * 1024),
511
+ "file_size_mb": os.path.getsize(source) / (1024 * 1024),
512
+ }
513
+ else:
514
+ return {
515
+ "type": str(type(data)),
516
+ "file_size_mb": os.path.getsize(source) / (1024 * 1024),
517
+ }
518
+
519
+ def _validate_schema_internal(self, data: pd.DataFrame, schema: Dict[str, Any]) -> bool:
520
+ """Internal schema validation"""
521
+ if "columns" not in schema:
522
+ return True
523
+
524
+ expected_columns = set(schema["columns"].keys())
525
+ actual_columns = set(data.columns)
526
+
527
+ return expected_columns.issubset(actual_columns)
528
+
529
+ def _validate_quality(self, data: pd.DataFrame, validation_rules: Optional[Dict[str, Any]]) -> Dict[str, Any]:
530
+ """Validate data quality"""
531
+ quality_report = {
532
+ "total_rows": len(data),
533
+ "total_columns": len(data.columns),
534
+ "missing_values": data.isnull().sum().to_dict(),
535
+ "duplicate_rows": data.duplicated().sum(),
536
+ "quality_score": 1.0,
537
+ }
538
+
539
+ # Calculate quality score
540
+ missing_ratio = data.isnull().sum().sum() / (len(data) * len(data.columns)) if len(data) > 0 else 0
541
+ duplicate_ratio = quality_report["duplicate_rows"] / len(data) if len(data) > 0 else 0
542
+
543
+ quality_score = 1.0 - (missing_ratio * 0.5 + duplicate_ratio * 0.5)
544
+ quality_report["quality_score"] = max(0.0, min(1.0, quality_score))
545
+
546
+ # Add issues list
547
+ issues = []
548
+ if missing_ratio > 0.1:
549
+ issues.append(f"High missing value ratio: {missing_ratio:.2%}")
550
+ if duplicate_ratio > 0.05:
551
+ issues.append(f"High duplicate ratio: {duplicate_ratio:.2%}")
552
+
553
+ quality_report["issues"] = issues
554
+
555
+ return quality_report