aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of aiecs might be problematic.

Files changed (340)
  1. aiecs/__init__.py +13 -16
  2. aiecs/__main__.py +7 -7
  3. aiecs/aiecs_client.py +269 -75
  4. aiecs/application/executors/operation_executor.py +79 -54
  5. aiecs/application/knowledge_graph/__init__.py +7 -0
  6. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  7. aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
  8. aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
  11. aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
  12. aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
  13. aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
  14. aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
  15. aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
  16. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  17. aiecs/application/knowledge_graph/extractors/base.py +98 -0
  18. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
  19. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
  20. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
  21. aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
  22. aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
  23. aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
  24. aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
  25. aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
  26. aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
  27. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
  28. aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
  29. aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
  30. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
  31. aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
  32. aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
  33. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
  34. aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
  35. aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
  36. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  37. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
  38. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
  39. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  40. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
  41. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  42. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  43. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
  44. aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
  45. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
  46. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  47. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  48. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
  49. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
  50. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
  51. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
  52. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
  53. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
  54. aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
  55. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
  56. aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
  57. aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
  58. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
  59. aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
  60. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  61. aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
  62. aiecs/application/knowledge_graph/search/reranker.py +293 -0
  63. aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
  64. aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
  65. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  66. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
  67. aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
  68. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  69. aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
  70. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  71. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
  72. aiecs/common/__init__.py +9 -0
  73. aiecs/common/knowledge_graph/__init__.py +17 -0
  74. aiecs/common/knowledge_graph/runnable.py +471 -0
  75. aiecs/config/__init__.py +20 -5
  76. aiecs/config/config.py +762 -31
  77. aiecs/config/graph_config.py +131 -0
  78. aiecs/config/tool_config.py +399 -0
  79. aiecs/core/__init__.py +29 -13
  80. aiecs/core/interface/__init__.py +2 -2
  81. aiecs/core/interface/execution_interface.py +22 -22
  82. aiecs/core/interface/storage_interface.py +37 -88
  83. aiecs/core/registry/__init__.py +31 -0
  84. aiecs/core/registry/service_registry.py +92 -0
  85. aiecs/domain/__init__.py +270 -1
  86. aiecs/domain/agent/__init__.py +191 -0
  87. aiecs/domain/agent/base_agent.py +3870 -0
  88. aiecs/domain/agent/exceptions.py +99 -0
  89. aiecs/domain/agent/graph_aware_mixin.py +569 -0
  90. aiecs/domain/agent/hybrid_agent.py +1435 -0
  91. aiecs/domain/agent/integration/__init__.py +29 -0
  92. aiecs/domain/agent/integration/context_compressor.py +216 -0
  93. aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
  94. aiecs/domain/agent/integration/protocols.py +281 -0
  95. aiecs/domain/agent/integration/retry_policy.py +218 -0
  96. aiecs/domain/agent/integration/role_config.py +213 -0
  97. aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
  98. aiecs/domain/agent/lifecycle.py +291 -0
  99. aiecs/domain/agent/llm_agent.py +692 -0
  100. aiecs/domain/agent/memory/__init__.py +12 -0
  101. aiecs/domain/agent/memory/conversation.py +1124 -0
  102. aiecs/domain/agent/migration/__init__.py +14 -0
  103. aiecs/domain/agent/migration/conversion.py +163 -0
  104. aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
  105. aiecs/domain/agent/models.py +884 -0
  106. aiecs/domain/agent/observability.py +479 -0
  107. aiecs/domain/agent/persistence.py +449 -0
  108. aiecs/domain/agent/prompts/__init__.py +29 -0
  109. aiecs/domain/agent/prompts/builder.py +159 -0
  110. aiecs/domain/agent/prompts/formatters.py +187 -0
  111. aiecs/domain/agent/prompts/template.py +255 -0
  112. aiecs/domain/agent/registry.py +253 -0
  113. aiecs/domain/agent/tool_agent.py +444 -0
  114. aiecs/domain/agent/tools/__init__.py +15 -0
  115. aiecs/domain/agent/tools/schema_generator.py +364 -0
  116. aiecs/domain/community/__init__.py +155 -0
  117. aiecs/domain/community/agent_adapter.py +469 -0
  118. aiecs/domain/community/analytics.py +432 -0
  119. aiecs/domain/community/collaborative_workflow.py +648 -0
  120. aiecs/domain/community/communication_hub.py +634 -0
  121. aiecs/domain/community/community_builder.py +320 -0
  122. aiecs/domain/community/community_integration.py +796 -0
  123. aiecs/domain/community/community_manager.py +803 -0
  124. aiecs/domain/community/decision_engine.py +849 -0
  125. aiecs/domain/community/exceptions.py +231 -0
  126. aiecs/domain/community/models/__init__.py +33 -0
  127. aiecs/domain/community/models/community_models.py +234 -0
  128. aiecs/domain/community/resource_manager.py +461 -0
  129. aiecs/domain/community/shared_context_manager.py +589 -0
  130. aiecs/domain/context/__init__.py +40 -10
  131. aiecs/domain/context/context_engine.py +1910 -0
  132. aiecs/domain/context/conversation_models.py +87 -53
  133. aiecs/domain/context/graph_memory.py +582 -0
  134. aiecs/domain/execution/model.py +12 -4
  135. aiecs/domain/knowledge_graph/__init__.py +19 -0
  136. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  137. aiecs/domain/knowledge_graph/models/entity.py +148 -0
  138. aiecs/domain/knowledge_graph/models/evidence.py +178 -0
  139. aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
  140. aiecs/domain/knowledge_graph/models/path.py +171 -0
  141. aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
  142. aiecs/domain/knowledge_graph/models/query.py +261 -0
  143. aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
  144. aiecs/domain/knowledge_graph/models/relation.py +202 -0
  145. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  146. aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
  147. aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
  148. aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
  149. aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
  150. aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
  151. aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
  152. aiecs/domain/task/dsl_processor.py +172 -56
  153. aiecs/domain/task/model.py +20 -8
  154. aiecs/domain/task/task_context.py +27 -24
  155. aiecs/infrastructure/__init__.py +0 -2
  156. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  157. aiecs/infrastructure/graph_storage/base.py +837 -0
  158. aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
  159. aiecs/infrastructure/graph_storage/cache.py +424 -0
  160. aiecs/infrastructure/graph_storage/distributed.py +223 -0
  161. aiecs/infrastructure/graph_storage/error_handling.py +380 -0
  162. aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
  163. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  164. aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
  165. aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
  166. aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
  167. aiecs/infrastructure/graph_storage/metrics.py +344 -0
  168. aiecs/infrastructure/graph_storage/migration.py +400 -0
  169. aiecs/infrastructure/graph_storage/pagination.py +483 -0
  170. aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
  171. aiecs/infrastructure/graph_storage/postgres.py +1563 -0
  172. aiecs/infrastructure/graph_storage/property_storage.py +353 -0
  173. aiecs/infrastructure/graph_storage/protocols.py +76 -0
  174. aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
  175. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  176. aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
  177. aiecs/infrastructure/graph_storage/streaming.py +487 -0
  178. aiecs/infrastructure/graph_storage/tenant.py +412 -0
  179. aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
  180. aiecs/infrastructure/messaging/websocket_manager.py +51 -35
  181. aiecs/infrastructure/monitoring/__init__.py +22 -0
  182. aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
  183. aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
  184. aiecs/infrastructure/monitoring/structured_logger.py +3 -7
  185. aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
  186. aiecs/infrastructure/persistence/__init__.py +14 -1
  187. aiecs/infrastructure/persistence/context_engine_client.py +184 -0
  188. aiecs/infrastructure/persistence/database_manager.py +67 -43
  189. aiecs/infrastructure/persistence/file_storage.py +180 -103
  190. aiecs/infrastructure/persistence/redis_client.py +74 -21
  191. aiecs/llm/__init__.py +73 -25
  192. aiecs/llm/callbacks/__init__.py +11 -0
  193. aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
  194. aiecs/llm/client_factory.py +224 -36
  195. aiecs/llm/client_resolver.py +155 -0
  196. aiecs/llm/clients/__init__.py +38 -0
  197. aiecs/llm/clients/base_client.py +324 -0
  198. aiecs/llm/clients/google_function_calling_mixin.py +457 -0
  199. aiecs/llm/clients/googleai_client.py +241 -0
  200. aiecs/llm/clients/openai_client.py +158 -0
  201. aiecs/llm/clients/openai_compatible_mixin.py +367 -0
  202. aiecs/llm/clients/vertex_client.py +897 -0
  203. aiecs/llm/clients/xai_client.py +201 -0
  204. aiecs/llm/config/__init__.py +51 -0
  205. aiecs/llm/config/config_loader.py +272 -0
  206. aiecs/llm/config/config_validator.py +206 -0
  207. aiecs/llm/config/model_config.py +143 -0
  208. aiecs/llm/protocols.py +149 -0
  209. aiecs/llm/utils/__init__.py +10 -0
  210. aiecs/llm/utils/validate_config.py +89 -0
  211. aiecs/main.py +140 -121
  212. aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
  213. aiecs/scripts/aid/__init__.py +19 -0
  214. aiecs/scripts/aid/module_checker.py +499 -0
  215. aiecs/scripts/aid/version_manager.py +235 -0
  216. aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
  217. aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
  218. aiecs/scripts/dependance_check/__init__.py +15 -0
  219. aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
  220. aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
  221. aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
  222. aiecs/scripts/dependance_patch/__init__.py +7 -0
  223. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  224. aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
  225. aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
  226. aiecs/scripts/knowledge_graph/__init__.py +3 -0
  227. aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
  228. aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
  229. aiecs/scripts/tools_develop/README.md +671 -0
  230. aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
  231. aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
  232. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  233. aiecs/scripts/tools_develop/__init__.py +21 -0
  234. aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
  235. aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
  236. aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
  237. aiecs/scripts/tools_develop/schema_coverage.py +511 -0
  238. aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
  239. aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
  240. aiecs/scripts/tools_develop/verify_tools.py +352 -0
  241. aiecs/tasks/__init__.py +0 -1
  242. aiecs/tasks/worker.py +115 -47
  243. aiecs/tools/__init__.py +194 -72
  244. aiecs/tools/apisource/__init__.py +99 -0
  245. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  246. aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
  247. aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
  248. aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
  249. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  250. aiecs/tools/apisource/monitoring/metrics.py +330 -0
  251. aiecs/tools/apisource/providers/__init__.py +112 -0
  252. aiecs/tools/apisource/providers/base.py +671 -0
  253. aiecs/tools/apisource/providers/census.py +397 -0
  254. aiecs/tools/apisource/providers/fred.py +535 -0
  255. aiecs/tools/apisource/providers/newsapi.py +409 -0
  256. aiecs/tools/apisource/providers/worldbank.py +352 -0
  257. aiecs/tools/apisource/reliability/__init__.py +12 -0
  258. aiecs/tools/apisource/reliability/error_handler.py +363 -0
  259. aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
  260. aiecs/tools/apisource/tool.py +832 -0
  261. aiecs/tools/apisource/utils/__init__.py +9 -0
  262. aiecs/tools/apisource/utils/validators.py +334 -0
  263. aiecs/tools/base_tool.py +415 -21
  264. aiecs/tools/docs/__init__.py +121 -0
  265. aiecs/tools/docs/ai_document_orchestrator.py +607 -0
  266. aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
  267. aiecs/tools/docs/content_insertion_tool.py +1320 -0
  268. aiecs/tools/docs/document_creator_tool.py +1323 -0
  269. aiecs/tools/docs/document_layout_tool.py +1160 -0
  270. aiecs/tools/docs/document_parser_tool.py +1011 -0
  271. aiecs/tools/docs/document_writer_tool.py +1829 -0
  272. aiecs/tools/knowledge_graph/__init__.py +17 -0
  273. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
  274. aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
  275. aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
  276. aiecs/tools/langchain_adapter.py +300 -138
  277. aiecs/tools/schema_generator.py +455 -0
  278. aiecs/tools/search_tool/__init__.py +100 -0
  279. aiecs/tools/search_tool/analyzers.py +581 -0
  280. aiecs/tools/search_tool/cache.py +264 -0
  281. aiecs/tools/search_tool/constants.py +128 -0
  282. aiecs/tools/search_tool/context.py +224 -0
  283. aiecs/tools/search_tool/core.py +778 -0
  284. aiecs/tools/search_tool/deduplicator.py +119 -0
  285. aiecs/tools/search_tool/error_handler.py +242 -0
  286. aiecs/tools/search_tool/metrics.py +343 -0
  287. aiecs/tools/search_tool/rate_limiter.py +172 -0
  288. aiecs/tools/search_tool/schemas.py +275 -0
  289. aiecs/tools/statistics/__init__.py +80 -0
  290. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
  291. aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
  292. aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
  293. aiecs/tools/statistics/data_loader_tool.py +555 -0
  294. aiecs/tools/statistics/data_profiler_tool.py +638 -0
  295. aiecs/tools/statistics/data_transformer_tool.py +580 -0
  296. aiecs/tools/statistics/data_visualizer_tool.py +498 -0
  297. aiecs/tools/statistics/model_trainer_tool.py +507 -0
  298. aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
  299. aiecs/tools/task_tools/__init__.py +49 -36
  300. aiecs/tools/task_tools/chart_tool.py +200 -184
  301. aiecs/tools/task_tools/classfire_tool.py +268 -267
  302. aiecs/tools/task_tools/image_tool.py +175 -131
  303. aiecs/tools/task_tools/office_tool.py +226 -146
  304. aiecs/tools/task_tools/pandas_tool.py +477 -121
  305. aiecs/tools/task_tools/report_tool.py +390 -142
  306. aiecs/tools/task_tools/research_tool.py +149 -79
  307. aiecs/tools/task_tools/scraper_tool.py +339 -145
  308. aiecs/tools/task_tools/stats_tool.py +448 -209
  309. aiecs/tools/temp_file_manager.py +26 -24
  310. aiecs/tools/tool_executor/__init__.py +18 -16
  311. aiecs/tools/tool_executor/tool_executor.py +364 -52
  312. aiecs/utils/LLM_output_structor.py +74 -48
  313. aiecs/utils/__init__.py +14 -3
  314. aiecs/utils/base_callback.py +0 -3
  315. aiecs/utils/cache_provider.py +696 -0
  316. aiecs/utils/execution_utils.py +50 -31
  317. aiecs/utils/prompt_loader.py +1 -0
  318. aiecs/utils/token_usage_repository.py +37 -11
  319. aiecs/ws/socket_server.py +14 -4
  320. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
  321. aiecs-1.7.6.dist-info/RECORD +337 -0
  322. aiecs-1.7.6.dist-info/entry_points.txt +13 -0
  323. aiecs/config/registry.py +0 -19
  324. aiecs/domain/context/content_engine.py +0 -982
  325. aiecs/llm/base_client.py +0 -99
  326. aiecs/llm/openai_client.py +0 -125
  327. aiecs/llm/vertex_client.py +0 -186
  328. aiecs/llm/xai_client.py +0 -184
  329. aiecs/scripts/dependency_checker.py +0 -857
  330. aiecs/scripts/quick_dependency_check.py +0 -269
  331. aiecs/tools/task_tools/search_api.py +0 -7
  332. aiecs-1.0.1.dist-info/RECORD +0 -90
  333. aiecs-1.0.1.dist-info/entry_points.txt +0 -7
  334. /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
  335. /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
  336. /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
  337. /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
  338. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
  339. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
  340. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
@@ -1,57 +1,35 @@
  from io import StringIO
- import pandas as pd
+ import pandas as pd # type: ignore[import-untyped]
  import numpy as np
- from typing import List, Dict, Union, Optional, Any
- from pydantic import BaseModel, ValidationError as PydanticValidationError, ConfigDict
+ from typing import List, Dict, Union, Optional, cast, Any
+ from pydantic import Field, BaseModel
+ from pydantic_settings import BaseSettings, SettingsConfigDict
  import logging

  from aiecs.tools.base_tool import BaseTool
  from aiecs.tools import register_tool

  # Custom exceptions
+
+
  class PandasToolError(Exception):
      """Base exception for PandasTool errors."""
-     pass
+

  class InputValidationError(PandasToolError):
      """Input validation error."""
-     pass
+

  class DataFrameError(PandasToolError):
      """DataFrame operation error."""
-     pass
+

  class SecurityError(PandasToolError):
      """Security-related error."""
-     pass
+

  class ValidationError(PandasToolError):
      """Validation error."""
-     pass
-
- # Configuration for PandasTool
- class PandasToolConfig(BaseModel):
-     """
-     Configuration for PandasTool.
-
-     Attributes:
-         csv_delimiter (str): Delimiter for CSV files.
-         encoding (str): Encoding for file operations.
-         default_agg (Dict[str, str]): Default aggregation functions.
-         chunk_size (int): Chunk size for large file processing.
-         max_csv_size (int): Threshold for chunked CSV processing.
-         allowed_file_extensions (List[str]): Allowed file extensions.
-         env_prefix (str): Environment variable prefix.
-     """
-     csv_delimiter: str = ","
-     encoding: str = "utf-8"
-     default_agg: Dict[str, str] = {"numeric": "mean", "object": "count"}
-     chunk_size: int = 10000
-     max_csv_size: int = 1000000
-     allowed_file_extensions: List[str] = ['.csv', '.xlsx', '.json']
-     env_prefix: str = "PANDAS_TOOL_"
-
-     model_config = ConfigDict(env_prefix="PANDAS_TOOL_")


  @register_tool("pandas")
@@ -72,27 +50,302 @@ class PandasTool(BaseTool):

      Inherits from BaseTool to leverage ToolExecutor for caching, concurrency, and error handling.
      """
-     def __init__(self, config: Optional[Dict] = None):
+
+     # Configuration schema
+     class Config(BaseSettings):
+         """Configuration for the pandas tool
+
+         Automatically reads from environment variables with PANDAS_TOOL_ prefix.
+         Example: PANDAS_TOOL_CSV_DELIMITER -> csv_delimiter
+         """
+
+         model_config = SettingsConfigDict(env_prefix="PANDAS_TOOL_")
+
+         csv_delimiter: str = Field(default=",", description="Delimiter for CSV files")
+         encoding: str = Field(default="utf-8", description="Encoding for file operations")
+         default_agg: Dict[str, str] = Field(
+             default={"numeric": "mean", "object": "count"},
+             description="Default aggregation functions",
+         )
+         chunk_size: int = Field(default=10000, description="Chunk size for large file processing")
+         max_csv_size: int = Field(default=1000000, description="Threshold for chunked CSV processing")
+         allowed_file_extensions: List[str] = Field(
+             default=[".csv", ".xlsx", ".json"],
+             description="Allowed file extensions",
+         )
+
+     # Schema definitions
+     class Read_csvSchema(BaseModel):
+         """Schema for read_csv operation"""
+
+         csv_str: str = Field(description="CSV string content to read into a DataFrame")
+
+     class Read_jsonSchema(BaseModel):
+         """Schema for read_json operation"""
+
+         json_str: str = Field(description="JSON string content to read into a DataFrame")
+
+     class Read_fileSchema(BaseModel):
+         """Schema for read_file operation"""
+
+         file_path: str = Field(description="Path to the file to read")
+         file_type: str = Field(default="csv", description="Type of file: 'csv', 'excel', or 'json'")
+
+     class Write_fileSchema(BaseModel):
+         """Schema for write_file operation"""
+
+         records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) to write as DataFrame")
+         file_path: str = Field(description="Path where the file will be written")
+         file_type: str = Field(default="csv", description="Type of file to write: 'csv', 'excel', or 'json'")
+
+     class SummarySchema(BaseModel):
+         """Schema for summary operation"""
+
+         records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+
+     class DescribeSchema(BaseModel):
+         """Schema for describe operation"""
+
+         records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+         columns: Optional[List[str]] = Field(default=None, description="Optional list of column names to describe. If None, describes all columns")
+
+     class Value_countsSchema(BaseModel):
+         """Schema for value_counts operation"""
+
+         records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+         columns: List[str] = Field(description="List of column names for which to compute value counts")
+
+     class FilterSchema(BaseModel):
+         """Schema for filter operation"""
+
+         records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+         condition: str = Field(description="Query condition string to filter rows (e.g., 'age > 30')")
+
+     class Select_columnsSchema(BaseModel):
+         """Schema for select_columns operation"""
+
+         records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+         columns: List[str] = Field(description="List of column names to select from the DataFrame")
+
+     class Drop_columnsSchema(BaseModel):
+         """Schema for drop_columns operation"""
+
+         records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+         columns: List[str] = Field(description="List of column names to drop from the DataFrame")
+
+     class Drop_duplicatesSchema(BaseModel):
+         """Schema for drop_duplicates operation"""
+
+         records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+         columns: Optional[List[str]] = Field(default=None, description="Optional list of column names to consider when identifying duplicates. If None, considers all columns")
+
+     class DropnaSchema(BaseModel):
+         """Schema for dropna operation"""
+
+         records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+         axis: int = Field(default=0, description="Axis along which to drop missing values: 0 for rows, 1 for columns")
+         how: str = Field(default="any", description="How to determine if a row/column is dropped: 'any' drops if any value is missing, 'all' drops if all values are missing")
+
+     class GroupbySchema(BaseModel):
+         """Schema for groupby operation"""
+
+         records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+         by: List[str] = Field(description="List of column names to group by")
+         agg: Dict[str, str] = Field(description="Dictionary mapping column names to aggregation functions (e.g., {'age': 'mean', 'salary': 'sum'})")
+
+     class Pivot_tableSchema(BaseModel):
+         """Schema for pivot_table operation"""
+
+         records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+         values: List[str] = Field(description="List of column names to aggregate")
+         index: List[str] = Field(description="List of column names to use as row index")
+         columns: List[str] = Field(description="List of column names to use as column index")
+         aggfunc: str = Field(default="mean", description="Aggregation function to apply (e.g., 'mean', 'sum', 'count')")
+
+     class MergeSchema(BaseModel):
+         """Schema for merge operation"""
+
+         records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the left DataFrame")
+         records_right: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the right DataFrame")
+         on: Union[str, List[str]] = Field(description="Column name(s) to join on. Can be a single string or list of strings")
+         join_type: str = Field(default="inner", description="Type of join: 'inner', 'left', 'right', or 'outer'")
+
+     class ConcatSchema(BaseModel):
+         """Schema for concat operation"""
+
+         records_list: List[List[Dict[str, Any]]] = Field(description="List of DataFrames (each as a list of dictionaries) to concatenate")
+         axis: int = Field(default=0, description="Axis along which to concatenate: 0 for rows (vertical), 1 for columns (horizontal)")
+
+     class Sort_valuesSchema(BaseModel):
+         """Schema for sort_values operation"""
+
+         records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+         sort_by: List[str] = Field(description="List of column names to sort by")
+         ascending: Union[bool, List[bool]] = Field(default=True, description="Whether to sort in ascending order. Can be a single boolean or list of booleans (one per column)")
+
+     class Rename_columnsSchema(BaseModel):
+         """Schema for rename_columns operation"""
+
+         records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+         mapping: Dict[str, str] = Field(description="Dictionary mapping old column names to new column names")
+
+     class Replace_valuesSchema(BaseModel):
+         """Schema for replace_values operation"""
+
+         records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+         to_replace: Dict[str, Any] = Field(description="Dictionary mapping values to replace to their replacement values")
+         columns: Optional[List[str]] = Field(default=None, description="Optional list of column names to apply replacement to. If None, applies to all columns")
+
+     class Fill_naSchema(BaseModel):
+         """Schema for fill_na operation"""
+
+         records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+         value: Union[str, int, float] = Field(description="Value to use for filling missing values")
+         columns: Optional[List[str]] = Field(default=None, description="Optional list of column names to fill. If None, fills all columns")
+
+     class AstypeSchema(BaseModel):
+         """Schema for astype operation"""
+
+         records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+         dtypes: Dict[str, str] = Field(description="Dictionary mapping column names to target data types (e.g., {'age': 'int64', 'name': 'string'})")
+
+     class ApplySchema(BaseModel):
+         """Schema for apply operation"""
+
+         records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+         func: str = Field(description="Name of the function to apply (e.g., 'upper', 'lower', 'strip', 'abs', 'round')")
+         columns: List[str] = Field(description="List of column names to apply the function to")
+         axis: int = Field(default=0, description="Axis along which to apply: 0 for columns, 1 for rows")
+
+     class MeltSchema(BaseModel):
+         """Schema for melt operation"""
+
+         records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+         id_vars: List[str] = Field(description="List of column names to use as identifier variables (kept as columns)")
+         value_vars: List[str] = Field(description="List of column names to unpivot (melted into rows)")
+
+     class PivotSchema(BaseModel):
+         """Schema for pivot operation"""
+
+         records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+         index: str = Field(description="Column name to use as row index")
+         columns: str = Field(description="Column name to use as column index")
+         values: str = Field(description="Column name containing values to pivot")
+
+     class StackSchema(BaseModel):
+         """Schema for stack operation"""
+
+         records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+
+     class UnstackSchema(BaseModel):
+         """Schema for unstack operation"""
+
+         records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+         level: Union[int, str] = Field(default=-1, description="Level to unstack: integer index or column name. Default is -1 (last level)")
+
+     class Strip_stringsSchema(BaseModel):
+         """Schema for strip_strings operation"""
+
+         records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+         columns: List[str] = Field(description="List of string column names to strip whitespace from")
+
+     class To_numericSchema(BaseModel):
+         """Schema for to_numeric operation"""
+
+         records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+         columns: List[str] = Field(description="List of column names to convert to numeric type")
+
+     class To_datetimeSchema(BaseModel):
+         """Schema for to_datetime operation"""
+
+         records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+         columns: List[str] = Field(description="List of column names to convert to datetime type")
+         format: Optional[str] = Field(default=None, description="Optional datetime format string (e.g., '%Y-%m-%d'). If None, pandas will infer the format")
+
+     class MeanSchema(BaseModel):
+         """Schema for mean operation"""
+
+         records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+         columns: Optional[List[str]] = Field(default=None, description="Optional list of numeric column names to compute mean for. If None, computes mean for all numeric columns")
+
+     class SumSchema(BaseModel):
+         """Schema for sum operation"""
+
+         records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+         columns: Optional[List[str]] = Field(default=None, description="Optional list of numeric column names to compute sum for. If None, computes sum for all numeric columns")
+
+     class CountSchema(BaseModel):
+         """Schema for count operation"""
+
+         records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+         columns: Optional[List[str]] = Field(default=None, description="Optional list of column names to count non-null values for. If None, counts for all columns")
+
+     class MinSchema(BaseModel):
+         """Schema for min operation"""
+
+         records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+         columns: Optional[List[str]] = Field(default=None, description="Optional list of column names to compute minimum values for. If None, computes minimum for all columns")
+
+     class MaxSchema(BaseModel):
+         """Schema for max operation"""
+
+         records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+         columns: Optional[List[str]] = Field(default=None, description="Optional list of column names to compute maximum values for. If None, computes maximum for all columns")
+
+     class RollingSchema(BaseModel):
+         """Schema for rolling operation"""
+
+         records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+         columns: List[str] = Field(description="List of numeric column names to apply rolling window function to")
+         window: int = Field(description="Size of the rolling window (number of rows)")
+         function: str = Field(default="mean", description="Rolling function to apply: 'mean', 'sum', 'min', 'max', 'std', 'count', or 'median'")
+
+     class HeadSchema(BaseModel):
+         """Schema for head operation"""
+
+         records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+         n: int = Field(default=5, description="Number of rows to return from the beginning of the DataFrame")
+
+     class TailSchema(BaseModel):
+         """Schema for tail operation"""
+
+         records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+         n: int = Field(default=5, description="Number of rows to return from the end of the DataFrame")
+
+     class SampleSchema(BaseModel):
+         """Schema for sample operation"""
+
+         records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+         n: int = Field(default=5, description="Number of random rows to sample")
+         random_state: Optional[int] = Field(default=None, description="Optional random seed for reproducible sampling")
+
+     def __init__(self, config: Optional[Dict] = None, **kwargs):
          """
          Initialize PandasTool with configuration.

          Args:
-             config (Dict, optional): Configuration overrides for PandasToolConfig.
+             config (Dict, optional): Configuration overrides for PandasTool.
+             **kwargs: Additional arguments passed to BaseTool (e.g., tool_name)

          Raises:
              ValueError: If config is invalid.
+
+         Configuration is automatically loaded by BaseTool from:
+         1. Explicit config dict (highest priority)
+         2. YAML config files (config/tools/pandas.yaml)
+         3. Environment variables (via dotenv from .env files)
+         4. Tool defaults (lowest priority)
          """
-         super().__init__(config)
-         self.config = PandasToolConfig()
-         if config:
-             try:
-                 self.config = self.config.model_validate({**self.config.model_dump(), **config})
-             except PydanticValidationError as e:
-                 raise ValueError(f"Invalid configuration: {e}")
+         super().__init__(config, **kwargs)
+
+         # Configuration is automatically loaded by BaseTool into self._config_obj
+         # Access config via self._config_obj (BaseSettings instance)
+         self.config = self._config_obj if self._config_obj else self.Config()
+
          self.logger = logging.getLogger(__name__)
         if not self.logger.handlers:
              handler = logging.StreamHandler()
-             handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
+             handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
              self.logger.addHandler(handler)
              self.logger.setLevel(logging.INFO)
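Note: the nested Config(BaseSettings) above replaces the removed module-level PandasToolConfig(BaseModel). A plain pydantic BaseModel never reads environment variables, whereas pydantic_settings honors env_prefix, so PANDAS_TOOL_* variables now actually populate the fields. A minimal standalone sketch of the same pattern, independent of BaseTool (whose loading logic is not shown in this diff):

    import os

    from pydantic import Field
    from pydantic_settings import BaseSettings, SettingsConfigDict

    class Config(BaseSettings):
        """Fields fall back to their defaults unless a PANDAS_TOOL_-prefixed
        environment variable overrides them."""

        model_config = SettingsConfigDict(env_prefix="PANDAS_TOOL_")

        csv_delimiter: str = Field(default=",")
        chunk_size: int = Field(default=10000)

    os.environ["PANDAS_TOOL_CSV_DELIMITER"] = ";"
    os.environ["PANDAS_TOOL_CHUNK_SIZE"] = "500"  # string value is coerced to int

    cfg = Config()
    assert cfg.csv_delimiter == ";"
    assert cfg.chunk_size == 500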
 
@@ -135,9 +388,7 @@ class PandasTool(BaseTool):
          available_columns = set(df.columns)
          missing = [col for col in columns if col not in available_columns]
          if missing:
-             raise InputValidationError(
-                 f"Columns not found: {missing}. Available columns: {list(available_columns)}"
-             )
+             raise InputValidationError(f"Columns not found: {missing}. Available columns: {list(available_columns)}")

      def _to_json_serializable(self, result: Union[pd.DataFrame, pd.Series, Dict]) -> Union[List[Dict], Dict]:
          """
@@ -150,14 +401,15 @@ class PandasTool(BaseTool):
              Union[List[Dict], Dict]: JSON-serializable result.
          """
          if isinstance(result, pd.DataFrame):
-             for col in result.select_dtypes(include=['datetime64']).columns:
-                 result[col] = result[col].dt.strftime('%Y-%m-%d %H:%M:%S')
+             for col in result.select_dtypes(include=["datetime64"]).columns:
+                 result[col] = result[col].dt.strftime("%Y-%m-%d %H:%M:%S")
              return result.to_dict(orient="records")
          elif isinstance(result, pd.Series):
              if pd.api.types.is_datetime64_any_dtype(result):
-                 result = result.dt.strftime('%Y-%m-%d %H:%M:%S')
+                 result = result.dt.strftime("%Y-%m-%d %H:%M:%S")
              return result.to_dict()
          elif isinstance(result, dict):
+
              def convert_value(v):
                  if isinstance(v, (np.floating, np.integer)):
                      return float(v)
@@ -170,6 +422,7 @@ class PandasTool(BaseTool):
                  elif pd.isna(v):
                      return None
                  return v
+
              return {k: convert_value(v) for k, v in result.items()}
          return result
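Note: convert_value exists because DataFrame.to_dict() leaks numpy scalars (np.int64, np.float64) and pd.Timestamp objects, which the standard json module refuses to serialize. The middle of the helper falls between these two hunks and is not shown, so the timestamp branch below is an assumption; a sketch of the idea with made-up values:

    import json

    import numpy as np
    import pandas as pd

    row = pd.DataFrame({"n": [1], "t": [pd.Timestamp("2024-01-01")]}).to_dict(orient="records")[0]
    # row["n"] is np.int64 and row["t"] is pd.Timestamp: json.dumps(row) raises TypeError.

    def convert_value(v):
        if isinstance(v, (np.floating, np.integer)):
            return float(v)  # numpy scalar -> plain float
        if isinstance(v, pd.Timestamp):
            return v.strftime("%Y-%m-%d %H:%M:%S")  # assumed branch, elided in the diff
        if pd.isna(v):
            return None  # NaN/NaT -> null
        return v

    print(json.dumps({k: convert_value(v) for k, v in row.items()}))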
 
@@ -182,13 +435,18 @@ class PandasTool(BaseTool):
                      StringIO(csv_str),
                      sep=self.config.csv_delimiter,
                      encoding=self.config.encoding,
-                     chunksize=self.config.chunk_size
+                     chunksize=self.config.chunk_size,
                  ):
                      chunks.append(chunk)
                  df = pd.concat(chunks)
              else:
-                 df = pd.read_csv(StringIO(csv_str), sep=self.config.csv_delimiter, encoding=self.config.encoding)
-             return self._to_json_serializable(df)
+                 df = pd.read_csv(
+                     StringIO(csv_str),
+                     sep=self.config.csv_delimiter,
+                     encoding=self.config.encoding,
+                 )
+             result = self._to_json_serializable(df)
+             return cast(List[Dict], result)
          except Exception as e:
              raise DataFrameError(f"Failed to read CSV: {e}")
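Note: for inputs above the max_csv_size threshold (the comparison itself sits above this hunk), read_csv streams the payload with pd.read_csv(..., chunksize=...), which returns an iterator of DataFrame chunks instead of a single frame; pd.concat then reassembles them, bounding peak parser memory. A self-contained sketch of that path with shrunken sizes:

    from io import StringIO

    import pandas as pd

    csv_str = "a,b\n" + "\n".join(f"{i},{i * 2}" for i in range(10))

    chunks = []
    for chunk in pd.read_csv(StringIO(csv_str), chunksize=4):  # yields DataFrames of <= 4 rows
        chunks.append(chunk)

    df = pd.concat(chunks)
    assert len(df) == 10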
 
@@ -196,7 +454,8 @@ class PandasTool(BaseTool):
          """Read JSON string into a DataFrame."""
          try:
              df = pd.read_json(StringIO(json_str))
-             return self._to_json_serializable(df)
+             result = self._to_json_serializable(df)
+             return cast(List[Dict], result)
          except Exception as e:
              raise DataFrameError(f"Failed to read JSON: {e}")
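Note: the rewrite recurring in this and most following hunks (assign to result, then return cast(...)) is a typing fix, not a behavior change. _to_json_serializable is annotated as returning Union[List[Dict], Dict], so returning it directly from a method annotated -> List[Dict] fails strict type checking; typing.cast narrows the static type and is a no-op at runtime:

    from typing import Dict, List, Union, cast

    def to_json_serializable() -> Union[List[Dict], Dict]:
        return [{"a": 1}]

    def read_json() -> List[Dict]:
        result = to_json_serializable()
        # cast() only informs the type checker; no runtime conversion or check happens.
        return cast(List[Dict], result)

    assert read_json() == [{"a": 1}]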
 
@@ -204,26 +463,31 @@ class PandasTool(BaseTool):
          """Read data from a file (CSV, Excel, JSON)."""
          try:
              if file_type == "csv":
-                 file_size = sum(1 for _ in open(file_path, 'r', encoding=self.config.encoding))
+                 file_size = sum(1 for _ in open(file_path, "r", encoding=self.config.encoding))
                  if file_size > self.config.chunk_size:
                      chunks = []
                      for chunk in pd.read_csv(
                          file_path,
                          sep=self.config.csv_delimiter,
                          encoding=self.config.encoding,
-                         chunksize=self.config.chunk_size
+                         chunksize=self.config.chunk_size,
                      ):
                          chunks.append(chunk)
                      df = pd.concat(chunks)
                  else:
-                     df = pd.read_csv(file_path, sep=self.config.csv_delimiter, encoding=self.config.encoding)
+                     df = pd.read_csv(
+                         file_path,
+                         sep=self.config.csv_delimiter,
+                         encoding=self.config.encoding,
+                     )
              elif file_type == "excel":
                  df = pd.read_excel(file_path)
              elif file_type == "json":
                  df = pd.read_json(file_path)
              else:
                  raise ValidationError(f"Unsupported file type: {file_type}")
-             return self._to_json_serializable(df)
+             result = self._to_json_serializable(df)
+             return cast(List[Dict], result)
          except ValidationError:
              raise
          except Exception as e:
@@ -234,7 +498,12 @@ class PandasTool(BaseTool):
          df = self._validate_df(records)
          try:
              if file_type == "csv":
-                 df.to_csv(file_path, index=False, sep=self.config.csv_delimiter, encoding=self.config.encoding)
+                 df.to_csv(
+                     file_path,
+                     index=False,
+                     sep=self.config.csv_delimiter,
+                     encoding=self.config.encoding,
+                 )
              elif file_type == "excel":
                  df.to_excel(file_path, index=False)
              elif file_type == "json":
@@ -249,7 +518,8 @@ class PandasTool(BaseTool):
          """Compute summary statistics for DataFrame."""
          df = self._validate_df(records)
          desc = df.describe(include="all").to_dict()
-         return self._to_json_serializable(desc)
+         result = self._to_json_serializable(desc)
+         return cast(Dict, result)

      def describe(self, records: List[Dict], columns: Optional[List[str]] = None) -> Dict:
          """Compute descriptive statistics for specified columns."""
@@ -258,21 +528,24 @@ class PandasTool(BaseTool):
              self._validate_columns(df, columns)
              df = df[columns]
          desc = df.describe().to_dict()
-         return self._to_json_serializable(desc)
+         result = self._to_json_serializable(desc)
+         return cast(Dict, result)

      def value_counts(self, records: List[Dict], columns: List[str]) -> Dict:
          """Compute value counts for specified columns."""
          df = self._validate_df(records)
          self._validate_columns(df, columns)
          result = {col: df[col].value_counts().to_dict() for col in columns}
-         return self._to_json_serializable(result)
+         converted = self._to_json_serializable(result)
+         return cast(Dict, converted)

      def filter(self, records: List[Dict], condition: str) -> List[Dict]:
          """Filter DataFrame based on a condition."""
          df = self._validate_df(records)
          try:
              df = df.query(condition, engine="python")
-             return self._to_json_serializable(df)
+             result = self._to_json_serializable(df)
+             return cast(List[Dict], result)
          except Exception as e:
              raise DataFrameError(f"Invalid query condition: {e}")
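Note: filter delegates to DataFrame.query with engine="python", so condition strings use pandas query syntax (column names as bare identifiers, comparisons, and/or). A quick illustration with made-up data, matching the 'age > 30' example from FilterSchema:

    import pandas as pd

    df = pd.DataFrame([
        {"name": "Ann", "age": 34},
        {"name": "Bob", "age": 28},
    ])

    out = df.query("age > 30", engine="python")  # same call shape as the tool's filter
    print(out.to_dict(orient="records"))  # [{'name': 'Ann', 'age': 34}]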
 
@@ -280,27 +553,31 @@ class PandasTool(BaseTool):
          """Select specified columns from DataFrame."""
          df = self._validate_df(records)
          self._validate_columns(df, columns)
-         return self._to_json_serializable(df[columns])
+         result = self._to_json_serializable(df[columns])
+         return cast(List[Dict], result)

      def drop_columns(self, records: List[Dict], columns: List[str]) -> List[Dict]:
          """Drop specified columns from DataFrame."""
          df = self._validate_df(records)
          self._validate_columns(df, columns)
-         return self._to_json_serializable(df.drop(columns=columns))
+         result = self._to_json_serializable(df.drop(columns=columns))
+         return cast(List[Dict], result)

      def drop_duplicates(self, records: List[Dict], columns: Optional[List[str]] = None) -> List[Dict]:
          """Drop duplicate rows based on specified columns."""
          df = self._validate_df(records)
          if columns:
              self._validate_columns(df, columns)
-         return self._to_json_serializable(df.drop_duplicates(subset=columns))
+         result = self._to_json_serializable(df.drop_duplicates(subset=columns))
+         return cast(List[Dict], result)

      def dropna(self, records: List[Dict], axis: int = 0, how: str = "any") -> List[Dict]:
          """Drop rows or columns with missing values."""
          df = self._validate_df(records)
          if how not in ["any", "all"]:
              raise ValidationError("how must be 'any' or 'all'")
-         return self._to_json_serializable(df.dropna(axis=axis, how=how))
+         result = self._to_json_serializable(df.dropna(axis=axis, how=how))
+         return cast(List[Dict], result)

      def groupby(self, records: List[Dict], by: List[str], agg: Dict[str, str]) -> List[Dict]:
          """Group DataFrame and apply aggregations."""
@@ -308,21 +585,42 @@ class PandasTool(BaseTool):
          self._validate_columns(df, by + list(agg.keys()))
          try:
              df = df.groupby(by).agg(agg).reset_index()
-             return self._to_json_serializable(df)
+             result = self._to_json_serializable(df)
+             return cast(List[Dict], result)
          except Exception as e:
              raise DataFrameError(f"Groupby failed: {e}")

-     def pivot_table(self, records: List[Dict], values: List[str], index: List[str], columns: List[str], aggfunc: str = "mean") -> List[Dict]:
+     def pivot_table(
+         self,
+         records: List[Dict],
+         values: List[str],
+         index: List[str],
+         columns: List[str],
+         aggfunc: str = "mean",
+     ) -> List[Dict]:
          """Create a pivot table from DataFrame."""
          df = self._validate_df(records)
          self._validate_columns(df, values + index + columns)
          try:
-             df = pd.pivot_table(df, values=values, index=index, columns=columns, aggfunc=aggfunc)
-             return self._to_json_serializable(df.reset_index())
+             df = pd.pivot_table(
+                 df,
+                 values=values,
+                 index=index,
+                 columns=columns,
+                 aggfunc=aggfunc,
+             )
+             result = self._to_json_serializable(df.reset_index())
+             return cast(List[Dict], result)
          except Exception as e:
              raise DataFrameError(f"Pivot table failed: {e}")

-     def merge(self, records: List[Dict], records_right: List[Dict], on: Union[str, List[str]], join_type: str = "inner") -> List[Dict]:
+     def merge(
+         self,
+         records: List[Dict],
+         records_right: List[Dict],
+         on: Union[str, List[str]],
+         join_type: str = "inner",
+     ) -> List[Dict]:
          """Merge two DataFrames."""
          df_left = self._validate_df(records)
          df_right = self._validate_df(records_right)
@@ -332,7 +630,8 @@ class PandasTool(BaseTool):
          self._validate_columns(df_right, [on] if isinstance(on, str) else on)
          try:
              df = df_left.merge(df_right, on=on, how=join_type)
-             return self._to_json_serializable(df)
+             result = self._to_json_serializable(df)
+             return cast(List[Dict], result)
          except Exception as e:
              raise DataFrameError(f"Merge failed: {e}")

@@ -343,17 +642,24 @@ class PandasTool(BaseTool):
          dfs = [self._validate_df(records) for records in records_list]
          try:
              df = pd.concat(dfs, axis=axis, ignore_index=True)
-             return self._to_json_serializable(df)
+             result = self._to_json_serializable(df)
+             return cast(List[Dict], result)
          except Exception as e:
              raise DataFrameError(f"Concat failed: {e}")

-     def sort_values(self, records: List[Dict], sort_by: List[str], ascending: Union[bool, List[bool]] = True) -> List[Dict]:
+     def sort_values(
+         self,
+         records: List[Dict],
+         sort_by: List[str],
+         ascending: Union[bool, List[bool]] = True,
+     ) -> List[Dict]:
          """Sort DataFrame by specified columns."""
          df = self._validate_df(records)
          self._validate_columns(df, sort_by)
          try:
              df = df.sort_values(by=sort_by, ascending=ascending)
-             return self._to_json_serializable(df)
+             result = self._to_json_serializable(df)
+             return cast(List[Dict], result)
          except Exception as e:
              raise DataFrameError(f"Sort failed: {e}")

@@ -361,17 +667,29 @@ class PandasTool(BaseTool):
          """Rename DataFrame columns."""
          df = self._validate_df(records)
          self._validate_columns(df, list(mapping.keys()))
-         return self._to_json_serializable(df.rename(columns=mapping))
-
-     def replace_values(self, records: List[Dict], to_replace: Dict, columns: Optional[List[str]] = None) -> List[Dict]:
+         result = self._to_json_serializable(df.rename(columns=mapping))
+         return cast(List[Dict], result)
+
+     def replace_values(
+         self,
+         records: List[Dict],
+         to_replace: Dict,
+         columns: Optional[List[str]] = None,
+     ) -> List[Dict]:
          """Replace values in DataFrame."""
          df = self._validate_df(records)
          if columns:
              self._validate_columns(df, columns)
              df = df[columns]
-         return self._to_json_serializable(df.replace(to_replace))
-
-     def fill_na(self, records: List[Dict], value: Union[str, int, float], columns: Optional[List[str]] = None) -> List[Dict]:
+         result = self._to_json_serializable(df.replace(to_replace))
+         return cast(List[Dict], result)
+
+     def fill_na(
+         self,
+         records: List[Dict],
+         value: Union[str, int, float],
+         columns: Optional[List[str]] = None,
+     ) -> List[Dict]:
          """Fill missing values in DataFrame."""
          df = self._validate_df(records)
          if columns:
@@ -379,7 +697,8 @@ class PandasTool(BaseTool):
              df[columns] = df[columns].fillna(value)
          else:
              df = df.fillna(value)
-         return self._to_json_serializable(df)
+         result = self._to_json_serializable(df)
+         return cast(List[Dict], result)

      def astype(self, records: List[Dict], dtypes: Dict[str, str]) -> List[Dict]:
          """Convert column types in DataFrame."""
@@ -387,7 +706,8 @@ class PandasTool(BaseTool):
          self._validate_columns(df, list(dtypes.keys()))
          try:
              df = df.astype(dtypes)
-             return self._to_json_serializable(df)
+             result = self._to_json_serializable(df)
+             return cast(List[Dict], result)
          except Exception as e:
              raise DataFrameError(f"Type conversion failed: {e}")
@@ -396,24 +716,24 @@ class PandasTool(BaseTool):
          df = self._validate_df(records)
          self._validate_columns(df, columns)
          allowed_funcs = {
-             'upper': lambda x: x.upper() if isinstance(x, str) else x,
-             'lower': lambda x: x.lower() if isinstance(x, str) else x,
-             'strip': lambda x: x.strip() if isinstance(x, str) else x,
-             'capitalize': lambda x: x.capitalize() if isinstance(x, str) else x,
-             'title': lambda x: x.title() if isinstance(x, str) else x,
-             'len': lambda x: len(str(x)) if pd.notna(x) else 0,
-             'abs': lambda x: abs(float(x)) if pd.notna(x) and not isinstance(x, str) else x,
-             'round': lambda x: round(float(x)) if pd.notna(x) and not isinstance(x, str) else x,
-             'ceil': lambda x: np.ceil(float(x)) if pd.notna(x) and not isinstance(x, str) else x,
-             'floor': lambda x: np.floor(float(x)) if pd.notna(x) and not isinstance(x, str) else x,
-             'int': lambda x: int(float(x)) if pd.notna(x) and not isinstance(x, str) else None,
-             'float': lambda x: float(x) if pd.notna(x) and not isinstance(x, str) else None,
-             'str': lambda x: str(x) if pd.notna(x) else "",
-             'bool': lambda x: bool(x) if pd.notna(x) else False,
-             'date_only': lambda x: x.date() if isinstance(x, pd.Timestamp) else x,
-             'year': lambda x: x.year if isinstance(x, pd.Timestamp) else None,
-             'month': lambda x: x.month if isinstance(x, pd.Timestamp) else None,
-             'day': lambda x: x.day if isinstance(x, pd.Timestamp) else None,
+             "upper": lambda x: x.upper() if isinstance(x, str) else x,
+             "lower": lambda x: x.lower() if isinstance(x, str) else x,
+             "strip": lambda x: x.strip() if isinstance(x, str) else x,
+             "capitalize": lambda x: (x.capitalize() if isinstance(x, str) else x),
+             "title": lambda x: x.title() if isinstance(x, str) else x,
+             "len": lambda x: len(str(x)) if pd.notna(x) else 0,
+             "abs": lambda x: (abs(float(x)) if pd.notna(x) and not isinstance(x, str) else x),
+             "round": lambda x: (round(float(x)) if pd.notna(x) and not isinstance(x, str) else x),
+             "ceil": lambda x: (np.ceil(float(x)) if pd.notna(x) and not isinstance(x, str) else x),
+             "floor": lambda x: (np.floor(float(x)) if pd.notna(x) and not isinstance(x, str) else x),
+             "int": lambda x: (int(float(x)) if pd.notna(x) and not isinstance(x, str) else None),
+             "float": lambda x: (float(x) if pd.notna(x) and not isinstance(x, str) else None),
+             "str": lambda x: str(x) if pd.notna(x) else "",
+             "bool": lambda x: bool(x) if pd.notna(x) else False,
+             "date_only": lambda x: (x.date() if isinstance(x, pd.Timestamp) else x),
+             "year": lambda x: x.year if isinstance(x, pd.Timestamp) else None,
+             "month": lambda x: (x.month if isinstance(x, pd.Timestamp) else None),
+             "day": lambda x: x.day if isinstance(x, pd.Timestamp) else None,
          }
          try:
              if axis == 0:
@@ -421,7 +741,8 @@ class PandasTool(BaseTool):
                      df[col] = df[col].apply(allowed_funcs[func])
              else:
                  df[columns] = df[columns].apply(allowed_funcs[func], axis=1)
-             return self._to_json_serializable(df)
+             result = self._to_json_serializable(df)
+             return cast(List[Dict], result)
          except Exception as e:
              raise DataFrameError(f"Apply failed: {e}")
 
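The `allowed_funcs` table above is a whitelist: `apply` dispatches only to named, pre-vetted transforms rather than evaluating caller-supplied code. A hedged standalone sketch of the same idea (the two-entry table, helper name, and sample data are illustrative, not from the package):

```python
import pandas as pd

# Whitelist dispatch, as in PandasTool.apply: unknown names are
# rejected instead of being eval'd or imported.
ALLOWED = {
    "upper": lambda x: x.upper() if isinstance(x, str) else x,
    "len": lambda x: len(str(x)) if pd.notna(x) else 0,
}

def apply_whitelisted(records, columns, func):
    if func not in ALLOWED:
        raise ValueError(f"Function not allowed: {func}")
    df = pd.DataFrame(records)
    for col in columns:
        df[col] = df[col].apply(ALLOWED[func])
    return df.to_dict(orient="records")

print(apply_whitelisted([{"name": "ada"}, {"name": None}], ["name"], "upper"))
# [{'name': 'ADA'}, {'name': None}]
```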
@@ -431,7 +752,8 @@ class PandasTool(BaseTool):
         self._validate_columns(df, id_vars + value_vars)
         try:
             df = pd.melt(df, id_vars=id_vars, value_vars=value_vars)
-            return self._to_json_serializable(df)
+            result = self._to_json_serializable(df)
+            return cast(List[Dict], result)
         except Exception as e:
             raise DataFrameError(f"Melt failed: {e}")
 
@@ -441,7 +763,11 @@ class PandasTool(BaseTool):
         self._validate_columns(df, [index, columns, values])
         try:
             df = df.pivot(index=index, columns=columns, values=values)
-            return self._to_json_serializable(df.reset_index())
+            result = self._to_json_serializable(df.reset_index())
+            # Ensure we return a list
+            if isinstance(result, dict):
+                return [result]
+            return result
         except Exception as e:
             raise DataFrameError(f"Pivot failed: {e}")
 
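`pivot` gets a runtime guard instead of a plain `cast`, presumably because the serialized, reset-index pivot can come back as a single dict rather than a list of records. A small standalone sketch of that normalization (sample values invented):

```python
from typing import Dict, List, Union

def as_record_list(result: Union[List[Dict], Dict]) -> List[Dict]:
    # Mirrors the new pivot logic: wrap a lone dict so callers always
    # receive a list of records.
    if isinstance(result, dict):
        return [result]
    return result

print(as_record_list({"year": 2024, "sales": 10.0}))    # [{'year': 2024, 'sales': 10.0}]
print(as_record_list([{"year": 2024, "sales": 10.0}]))  # unchanged
```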
@@ -450,7 +776,8 @@ class PandasTool(BaseTool):
         df = self._validate_df(records)
         try:
             df = df.stack().reset_index()
-            return self._to_json_serializable(df)
+            result = self._to_json_serializable(df)
+            return cast(List[Dict], result)
         except Exception as e:
             raise DataFrameError(f"Stack failed: {e}")
 
@@ -459,7 +786,8 @@ class PandasTool(BaseTool):
         df = self._validate_df(records)
         try:
             df = df.unstack(level=level).reset_index()
-            return self._to_json_serializable(df)
+            result = self._to_json_serializable(df)
+            return cast(List[Dict], result)
         except Exception as e:
             raise DataFrameError(f"Unstack failed: {e}")
 
@@ -470,7 +798,8 @@ class PandasTool(BaseTool):
         for col in columns:
             if df[col].dtype == "object":
                 df[col] = df[col].str.strip()
-        return self._to_json_serializable(df)
+        result = self._to_json_serializable(df)
+        return cast(List[Dict], result)
 
     def to_numeric(self, records: List[Dict], columns: List[str]) -> List[Dict]:
         """Convert columns to numeric type."""
@@ -479,18 +808,25 @@ class PandasTool(BaseTool):
         try:
             for col in columns:
                 df[col] = pd.to_numeric(df[col], errors="coerce")
-            return self._to_json_serializable(df)
+            result = self._to_json_serializable(df)
+            return cast(List[Dict], result)
         except Exception as e:
             raise DataFrameError(f"To numeric failed: {e}")
 
-    def to_datetime(self, records: List[Dict], columns: List[str], format: Optional[str] = None) -> List[Dict]:
+    def to_datetime(
+        self,
+        records: List[Dict],
+        columns: List[str],
+        format: Optional[str] = None,
+    ) -> List[Dict]:
         """Convert columns to datetime type."""
         df = self._validate_df(records)
         self._validate_columns(df, columns)
         try:
             for col in columns:
                 df[col] = pd.to_datetime(df[col], format=format, errors="coerce")
-            return self._to_json_serializable(df)
+            result = self._to_json_serializable(df)
+            return cast(List[Dict], result)
         except Exception as e:
             raise DataFrameError(f"To datetime failed: {e}")
 
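Both converters above pass `errors="coerce"`, so values that cannot be parsed become `NaN`/`NaT` instead of raising; only unexpected failures reach the `DataFrameError` wrapper. A quick standalone illustration (sample data invented):

```python
import pandas as pd

nums = pd.to_numeric(pd.Series(["3.14", "not a number"]), errors="coerce")
print(nums.tolist())  # [3.14, nan]

dates = pd.to_datetime(
    pd.Series(["2024-01-31", "bogus"]), format="%Y-%m-%d", errors="coerce"
)
print(dates.isna().tolist())  # [False, True] -> "bogus" coerced to NaT
```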
@@ -500,7 +836,8 @@ class PandasTool(BaseTool):
         if columns:
             self._validate_columns(df, columns)
             df = df[columns]
-        return self._to_json_serializable(df.select_dtypes(include=np.number).mean())
+        result = self._to_json_serializable(df.select_dtypes(include=np.number).mean())
+        return cast(Dict, result)
 
     def sum(self, records: List[Dict], columns: Optional[List[str]] = None) -> Dict:
         """Compute sum of numeric columns."""
@@ -508,7 +845,8 @@ class PandasTool(BaseTool):
         if columns:
             self._validate_columns(df, columns)
             df = df[columns]
-        return self._to_json_serializable(df.select_dtypes(include=np.number).sum())
+        result = self._to_json_serializable(df.select_dtypes(include=np.number).sum())
+        return cast(Dict, result)
 
     def count(self, records: List[Dict], columns: Optional[List[str]] = None) -> Dict:
         """Compute count of non-null values."""
@@ -516,7 +854,8 @@ class PandasTool(BaseTool):
         if columns:
             self._validate_columns(df, columns)
             df = df[columns]
-        return self._to_json_serializable(df.count())
+        result = self._to_json_serializable(df.count())
+        return cast(Dict, result)
 
     def min(self, records: List[Dict], columns: Optional[List[str]] = None) -> Dict:
         """Compute minimum values."""
@@ -524,7 +863,8 @@ class PandasTool(BaseTool):
         if columns:
             self._validate_columns(df, columns)
             df = df[columns]
-        return self._to_json_serializable(df.min())
+        result = self._to_json_serializable(df.min())
+        return cast(Dict, result)
 
     def max(self, records: List[Dict], columns: Optional[List[str]] = None) -> Dict:
         """Compute maximum values."""
@@ -532,9 +872,16 @@ class PandasTool(BaseTool):
         if columns:
             self._validate_columns(df, columns)
             df = df[columns]
-        return self._to_json_serializable(df.max())
-
-    def rolling(self, records: List[Dict], columns: List[str], window: int, function: str = "mean") -> List[Dict]:
+        result = self._to_json_serializable(df.max())
+        return cast(Dict, result)
+
+    def rolling(
+        self,
+        records: List[Dict],
+        columns: List[str],
+        window: int,
+        function: str = "mean",
+    ) -> List[Dict]:
         """Apply rolling window function to columns."""
         df = self._validate_df(records)
         self._validate_columns(df, columns)
@@ -545,21 +892,30 @@ class PandasTool(BaseTool):
             for col in columns:
                 if pd.api.types.is_numeric_dtype(df[col]):
                     df[f"{col}_{function}_{window}"] = getattr(df[col].rolling(window), function)()
-            return self._to_json_serializable(df)
+            result = self._to_json_serializable(df)
+            return cast(List[Dict], result)
         except Exception as e:
             raise DataFrameError(f"Rolling operation failed: {e}")
 
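`rolling` writes each derived series to a new `{col}_{function}_{window}` column and dispatches with `getattr` on the `Rolling` object, so `function` must name a real `Rolling` method such as `mean`, `sum`, `min`, or `max`. A standalone sketch (invented data):

```python
import pandas as pd

df = pd.DataFrame({"price": [1.0, 2.0, 3.0, 4.0]})
window, function = 2, "mean"

# Same dispatch as the tool: getattr(rolling, "mean")() == rolling.mean().
df[f"price_{function}_{window}"] = getattr(df["price"].rolling(window), function)()
print(df)
# The first window-1 rows of the derived column are NaN by construction.
```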
     def head(self, records: List[Dict], n: int = 5) -> List[Dict]:
         """Return first n rows of DataFrame."""
         df = self._validate_df(records)
-        return self._to_json_serializable(df.head(n))
+        result = self._to_json_serializable(df.head(n))
+        return cast(List[Dict], result)
 
     def tail(self, records: List[Dict], n: int = 5) -> List[Dict]:
         """Return last n rows of DataFrame."""
         df = self._validate_df(records)
-        return self._to_json_serializable(df.tail(n))
+        result = self._to_json_serializable(df.tail(n))
+        return cast(List[Dict], result)
 
-    def sample(self, records: List[Dict], n: int = 5, random_state: Optional[int] = None) -> List[Dict]:
+    def sample(
+        self,
+        records: List[Dict],
+        n: int = 5,
+        random_state: Optional[int] = None,
+    ) -> List[Dict]:
         """Return random sample of n rows from DataFrame."""
         df = self._validate_df(records)
-        return self._to_json_serializable(df.sample(n=min(n, len(df)), random_state=random_state))
+        result = self._to_json_serializable(df.sample(n=min(n, len(df)), random_state=random_state))
+        return cast(List[Dict], result)
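Note the `min(n, len(df))` clamp in `sample`: without `replace=True`, pandas raises a `ValueError` when `n` exceeds the row count, so over-asking simply returns every row. Standalone illustration (invented data):

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3]})
n = 10

# df.sample(n=10) would raise for a 3-row frame; the clamp keeps the
# call safe, and random_state makes the draw reproducible.
print(df.sample(n=min(n, len(df)), random_state=42))
```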