aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries; it is provided for informational purposes only.
Potentially problematic release.
- aiecs/__init__.py +13 -16
- aiecs/__main__.py +7 -7
- aiecs/aiecs_client.py +269 -75
- aiecs/application/executors/operation_executor.py +79 -54
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
- aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
- aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
- aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +98 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
- aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
- aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
- aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
- aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
- aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
- aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
- aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
- aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
- aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
- aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
- aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
- aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
- aiecs/application/knowledge_graph/search/reranker.py +293 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +471 -0
- aiecs/config/__init__.py +20 -5
- aiecs/config/config.py +762 -31
- aiecs/config/graph_config.py +131 -0
- aiecs/config/tool_config.py +399 -0
- aiecs/core/__init__.py +29 -13
- aiecs/core/interface/__init__.py +2 -2
- aiecs/core/interface/execution_interface.py +22 -22
- aiecs/core/interface/storage_interface.py +37 -88
- aiecs/core/registry/__init__.py +31 -0
- aiecs/core/registry/service_registry.py +92 -0
- aiecs/domain/__init__.py +270 -1
- aiecs/domain/agent/__init__.py +191 -0
- aiecs/domain/agent/base_agent.py +3870 -0
- aiecs/domain/agent/exceptions.py +99 -0
- aiecs/domain/agent/graph_aware_mixin.py +569 -0
- aiecs/domain/agent/hybrid_agent.py +1435 -0
- aiecs/domain/agent/integration/__init__.py +29 -0
- aiecs/domain/agent/integration/context_compressor.py +216 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
- aiecs/domain/agent/integration/protocols.py +281 -0
- aiecs/domain/agent/integration/retry_policy.py +218 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
- aiecs/domain/agent/lifecycle.py +291 -0
- aiecs/domain/agent/llm_agent.py +692 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +1124 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +163 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
- aiecs/domain/agent/models.py +884 -0
- aiecs/domain/agent/observability.py +479 -0
- aiecs/domain/agent/persistence.py +449 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +159 -0
- aiecs/domain/agent/prompts/formatters.py +187 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +253 -0
- aiecs/domain/agent/tool_agent.py +444 -0
- aiecs/domain/agent/tools/__init__.py +15 -0
- aiecs/domain/agent/tools/schema_generator.py +364 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +469 -0
- aiecs/domain/community/analytics.py +432 -0
- aiecs/domain/community/collaborative_workflow.py +648 -0
- aiecs/domain/community/communication_hub.py +634 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +796 -0
- aiecs/domain/community/community_manager.py +803 -0
- aiecs/domain/community/decision_engine.py +849 -0
- aiecs/domain/community/exceptions.py +231 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +234 -0
- aiecs/domain/community/resource_manager.py +461 -0
- aiecs/domain/community/shared_context_manager.py +589 -0
- aiecs/domain/context/__init__.py +40 -10
- aiecs/domain/context/context_engine.py +1910 -0
- aiecs/domain/context/conversation_models.py +87 -53
- aiecs/domain/context/graph_memory.py +582 -0
- aiecs/domain/execution/model.py +12 -4
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +148 -0
- aiecs/domain/knowledge_graph/models/evidence.py +178 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
- aiecs/domain/knowledge_graph/models/path.py +171 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
- aiecs/domain/knowledge_graph/models/query.py +261 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
- aiecs/domain/knowledge_graph/models/relation.py +202 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
- aiecs/domain/task/dsl_processor.py +172 -56
- aiecs/domain/task/model.py +20 -8
- aiecs/domain/task/task_context.py +27 -24
- aiecs/infrastructure/__init__.py +0 -2
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +837 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
- aiecs/infrastructure/graph_storage/cache.py +424 -0
- aiecs/infrastructure/graph_storage/distributed.py +223 -0
- aiecs/infrastructure/graph_storage/error_handling.py +380 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
- aiecs/infrastructure/graph_storage/metrics.py +344 -0
- aiecs/infrastructure/graph_storage/migration.py +400 -0
- aiecs/infrastructure/graph_storage/pagination.py +483 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
- aiecs/infrastructure/graph_storage/postgres.py +1563 -0
- aiecs/infrastructure/graph_storage/property_storage.py +353 -0
- aiecs/infrastructure/graph_storage/protocols.py +76 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
- aiecs/infrastructure/graph_storage/streaming.py +487 -0
- aiecs/infrastructure/graph_storage/tenant.py +412 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
- aiecs/infrastructure/messaging/websocket_manager.py +51 -35
- aiecs/infrastructure/monitoring/__init__.py +22 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
- aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
- aiecs/infrastructure/monitoring/structured_logger.py +3 -7
- aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
- aiecs/infrastructure/persistence/__init__.py +14 -1
- aiecs/infrastructure/persistence/context_engine_client.py +184 -0
- aiecs/infrastructure/persistence/database_manager.py +67 -43
- aiecs/infrastructure/persistence/file_storage.py +180 -103
- aiecs/infrastructure/persistence/redis_client.py +74 -21
- aiecs/llm/__init__.py +73 -25
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
- aiecs/llm/client_factory.py +224 -36
- aiecs/llm/client_resolver.py +155 -0
- aiecs/llm/clients/__init__.py +38 -0
- aiecs/llm/clients/base_client.py +324 -0
- aiecs/llm/clients/google_function_calling_mixin.py +457 -0
- aiecs/llm/clients/googleai_client.py +241 -0
- aiecs/llm/clients/openai_client.py +158 -0
- aiecs/llm/clients/openai_compatible_mixin.py +367 -0
- aiecs/llm/clients/vertex_client.py +897 -0
- aiecs/llm/clients/xai_client.py +201 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +272 -0
- aiecs/llm/config/config_validator.py +206 -0
- aiecs/llm/config/model_config.py +143 -0
- aiecs/llm/protocols.py +149 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +89 -0
- aiecs/main.py +140 -121
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/module_checker.py +499 -0
- aiecs/scripts/aid/version_manager.py +235 -0
- aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
- aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
- aiecs/scripts/dependance_check/__init__.py +15 -0
- aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
- aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
- aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
- aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
- aiecs/scripts/knowledge_graph/__init__.py +3 -0
- aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
- aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
- aiecs/scripts/tools_develop/README.md +671 -0
- aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
- aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
- aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
- aiecs/scripts/tools_develop/schema_coverage.py +511 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
- aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
- aiecs/scripts/tools_develop/verify_tools.py +352 -0
- aiecs/tasks/__init__.py +0 -1
- aiecs/tasks/worker.py +115 -47
- aiecs/tools/__init__.py +194 -72
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +330 -0
- aiecs/tools/apisource/providers/__init__.py +112 -0
- aiecs/tools/apisource/providers/base.py +671 -0
- aiecs/tools/apisource/providers/census.py +397 -0
- aiecs/tools/apisource/providers/fred.py +535 -0
- aiecs/tools/apisource/providers/newsapi.py +409 -0
- aiecs/tools/apisource/providers/worldbank.py +352 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +363 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
- aiecs/tools/apisource/tool.py +832 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +334 -0
- aiecs/tools/base_tool.py +415 -21
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +607 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
- aiecs/tools/docs/content_insertion_tool.py +1320 -0
- aiecs/tools/docs/document_creator_tool.py +1323 -0
- aiecs/tools/docs/document_layout_tool.py +1160 -0
- aiecs/tools/docs/document_parser_tool.py +1011 -0
- aiecs/tools/docs/document_writer_tool.py +1829 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
- aiecs/tools/langchain_adapter.py +300 -138
- aiecs/tools/schema_generator.py +455 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +581 -0
- aiecs/tools/search_tool/cache.py +264 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +224 -0
- aiecs/tools/search_tool/core.py +778 -0
- aiecs/tools/search_tool/deduplicator.py +119 -0
- aiecs/tools/search_tool/error_handler.py +242 -0
- aiecs/tools/search_tool/metrics.py +343 -0
- aiecs/tools/search_tool/rate_limiter.py +172 -0
- aiecs/tools/search_tool/schemas.py +275 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
- aiecs/tools/statistics/data_loader_tool.py +555 -0
- aiecs/tools/statistics/data_profiler_tool.py +638 -0
- aiecs/tools/statistics/data_transformer_tool.py +580 -0
- aiecs/tools/statistics/data_visualizer_tool.py +498 -0
- aiecs/tools/statistics/model_trainer_tool.py +507 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
- aiecs/tools/task_tools/__init__.py +49 -36
- aiecs/tools/task_tools/chart_tool.py +200 -184
- aiecs/tools/task_tools/classfire_tool.py +268 -267
- aiecs/tools/task_tools/image_tool.py +175 -131
- aiecs/tools/task_tools/office_tool.py +226 -146
- aiecs/tools/task_tools/pandas_tool.py +477 -121
- aiecs/tools/task_tools/report_tool.py +390 -142
- aiecs/tools/task_tools/research_tool.py +149 -79
- aiecs/tools/task_tools/scraper_tool.py +339 -145
- aiecs/tools/task_tools/stats_tool.py +448 -209
- aiecs/tools/temp_file_manager.py +26 -24
- aiecs/tools/tool_executor/__init__.py +18 -16
- aiecs/tools/tool_executor/tool_executor.py +364 -52
- aiecs/utils/LLM_output_structor.py +74 -48
- aiecs/utils/__init__.py +14 -3
- aiecs/utils/base_callback.py +0 -3
- aiecs/utils/cache_provider.py +696 -0
- aiecs/utils/execution_utils.py +50 -31
- aiecs/utils/prompt_loader.py +1 -0
- aiecs/utils/token_usage_repository.py +37 -11
- aiecs/ws/socket_server.py +14 -4
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
- aiecs-1.7.6.dist-info/RECORD +337 -0
- aiecs-1.7.6.dist-info/entry_points.txt +13 -0
- aiecs/config/registry.py +0 -19
- aiecs/domain/context/content_engine.py +0 -982
- aiecs/llm/base_client.py +0 -99
- aiecs/llm/openai_client.py +0 -125
- aiecs/llm/vertex_client.py +0 -186
- aiecs/llm/xai_client.py +0 -184
- aiecs/scripts/dependency_checker.py +0 -857
- aiecs/scripts/quick_dependency_check.py +0 -269
- aiecs/tools/task_tools/search_api.py +0 -7
- aiecs-1.0.1.dist-info/RECORD +0 -90
- aiecs-1.0.1.dist-info/entry_points.txt +0 -7
- /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
- /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
- /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
- /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
--- a/aiecs/tools/task_tools/pandas_tool.py
+++ b/aiecs/tools/task_tools/pandas_tool.py
@@ -1,57 +1,35 @@
 from io import StringIO
-import pandas as pd
+import pandas as pd  # type: ignore[import-untyped]
 import numpy as np
-from typing import List, Dict, Union, Optional, Any
-from pydantic import
+from typing import List, Dict, Union, Optional, cast, Any
+from pydantic import Field, BaseModel
+from pydantic_settings import BaseSettings, SettingsConfigDict
 import logging

 from aiecs.tools.base_tool import BaseTool
 from aiecs.tools import register_tool

 # Custom exceptions
+
+
 class PandasToolError(Exception):
     """Base exception for PandasTool errors."""
-
+

 class InputValidationError(PandasToolError):
     """Input validation error."""
-
+

 class DataFrameError(PandasToolError):
     """DataFrame operation error."""
-
+

 class SecurityError(PandasToolError):
     """Security-related error."""
-
+

 class ValidationError(PandasToolError):
     """Validation error."""
-    pass
-
-# Configuration for PandasTool
-class PandasToolConfig(BaseModel):
-    """
-    Configuration for PandasTool.
-
-    Attributes:
-        csv_delimiter (str): Delimiter for CSV files.
-        encoding (str): Encoding for file operations.
-        default_agg (Dict[str, str]): Default aggregation functions.
-        chunk_size (int): Chunk size for large file processing.
-        max_csv_size (int): Threshold for chunked CSV processing.
-        allowed_file_extensions (List[str]): Allowed file extensions.
-        env_prefix (str): Environment variable prefix.
-    """
-    csv_delimiter: str = ","
-    encoding: str = "utf-8"
-    default_agg: Dict[str, str] = {"numeric": "mean", "object": "count"}
-    chunk_size: int = 10000
-    max_csv_size: int = 1000000
-    allowed_file_extensions: List[str] = ['.csv', '.xlsx', '.json']
-    env_prefix: str = "PANDAS_TOOL_"
-
-    model_config = ConfigDict(env_prefix="PANDAS_TOOL_")


 @register_tool("pandas")
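The removed `PandasToolConfig` attached an env prefix via `ConfigDict` on a plain pydantic `BaseModel`, which never actually reads environment variables; the replacement imports `BaseSettings`/`SettingsConfigDict` from pydantic-settings, which does. A minimal sketch of the difference (the `PandasToolSettings` name and its two fields are illustrative, not the package's code):

```python
import os

from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict


class PandasToolSettings(BaseSettings):
    """Illustrative settings class; reads PANDAS_TOOL_* environment variables."""

    model_config = SettingsConfigDict(env_prefix="PANDAS_TOOL_")

    csv_delimiter: str = Field(default=",")
    chunk_size: int = Field(default=10000)


os.environ["PANDAS_TOOL_CSV_DELIMITER"] = ";"
settings = PandasToolSettings()
print(settings.csv_delimiter)  # ';'   -- read from the environment at instantiation
print(settings.chunk_size)     # 10000 -- default, no matching variable set
```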
@@ -72,27 +50,302 @@ class PandasTool(BaseTool):

     Inherits from BaseTool to leverage ToolExecutor for caching, concurrency, and error handling.
     """
-
+
+    # Configuration schema
+    class Config(BaseSettings):
+        """Configuration for the pandas tool
+
+        Automatically reads from environment variables with PANDAS_TOOL_ prefix.
+        Example: PANDAS_TOOL_CSV_DELIMITER -> csv_delimiter
+        """
+
+        model_config = SettingsConfigDict(env_prefix="PANDAS_TOOL_")
+
+        csv_delimiter: str = Field(default=",", description="Delimiter for CSV files")
+        encoding: str = Field(default="utf-8", description="Encoding for file operations")
+        default_agg: Dict[str, str] = Field(
+            default={"numeric": "mean", "object": "count"},
+            description="Default aggregation functions",
+        )
+        chunk_size: int = Field(default=10000, description="Chunk size for large file processing")
+        max_csv_size: int = Field(default=1000000, description="Threshold for chunked CSV processing")
+        allowed_file_extensions: List[str] = Field(
+            default=[".csv", ".xlsx", ".json"],
+            description="Allowed file extensions",
+        )
+
+    # Schema definitions
+    class Read_csvSchema(BaseModel):
+        """Schema for read_csv operation"""
+
+        csv_str: str = Field(description="CSV string content to read into a DataFrame")
+
+    class Read_jsonSchema(BaseModel):
+        """Schema for read_json operation"""
+
+        json_str: str = Field(description="JSON string content to read into a DataFrame")
+
+    class Read_fileSchema(BaseModel):
+        """Schema for read_file operation"""
+
+        file_path: str = Field(description="Path to the file to read")
+        file_type: str = Field(default="csv", description="Type of file: 'csv', 'excel', or 'json'")
+
+    class Write_fileSchema(BaseModel):
+        """Schema for write_file operation"""
+
+        records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) to write as DataFrame")
+        file_path: str = Field(description="Path where the file will be written")
+        file_type: str = Field(default="csv", description="Type of file to write: 'csv', 'excel', or 'json'")
+
+    class SummarySchema(BaseModel):
+        """Schema for summary operation"""
+
+        records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+
+    class DescribeSchema(BaseModel):
+        """Schema for describe operation"""
+
+        records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+        columns: Optional[List[str]] = Field(default=None, description="Optional list of column names to describe. If None, describes all columns")
+
+    class Value_countsSchema(BaseModel):
+        """Schema for value_counts operation"""
+
+        records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+        columns: List[str] = Field(description="List of column names for which to compute value counts")
+
+    class FilterSchema(BaseModel):
+        """Schema for filter operation"""
+
+        records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+        condition: str = Field(description="Query condition string to filter rows (e.g., 'age > 30')")
+
+    class Select_columnsSchema(BaseModel):
+        """Schema for select_columns operation"""
+
+        records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+        columns: List[str] = Field(description="List of column names to select from the DataFrame")
+
+    class Drop_columnsSchema(BaseModel):
+        """Schema for drop_columns operation"""
+
+        records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+        columns: List[str] = Field(description="List of column names to drop from the DataFrame")
+
+    class Drop_duplicatesSchema(BaseModel):
+        """Schema for drop_duplicates operation"""
+
+        records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+        columns: Optional[List[str]] = Field(default=None, description="Optional list of column names to consider when identifying duplicates. If None, considers all columns")
+
+    class DropnaSchema(BaseModel):
+        """Schema for dropna operation"""
+
+        records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+        axis: int = Field(default=0, description="Axis along which to drop missing values: 0 for rows, 1 for columns")
+        how: str = Field(default="any", description="How to determine if a row/column is dropped: 'any' drops if any value is missing, 'all' drops if all values are missing")
+
+    class GroupbySchema(BaseModel):
+        """Schema for groupby operation"""
+
+        records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+        by: List[str] = Field(description="List of column names to group by")
+        agg: Dict[str, str] = Field(description="Dictionary mapping column names to aggregation functions (e.g., {'age': 'mean', 'salary': 'sum'})")
+
+    class Pivot_tableSchema(BaseModel):
+        """Schema for pivot_table operation"""
+
+        records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+        values: List[str] = Field(description="List of column names to aggregate")
+        index: List[str] = Field(description="List of column names to use as row index")
+        columns: List[str] = Field(description="List of column names to use as column index")
+        aggfunc: str = Field(default="mean", description="Aggregation function to apply (e.g., 'mean', 'sum', 'count')")
+
+    class MergeSchema(BaseModel):
+        """Schema for merge operation"""
+
+        records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the left DataFrame")
+        records_right: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the right DataFrame")
+        on: Union[str, List[str]] = Field(description="Column name(s) to join on. Can be a single string or list of strings")
+        join_type: str = Field(default="inner", description="Type of join: 'inner', 'left', 'right', or 'outer'")
+
+    class ConcatSchema(BaseModel):
+        """Schema for concat operation"""
+
+        records_list: List[List[Dict[str, Any]]] = Field(description="List of DataFrames (each as a list of dictionaries) to concatenate")
+        axis: int = Field(default=0, description="Axis along which to concatenate: 0 for rows (vertical), 1 for columns (horizontal)")
+
+    class Sort_valuesSchema(BaseModel):
+        """Schema for sort_values operation"""
+
+        records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+        sort_by: List[str] = Field(description="List of column names to sort by")
+        ascending: Union[bool, List[bool]] = Field(default=True, description="Whether to sort in ascending order. Can be a single boolean or list of booleans (one per column)")
+
+    class Rename_columnsSchema(BaseModel):
+        """Schema for rename_columns operation"""
+
+        records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+        mapping: Dict[str, str] = Field(description="Dictionary mapping old column names to new column names")
+
+    class Replace_valuesSchema(BaseModel):
+        """Schema for replace_values operation"""
+
+        records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+        to_replace: Dict[str, Any] = Field(description="Dictionary mapping values to replace to their replacement values")
+        columns: Optional[List[str]] = Field(default=None, description="Optional list of column names to apply replacement to. If None, applies to all columns")
+
+    class Fill_naSchema(BaseModel):
+        """Schema for fill_na operation"""
+
+        records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+        value: Union[str, int, float] = Field(description="Value to use for filling missing values")
+        columns: Optional[List[str]] = Field(default=None, description="Optional list of column names to fill. If None, fills all columns")
+
+    class AstypeSchema(BaseModel):
+        """Schema for astype operation"""
+
+        records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+        dtypes: Dict[str, str] = Field(description="Dictionary mapping column names to target data types (e.g., {'age': 'int64', 'name': 'string'})")
+
+    class ApplySchema(BaseModel):
+        """Schema for apply operation"""
+
+        records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+        func: str = Field(description="Name of the function to apply (e.g., 'upper', 'lower', 'strip', 'abs', 'round')")
+        columns: List[str] = Field(description="List of column names to apply the function to")
+        axis: int = Field(default=0, description="Axis along which to apply: 0 for columns, 1 for rows")
+
+    class MeltSchema(BaseModel):
+        """Schema for melt operation"""
+
+        records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+        id_vars: List[str] = Field(description="List of column names to use as identifier variables (kept as columns)")
+        value_vars: List[str] = Field(description="List of column names to unpivot (melted into rows)")
+
+    class PivotSchema(BaseModel):
+        """Schema for pivot operation"""
+
+        records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+        index: str = Field(description="Column name to use as row index")
+        columns: str = Field(description="Column name to use as column index")
+        values: str = Field(description="Column name containing values to pivot")
+
+    class StackSchema(BaseModel):
+        """Schema for stack operation"""
+
+        records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+
+    class UnstackSchema(BaseModel):
+        """Schema for unstack operation"""
+
+        records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+        level: Union[int, str] = Field(default=-1, description="Level to unstack: integer index or column name. Default is -1 (last level)")
+
+    class Strip_stringsSchema(BaseModel):
+        """Schema for strip_strings operation"""
+
+        records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+        columns: List[str] = Field(description="List of string column names to strip whitespace from")
+
+    class To_numericSchema(BaseModel):
+        """Schema for to_numeric operation"""
+
+        records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+        columns: List[str] = Field(description="List of column names to convert to numeric type")
+
+    class To_datetimeSchema(BaseModel):
+        """Schema for to_datetime operation"""
+
+        records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+        columns: List[str] = Field(description="List of column names to convert to datetime type")
+        format: Optional[str] = Field(default=None, description="Optional datetime format string (e.g., '%Y-%m-%d'). If None, pandas will infer the format")
+
+    class MeanSchema(BaseModel):
+        """Schema for mean operation"""
+
+        records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+        columns: Optional[List[str]] = Field(default=None, description="Optional list of numeric column names to compute mean for. If None, computes mean for all numeric columns")
+
+    class SumSchema(BaseModel):
+        """Schema for sum operation"""
+
+        records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+        columns: Optional[List[str]] = Field(default=None, description="Optional list of numeric column names to compute sum for. If None, computes sum for all numeric columns")
+
+    class CountSchema(BaseModel):
+        """Schema for count operation"""
+
+        records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+        columns: Optional[List[str]] = Field(default=None, description="Optional list of column names to count non-null values for. If None, counts for all columns")
+
+    class MinSchema(BaseModel):
+        """Schema for min operation"""
+
+        records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+        columns: Optional[List[str]] = Field(default=None, description="Optional list of column names to compute minimum values for. If None, computes minimum for all columns")
+
+    class MaxSchema(BaseModel):
+        """Schema for max operation"""
+
+        records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+        columns: Optional[List[str]] = Field(default=None, description="Optional list of column names to compute maximum values for. If None, computes maximum for all columns")
+
+    class RollingSchema(BaseModel):
+        """Schema for rolling operation"""
+
+        records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+        columns: List[str] = Field(description="List of numeric column names to apply rolling window function to")
+        window: int = Field(description="Size of the rolling window (number of rows)")
+        function: str = Field(default="mean", description="Rolling function to apply: 'mean', 'sum', 'min', 'max', 'std', 'count', or 'median'")
+
+    class HeadSchema(BaseModel):
+        """Schema for head operation"""
+
+        records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+        n: int = Field(default=5, description="Number of rows to return from the beginning of the DataFrame")
+
+    class TailSchema(BaseModel):
+        """Schema for tail operation"""
+
+        records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+        n: int = Field(default=5, description="Number of rows to return from the end of the DataFrame")
+
+    class SampleSchema(BaseModel):
+        """Schema for sample operation"""
+
+        records: List[Dict[str, Any]] = Field(description="List of records (dictionaries) representing the DataFrame")
+        n: int = Field(default=5, description="Number of random rows to sample")
+        random_state: Optional[int] = Field(default=None, description="Optional random seed for reproducible sampling")
+
+    def __init__(self, config: Optional[Dict] = None, **kwargs):
         """
         Initialize PandasTool with configuration.

         Args:
-            config (Dict, optional): Configuration overrides for
+            config (Dict, optional): Configuration overrides for PandasTool.
+            **kwargs: Additional arguments passed to BaseTool (e.g., tool_name)

         Raises:
             ValueError: If config is invalid.
+
+        Configuration is automatically loaded by BaseTool from:
+        1. Explicit config dict (highest priority)
+        2. YAML config files (config/tools/pandas.yaml)
+        3. Environment variables (via dotenv from .env files)
+        4. Tool defaults (lowest priority)
         """
-        super().__init__(config)
-
-
-
-
-
-        raise ValueError(f"Invalid configuration: {e}")
+        super().__init__(config, **kwargs)
+
+        # Configuration is automatically loaded by BaseTool into self._config_obj
+        # Access config via self._config_obj (BaseSettings instance)
+        self.config = self._config_obj if self._config_obj else self.Config()
+
         self.logger = logging.getLogger(__name__)
         if not self.logger.handlers:
             handler = logging.StreamHandler()
-            handler.setFormatter(logging.Formatter(
+            handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
             self.logger.addHandler(handler)
             self.logger.setLevel(logging.INFO)

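Each operation now carries an explicit pydantic schema, so a tool-call payload can be validated before any pandas code runs. A sketch of that behaviour under pydantic v2 semantics, re-declaring `GroupbySchema` standalone to mirror the nested class above:

```python
from typing import Any, Dict, List

from pydantic import BaseModel, Field, ValidationError


class GroupbySchema(BaseModel):
    """Mirrors the nested PandasTool.GroupbySchema defined in the diff above."""

    records: List[Dict[str, Any]] = Field(description="List of records representing the DataFrame")
    by: List[str] = Field(description="List of column names to group by")
    agg: Dict[str, str] = Field(description="Column name -> aggregation function")


# A well-formed payload validates cleanly.
ok = GroupbySchema(
    records=[{"dept": "a", "salary": 10}, {"dept": "a", "salary": 20}],
    by=["dept"],
    agg={"salary": "mean"},
)
print(ok.by)  # ['dept']

# A malformed payload is rejected before reaching pandas.
try:
    GroupbySchema(records=[], by="dept", agg={})  # 'by' must be a list of strings
except ValidationError as exc:
    print(exc.error_count(), "validation error")
```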
@@ -135,9 +388,7 @@ class PandasTool(BaseTool):
         available_columns = set(df.columns)
         missing = [col for col in columns if col not in available_columns]
         if missing:
-            raise InputValidationError(
-                f"Columns not found: {missing}. Available columns: {list(available_columns)}"
-            )
+            raise InputValidationError(f"Columns not found: {missing}. Available columns: {list(available_columns)}")

     def _to_json_serializable(self, result: Union[pd.DataFrame, pd.Series, Dict]) -> Union[List[Dict], Dict]:
         """
@@ -150,14 +401,15 @@ class PandasTool(BaseTool):
             Union[List[Dict], Dict]: JSON-serializable result.
         """
         if isinstance(result, pd.DataFrame):
-            for col in result.select_dtypes(include=[
-                result[col] = result[col].dt.strftime(
+            for col in result.select_dtypes(include=["datetime64"]).columns:
+                result[col] = result[col].dt.strftime("%Y-%m-%d %H:%M:%S")
             return result.to_dict(orient="records")
         elif isinstance(result, pd.Series):
             if pd.api.types.is_datetime64_any_dtype(result):
-                result = result.dt.strftime(
+                result = result.dt.strftime("%Y-%m-%d %H:%M:%S")
             return result.to_dict()
         elif isinstance(result, dict):
+
             def convert_value(v):
                 if isinstance(v, (np.floating, np.integer)):
                     return float(v)
@@ -170,6 +422,7 @@ class PandasTool(BaseTool):
                 elif pd.isna(v):
                     return None
                 return v
+
             return {k: convert_value(v) for k, v in result.items()}
         return result

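`_to_json_serializable` is what lets every operation return plain JSON-friendly values: datetime columns are stringified before `to_dict(orient="records")`, and numpy scalars in dict results are unwrapped. A small sketch of the DataFrame branch:

```python
import pandas as pd

df = pd.DataFrame({"when": pd.to_datetime(["2024-01-02 03:04:05"]), "n": [1]})

# Same steps the method performs for the DataFrame case.
for col in df.select_dtypes(include=["datetime64"]).columns:
    df[col] = df[col].dt.strftime("%Y-%m-%d %H:%M:%S")

print(df.to_dict(orient="records"))
# [{'when': '2024-01-02 03:04:05', 'n': 1}]
```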
@@ -182,13 +435,18 @@ class PandasTool(BaseTool):
                     StringIO(csv_str),
                     sep=self.config.csv_delimiter,
                     encoding=self.config.encoding,
-                    chunksize=self.config.chunk_size
+                    chunksize=self.config.chunk_size,
                 ):
                     chunks.append(chunk)
                 df = pd.concat(chunks)
             else:
-                df = pd.read_csv(
-
+                df = pd.read_csv(
+                    StringIO(csv_str),
+                    sep=self.config.csv_delimiter,
+                    encoding=self.config.encoding,
+                )
+            result = self._to_json_serializable(df)
+            return cast(List[Dict], result)
         except Exception as e:
             raise DataFrameError(f"Failed to read CSV: {e}")

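Above the size threshold, `read_csv` switches to pandas' chunked reader and reassembles the frame with `pd.concat`. A sketch showing the two paths produce the same frame (toy chunk size; the tool takes `chunk_size` from its config):

```python
from io import StringIO

import pandas as pd

csv_str = "a,b\n1,2\n3,4\n5,6\n"

# Chunked path: read two rows at a time, then reassemble.
chunks = list(pd.read_csv(StringIO(csv_str), chunksize=2))
chunked = pd.concat(chunks)

# Single-shot path.
direct = pd.read_csv(StringIO(csv_str))

assert chunked.equals(direct)  # same frame either way
```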
@@ -196,7 +454,8 @@ class PandasTool(BaseTool):
         """Read JSON string into a DataFrame."""
         try:
             df = pd.read_json(StringIO(json_str))
-
+            result = self._to_json_serializable(df)
+            return cast(List[Dict], result)
         except Exception as e:
             raise DataFrameError(f"Failed to read JSON: {e}")

@@ -204,26 +463,31 @@ class PandasTool(BaseTool):
         """Read data from a file (CSV, Excel, JSON)."""
         try:
             if file_type == "csv":
-                file_size = sum(1 for _ in open(file_path,
+                file_size = sum(1 for _ in open(file_path, "r", encoding=self.config.encoding))
                 if file_size > self.config.chunk_size:
                     chunks = []
                     for chunk in pd.read_csv(
                         file_path,
                         sep=self.config.csv_delimiter,
                         encoding=self.config.encoding,
-                        chunksize=self.config.chunk_size
+                        chunksize=self.config.chunk_size,
                     ):
                         chunks.append(chunk)
                     df = pd.concat(chunks)
                 else:
-                    df = pd.read_csv(
+                    df = pd.read_csv(
+                        file_path,
+                        sep=self.config.csv_delimiter,
+                        encoding=self.config.encoding,
+                    )
             elif file_type == "excel":
                 df = pd.read_excel(file_path)
             elif file_type == "json":
                 df = pd.read_json(file_path)
             else:
                 raise ValidationError(f"Unsupported file type: {file_type}")
-
+            result = self._to_json_serializable(df)
+            return cast(List[Dict], result)
         except ValidationError:
             raise
         except Exception as e:
@@ -234,7 +498,12 @@ class PandasTool(BaseTool):
         df = self._validate_df(records)
         try:
             if file_type == "csv":
-                df.to_csv(
+                df.to_csv(
+                    file_path,
+                    index=False,
+                    sep=self.config.csv_delimiter,
+                    encoding=self.config.encoding,
+                )
             elif file_type == "excel":
                 df.to_excel(file_path, index=False)
             elif file_type == "json":
@@ -249,7 +518,8 @@ class PandasTool(BaseTool):
         """Compute summary statistics for DataFrame."""
         df = self._validate_df(records)
         desc = df.describe(include="all").to_dict()
-
+        result = self._to_json_serializable(desc)
+        return cast(Dict, result)

     def describe(self, records: List[Dict], columns: Optional[List[str]] = None) -> Dict:
         """Compute descriptive statistics for specified columns."""
@@ -258,21 +528,24 @@ class PandasTool(BaseTool):
             self._validate_columns(df, columns)
             df = df[columns]
         desc = df.describe().to_dict()
-
+        result = self._to_json_serializable(desc)
+        return cast(Dict, result)

     def value_counts(self, records: List[Dict], columns: List[str]) -> Dict:
         """Compute value counts for specified columns."""
         df = self._validate_df(records)
         self._validate_columns(df, columns)
         result = {col: df[col].value_counts().to_dict() for col in columns}
-
+        converted = self._to_json_serializable(result)
+        return cast(Dict, converted)

     def filter(self, records: List[Dict], condition: str) -> List[Dict]:
         """Filter DataFrame based on a condition."""
         df = self._validate_df(records)
         try:
             df = df.query(condition, engine="python")
-
+            result = self._to_json_serializable(df)
+            return cast(List[Dict], result)
         except Exception as e:
             raise DataFrameError(f"Invalid query condition: {e}")

@@ -280,27 +553,31 @@ class PandasTool(BaseTool):
         """Select specified columns from DataFrame."""
         df = self._validate_df(records)
         self._validate_columns(df, columns)
-
+        result = self._to_json_serializable(df[columns])
+        return cast(List[Dict], result)

     def drop_columns(self, records: List[Dict], columns: List[str]) -> List[Dict]:
         """Drop specified columns from DataFrame."""
         df = self._validate_df(records)
         self._validate_columns(df, columns)
-
+        result = self._to_json_serializable(df.drop(columns=columns))
+        return cast(List[Dict], result)

     def drop_duplicates(self, records: List[Dict], columns: Optional[List[str]] = None) -> List[Dict]:
         """Drop duplicate rows based on specified columns."""
         df = self._validate_df(records)
         if columns:
             self._validate_columns(df, columns)
-
+        result = self._to_json_serializable(df.drop_duplicates(subset=columns))
+        return cast(List[Dict], result)

     def dropna(self, records: List[Dict], axis: int = 0, how: str = "any") -> List[Dict]:
         """Drop rows or columns with missing values."""
         df = self._validate_df(records)
         if how not in ["any", "all"]:
             raise ValidationError("how must be 'any' or 'all'")
-
+        result = self._to_json_serializable(df.dropna(axis=axis, how=how))
+        return cast(List[Dict], result)

     def groupby(self, records: List[Dict], by: List[str], agg: Dict[str, str]) -> List[Dict]:
         """Group DataFrame and apply aggregations."""
@@ -308,21 +585,42 @@ class PandasTool(BaseTool):
         self._validate_columns(df, by + list(agg.keys()))
         try:
             df = df.groupby(by).agg(agg).reset_index()
-
+            result = self._to_json_serializable(df)
+            return cast(List[Dict], result)
         except Exception as e:
             raise DataFrameError(f"Groupby failed: {e}")

-    def pivot_table(
+    def pivot_table(
+        self,
+        records: List[Dict],
+        values: List[str],
+        index: List[str],
+        columns: List[str],
+        aggfunc: str = "mean",
+    ) -> List[Dict]:
         """Create a pivot table from DataFrame."""
         df = self._validate_df(records)
         self._validate_columns(df, values + index + columns)
         try:
-            df = pd.pivot_table(
-
+            df = pd.pivot_table(
+                df,
+                values=values,
+                index=index,
+                columns=columns,
+                aggfunc=aggfunc,
+            )
+            result = self._to_json_serializable(df.reset_index())
+            return cast(List[Dict], result)
         except Exception as e:
             raise DataFrameError(f"Pivot table failed: {e}")

-    def merge(
+    def merge(
+        self,
+        records: List[Dict],
+        records_right: List[Dict],
+        on: Union[str, List[str]],
+        join_type: str = "inner",
+    ) -> List[Dict]:
         """Merge two DataFrames."""
         df_left = self._validate_df(records)
         df_right = self._validate_df(records_right)
@@ -332,7 +630,8 @@ class PandasTool(BaseTool):
         self._validate_columns(df_right, [on] if isinstance(on, str) else on)
         try:
             df = df_left.merge(df_right, on=on, how=join_type)
-
+            result = self._to_json_serializable(df)
+            return cast(List[Dict], result)
         except Exception as e:
             raise DataFrameError(f"Merge failed: {e}")

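`merge` forwards `join_type` straight to pandas' `how=` argument, so standard join semantics apply to the record payloads. A sketch:

```python
import pandas as pd

left = pd.DataFrame([{"id": 1, "x": "a"}, {"id": 2, "x": "b"}])
right = pd.DataFrame([{"id": 2, "y": "c"}])

inner = left.merge(right, on="id", how="inner")  # only matching ids survive
outer = left.merge(right, on="id", how="outer")  # all ids kept, NaN where missing

print(inner.to_dict(orient="records"))  # [{'id': 2, 'x': 'b', 'y': 'c'}]
print(len(outer))                       # 2 -- ids 1 and 2 both kept
```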
@@ -343,17 +642,24 @@ class PandasTool(BaseTool):
         dfs = [self._validate_df(records) for records in records_list]
         try:
             df = pd.concat(dfs, axis=axis, ignore_index=True)
-
+            result = self._to_json_serializable(df)
+            return cast(List[Dict], result)
         except Exception as e:
             raise DataFrameError(f"Concat failed: {e}")

-    def sort_values(
+    def sort_values(
+        self,
+        records: List[Dict],
+        sort_by: List[str],
+        ascending: Union[bool, List[bool]] = True,
+    ) -> List[Dict]:
         """Sort DataFrame by specified columns."""
         df = self._validate_df(records)
         self._validate_columns(df, sort_by)
         try:
             df = df.sort_values(by=sort_by, ascending=ascending)
-
+            result = self._to_json_serializable(df)
+            return cast(List[Dict], result)
         except Exception as e:
             raise DataFrameError(f"Sort failed: {e}")

@@ -361,17 +667,29 @@ class PandasTool(BaseTool):
         """Rename DataFrame columns."""
         df = self._validate_df(records)
         self._validate_columns(df, list(mapping.keys()))
-
-
-
+        result = self._to_json_serializable(df.rename(columns=mapping))
+        return cast(List[Dict], result)
+
+    def replace_values(
+        self,
+        records: List[Dict],
+        to_replace: Dict,
+        columns: Optional[List[str]] = None,
+    ) -> List[Dict]:
         """Replace values in DataFrame."""
         df = self._validate_df(records)
         if columns:
             self._validate_columns(df, columns)
             df = df[columns]
-
-
-
+        result = self._to_json_serializable(df.replace(to_replace))
+        return cast(List[Dict], result)
+
+    def fill_na(
+        self,
+        records: List[Dict],
+        value: Union[str, int, float],
+        columns: Optional[List[str]] = None,
+    ) -> List[Dict]:
         """Fill missing values in DataFrame."""
         df = self._validate_df(records)
         if columns:
@@ -379,7 +697,8 @@ class PandasTool(BaseTool):
             df[columns] = df[columns].fillna(value)
         else:
             df = df.fillna(value)
-
+        result = self._to_json_serializable(df)
+        return cast(List[Dict], result)

     def astype(self, records: List[Dict], dtypes: Dict[str, str]) -> List[Dict]:
         """Convert column types in DataFrame."""
@@ -387,7 +706,8 @@ class PandasTool(BaseTool):
         self._validate_columns(df, list(dtypes.keys()))
         try:
             df = df.astype(dtypes)
-
+            result = self._to_json_serializable(df)
+            return cast(List[Dict], result)
         except Exception as e:
             raise DataFrameError(f"Type conversion failed: {e}")

@@ -396,24 +716,24 @@ class PandasTool(BaseTool):
         df = self._validate_df(records)
         self._validate_columns(df, columns)
         allowed_funcs = {
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            "upper": lambda x: x.upper() if isinstance(x, str) else x,
+            "lower": lambda x: x.lower() if isinstance(x, str) else x,
+            "strip": lambda x: x.strip() if isinstance(x, str) else x,
+            "capitalize": lambda x: (x.capitalize() if isinstance(x, str) else x),
+            "title": lambda x: x.title() if isinstance(x, str) else x,
+            "len": lambda x: len(str(x)) if pd.notna(x) else 0,
+            "abs": lambda x: (abs(float(x)) if pd.notna(x) and not isinstance(x, str) else x),
+            "round": lambda x: (round(float(x)) if pd.notna(x) and not isinstance(x, str) else x),
+            "ceil": lambda x: (np.ceil(float(x)) if pd.notna(x) and not isinstance(x, str) else x),
+            "floor": lambda x: (np.floor(float(x)) if pd.notna(x) and not isinstance(x, str) else x),
+            "int": lambda x: (int(float(x)) if pd.notna(x) and not isinstance(x, str) else None),
+            "float": lambda x: (float(x) if pd.notna(x) and not isinstance(x, str) else None),
+            "str": lambda x: str(x) if pd.notna(x) else "",
+            "bool": lambda x: bool(x) if pd.notna(x) else False,
+            "date_only": lambda x: (x.date() if isinstance(x, pd.Timestamp) else x),
+            "year": lambda x: x.year if isinstance(x, pd.Timestamp) else None,
+            "month": lambda x: (x.month if isinstance(x, pd.Timestamp) else None),
+            "day": lambda x: x.day if isinstance(x, pd.Timestamp) else None,
         }
         try:
             if axis == 0:
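The rebuilt `apply` looks `func` up in the fixed `allowed_funcs` mapping, so callers choose among named, pre-vetted lambdas rather than passing arbitrary callables. A sketch of the whitelist pattern:

```python
import pandas as pd

# Two entries from the mapping above; callers select by string key only.
allowed_funcs = {
    "upper": lambda x: x.upper() if isinstance(x, str) else x,
    "len": lambda x: len(str(x)) if pd.notna(x) else 0,
}

df = pd.DataFrame({"name": ["alice", None]})
df["name"] = df["name"].apply(allowed_funcs["upper"])
print(df.to_dict(orient="records"))  # [{'name': 'ALICE'}, {'name': None}]
```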
@@ -421,7 +741,8 @@ class PandasTool(BaseTool):
                     df[col] = df[col].apply(allowed_funcs[func])
             else:
                 df[columns] = df[columns].apply(allowed_funcs[func], axis=1)
-
+            result = self._to_json_serializable(df)
+            return cast(List[Dict], result)
         except Exception as e:
             raise DataFrameError(f"Apply failed: {e}")
 
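The new `allowed_funcs` table whitelists the only transformations `apply` will dispatch, so callers pass a function *name* rather than arbitrary code. A cut-down sketch of the column-wise (`axis == 0`) dispatch; the three entries and the sample frame are illustrative, not the tool's full table:

```python
import numpy as np
import pandas as pd

# abbreviated version of the whitelist above
allowed_funcs = {
    "upper": lambda x: x.upper() if isinstance(x, str) else x,
    "len": lambda x: len(str(x)) if pd.notna(x) else 0,
    "ceil": lambda x: np.ceil(float(x)) if pd.notna(x) and not isinstance(x, str) else x,
}

df = pd.DataFrame({"name": ["ada", None], "score": [1.2, 2.7]})

# mirrors the axis == 0 branch: look the function up by name, apply per column
df["name"] = df["name"].apply(allowed_funcs["upper"])
df["score"] = df["score"].apply(allowed_funcs["ceil"])
print(df.to_dict("records"))
```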
@@ -431,7 +752,8 @@ class PandasTool(BaseTool):
         self._validate_columns(df, id_vars + value_vars)
         try:
             df = pd.melt(df, id_vars=id_vars, value_vars=value_vars)
-
+            result = self._to_json_serializable(df)
+            return cast(List[Dict], result)
         except Exception as e:
             raise DataFrameError(f"Melt failed: {e}")
 
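`melt` is a thin wrapper over `pd.melt`, reshaping wide to long with one output row per (`id_vars` row, `value_vars` column) pair. Sketch with hypothetical data:

```python
import pandas as pd

df = pd.DataFrame({"id": [1, 2], "q1": [10, 30], "q2": [20, 40]})

# wide -> long: each q1/q2 cell becomes its own (variable, value) row
long_df = pd.melt(df, id_vars=["id"], value_vars=["q1", "q2"])
print(long_df.to_dict("records"))
# [{'id': 1, 'variable': 'q1', 'value': 10}, {'id': 2, 'variable': 'q1', 'value': 30}, ...]
```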
@@ -441,7 +763,11 @@ class PandasTool(BaseTool):
         self._validate_columns(df, [index, columns, values])
         try:
             df = df.pivot(index=index, columns=columns, values=values)
-
+            result = self._to_json_serializable(df.reset_index())
+            # Ensure we return a list
+            if isinstance(result, dict):
+                return [result]
+            return result
         except Exception as e:
             raise DataFrameError(f"Pivot failed: {e}")
 
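`pivot` is the one reshaper that guards its return shape: `_to_json_serializable` presumably yields a bare dict for a single-row frame, and the added `isinstance` check restores the declared `List[Dict]` contract. The pandas step underneath, sketched (note the `index`/`columns` pairs must be unique or `pivot` raises):

```python
import pandas as pd

records = [
    {"city": "Oslo", "year": 2023, "temp": 6.1},
    {"city": "Oslo", "year": 2024, "temp": 6.4},
    {"city": "Rome", "year": 2023, "temp": 16.0},
]
df = pd.DataFrame(records)

# one row per index value, one column per distinct `columns` value
wide = df.pivot(index="city", columns="year", values="temp").reset_index()
print(wide.to_dict("records"))
```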
@@ -450,7 +776,8 @@ class PandasTool(BaseTool):
         df = self._validate_df(records)
         try:
             df = df.stack().reset_index()
-
+            result = self._to_json_serializable(df)
+            return cast(List[Dict], result)
         except Exception as e:
             raise DataFrameError(f"Stack failed: {e}")
 
@@ -459,7 +786,8 @@ class PandasTool(BaseTool):
         df = self._validate_df(records)
         try:
             df = df.unstack(level=level).reset_index()
-
+            result = self._to_json_serializable(df)
+            return cast(List[Dict], result)
         except Exception as e:
             raise DataFrameError(f"Unstack failed: {e}")
 
@@ -470,7 +798,8 @@ class PandasTool(BaseTool):
         for col in columns:
             if df[col].dtype == "object":
                 df[col] = df[col].str.strip()
-
+        result = self._to_json_serializable(df)
+        return cast(List[Dict], result)
 
     def to_numeric(self, records: List[Dict], columns: List[str]) -> List[Dict]:
         """Convert columns to numeric type."""
@@ -479,18 +808,25 @@ class PandasTool(BaseTool):
         try:
             for col in columns:
                 df[col] = pd.to_numeric(df[col], errors="coerce")
-
+            result = self._to_json_serializable(df)
+            return cast(List[Dict], result)
         except Exception as e:
             raise DataFrameError(f"To numeric failed: {e}")
 
-    def to_datetime(
+    def to_datetime(
+        self,
+        records: List[Dict],
+        columns: List[str],
+        format: Optional[str] = None,
+    ) -> List[Dict]:
         """Convert columns to datetime type."""
         df = self._validate_df(records)
         self._validate_columns(df, columns)
         try:
             for col in columns:
                 df[col] = pd.to_datetime(df[col], format=format, errors="coerce")
-
+            result = self._to_json_serializable(df)
+            return cast(List[Dict], result)
         except Exception as e:
             raise DataFrameError(f"To datetime failed: {e}")
 
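Both converters pass `errors="coerce"`, so unparseable values become `NaT`/`NaN` instead of raising, and the optional `format` pins parsing to a single layout. That coercion behavior in isolation:

```python
import pandas as pd

s = pd.Series(["2024-01-31", "not a date", "2024-02-29"])

# coercion: the bad row becomes NaT rather than raising a ValueError
parsed = pd.to_datetime(s, format="%Y-%m-%d", errors="coerce")
print(parsed.tolist())
```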
@@ -500,7 +836,8 @@ class PandasTool(BaseTool):
         if columns:
             self._validate_columns(df, columns)
             df = df[columns]
-
+        result = self._to_json_serializable(df.select_dtypes(include=np.number).mean())
+        return cast(Dict, result)
 
     def sum(self, records: List[Dict], columns: Optional[List[str]] = None) -> Dict:
         """Compute sum of numeric columns."""
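The aggregators (`mean`, `sum`, `count`, `min`, `max`) reduce the frame to a Series before serializing, which is why they return a `Dict` keyed by column rather than a record list; `mean` and `sum` also drop non-numeric columns first. The `mean` path in plain pandas, with made-up data:

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3], "b": [2.0, 4.0, 6.0], "label": ["x", "y", "z"]})

# non-numeric columns are dropped before the reduction
means = df.select_dtypes(include=np.number).mean()
print(means.to_dict())  # {'a': 2.0, 'b': 4.0}
```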
@@ -508,7 +845,8 @@ class PandasTool(BaseTool):
         if columns:
             self._validate_columns(df, columns)
             df = df[columns]
-
+        result = self._to_json_serializable(df.select_dtypes(include=np.number).sum())
+        return cast(Dict, result)
 
     def count(self, records: List[Dict], columns: Optional[List[str]] = None) -> Dict:
         """Compute count of non-null values."""
@@ -516,7 +854,8 @@ class PandasTool(BaseTool):
         if columns:
             self._validate_columns(df, columns)
             df = df[columns]
-
+        result = self._to_json_serializable(df.count())
+        return cast(Dict, result)
 
     def min(self, records: List[Dict], columns: Optional[List[str]] = None) -> Dict:
         """Compute minimum values."""
@@ -524,7 +863,8 @@ class PandasTool(BaseTool):
         if columns:
             self._validate_columns(df, columns)
             df = df[columns]
-
+        result = self._to_json_serializable(df.min())
+        return cast(Dict, result)
 
     def max(self, records: List[Dict], columns: Optional[List[str]] = None) -> Dict:
         """Compute maximum values."""
@@ -532,9 +872,16 @@ class PandasTool(BaseTool):
         if columns:
             self._validate_columns(df, columns)
             df = df[columns]
-
-
-
+        result = self._to_json_serializable(df.max())
+        return cast(Dict, result)
+
+    def rolling(
+        self,
+        records: List[Dict],
+        columns: List[str],
+        window: int,
+        function: str = "mean",
+    ) -> List[Dict]:
         """Apply rolling window function to columns."""
         df = self._validate_df(records)
         self._validate_columns(df, columns)
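`rolling` resolves the reducer by name with `getattr` on the pandas `Rolling` object, so any reducer `Rolling` exposes under that name (`mean`, `sum`, `min`, `max`, `std`, ...) should work, with results landing in new `{col}_{function}_{window}` columns. A sketch of that dispatch on hypothetical data:

```python
import pandas as pd

df = pd.DataFrame({"price": [10.0, 11.0, 13.0, 12.0]})

window, function = 2, "mean"
# same dispatch as the tool: look the reducer up on the Rolling object by name
df[f"price_{function}_{window}"] = getattr(df["price"].rolling(window), function)()
print(df.to_dict("records"))  # the first rolling value is NaN until the window fills
```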
@@ -545,21 +892,30 @@ class PandasTool(BaseTool):
             for col in columns:
                 if pd.api.types.is_numeric_dtype(df[col]):
                     df[f"{col}_{function}_{window}"] = getattr(df[col].rolling(window), function)()
-
+            result = self._to_json_serializable(df)
+            return cast(List[Dict], result)
         except Exception as e:
             raise DataFrameError(f"Rolling operation failed: {e}")
 
     def head(self, records: List[Dict], n: int = 5) -> List[Dict]:
         """Return first n rows of DataFrame."""
         df = self._validate_df(records)
-
+        result = self._to_json_serializable(df.head(n))
+        return cast(List[Dict], result)
 
     def tail(self, records: List[Dict], n: int = 5) -> List[Dict]:
         """Return last n rows of DataFrame."""
         df = self._validate_df(records)
-
+        result = self._to_json_serializable(df.tail(n))
+        return cast(List[Dict], result)
 
-    def sample(
+    def sample(
+        self,
+        records: List[Dict],
+        n: int = 5,
+        random_state: Optional[int] = None,
+    ) -> List[Dict]:
         """Return random sample of n rows from DataFrame."""
         df = self._validate_df(records)
-
+        result = self._to_json_serializable(df.sample(n=min(n, len(df)), random_state=random_state))
+        return cast(List[Dict], result)
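`sample` clamps `n` with `min(n, len(df))`, so over-asking degrades to returning every row (pandas itself raises when `n` exceeds the frame length without `replace=True`), and `random_state` keeps draws reproducible. The clamp in isolation:

```python
import pandas as pd

df = pd.DataFrame({"id": [1, 2, 3]})

# n > len(df) would raise in pandas; the min() clamp avoids that
picked = df.sample(n=min(10, len(df)), random_state=42)
print(picked.to_dict("records"))
```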