aiecs 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. aiecs/__init__.py +72 -0
  2. aiecs/__main__.py +41 -0
  3. aiecs/aiecs_client.py +469 -0
  4. aiecs/application/__init__.py +10 -0
  5. aiecs/application/executors/__init__.py +10 -0
  6. aiecs/application/executors/operation_executor.py +363 -0
  7. aiecs/application/knowledge_graph/__init__.py +7 -0
  8. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
  11. aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
  12. aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
  13. aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
  14. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  15. aiecs/application/knowledge_graph/extractors/base.py +100 -0
  16. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
  17. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
  18. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
  19. aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
  20. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
  21. aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
  22. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
  23. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
  24. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  25. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
  26. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
  27. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  28. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
  29. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  30. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  31. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
  32. aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
  33. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
  34. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  35. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  36. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
  37. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
  38. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
  39. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
  40. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
  41. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
  42. aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
  43. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
  44. aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
  45. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
  46. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  47. aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
  48. aiecs/application/knowledge_graph/search/reranker.py +295 -0
  49. aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
  50. aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
  51. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  52. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
  53. aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
  54. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  55. aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
  56. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  57. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
  58. aiecs/common/__init__.py +9 -0
  59. aiecs/common/knowledge_graph/__init__.py +17 -0
  60. aiecs/common/knowledge_graph/runnable.py +484 -0
  61. aiecs/config/__init__.py +16 -0
  62. aiecs/config/config.py +498 -0
  63. aiecs/config/graph_config.py +137 -0
  64. aiecs/config/registry.py +23 -0
  65. aiecs/core/__init__.py +46 -0
  66. aiecs/core/interface/__init__.py +34 -0
  67. aiecs/core/interface/execution_interface.py +152 -0
  68. aiecs/core/interface/storage_interface.py +171 -0
  69. aiecs/domain/__init__.py +289 -0
  70. aiecs/domain/agent/__init__.py +189 -0
  71. aiecs/domain/agent/base_agent.py +697 -0
  72. aiecs/domain/agent/exceptions.py +103 -0
  73. aiecs/domain/agent/graph_aware_mixin.py +559 -0
  74. aiecs/domain/agent/hybrid_agent.py +490 -0
  75. aiecs/domain/agent/integration/__init__.py +26 -0
  76. aiecs/domain/agent/integration/context_compressor.py +222 -0
  77. aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
  78. aiecs/domain/agent/integration/retry_policy.py +219 -0
  79. aiecs/domain/agent/integration/role_config.py +213 -0
  80. aiecs/domain/agent/knowledge_aware_agent.py +646 -0
  81. aiecs/domain/agent/lifecycle.py +296 -0
  82. aiecs/domain/agent/llm_agent.py +300 -0
  83. aiecs/domain/agent/memory/__init__.py +12 -0
  84. aiecs/domain/agent/memory/conversation.py +197 -0
  85. aiecs/domain/agent/migration/__init__.py +14 -0
  86. aiecs/domain/agent/migration/conversion.py +160 -0
  87. aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
  88. aiecs/domain/agent/models.py +317 -0
  89. aiecs/domain/agent/observability.py +407 -0
  90. aiecs/domain/agent/persistence.py +289 -0
  91. aiecs/domain/agent/prompts/__init__.py +29 -0
  92. aiecs/domain/agent/prompts/builder.py +161 -0
  93. aiecs/domain/agent/prompts/formatters.py +189 -0
  94. aiecs/domain/agent/prompts/template.py +255 -0
  95. aiecs/domain/agent/registry.py +260 -0
  96. aiecs/domain/agent/tool_agent.py +257 -0
  97. aiecs/domain/agent/tools/__init__.py +12 -0
  98. aiecs/domain/agent/tools/schema_generator.py +221 -0
  99. aiecs/domain/community/__init__.py +155 -0
  100. aiecs/domain/community/agent_adapter.py +477 -0
  101. aiecs/domain/community/analytics.py +481 -0
  102. aiecs/domain/community/collaborative_workflow.py +642 -0
  103. aiecs/domain/community/communication_hub.py +645 -0
  104. aiecs/domain/community/community_builder.py +320 -0
  105. aiecs/domain/community/community_integration.py +800 -0
  106. aiecs/domain/community/community_manager.py +813 -0
  107. aiecs/domain/community/decision_engine.py +879 -0
  108. aiecs/domain/community/exceptions.py +225 -0
  109. aiecs/domain/community/models/__init__.py +33 -0
  110. aiecs/domain/community/models/community_models.py +268 -0
  111. aiecs/domain/community/resource_manager.py +457 -0
  112. aiecs/domain/community/shared_context_manager.py +603 -0
  113. aiecs/domain/context/__init__.py +58 -0
  114. aiecs/domain/context/context_engine.py +989 -0
  115. aiecs/domain/context/conversation_models.py +354 -0
  116. aiecs/domain/context/graph_memory.py +467 -0
  117. aiecs/domain/execution/__init__.py +12 -0
  118. aiecs/domain/execution/model.py +57 -0
  119. aiecs/domain/knowledge_graph/__init__.py +19 -0
  120. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  121. aiecs/domain/knowledge_graph/models/entity.py +130 -0
  122. aiecs/domain/knowledge_graph/models/evidence.py +194 -0
  123. aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
  124. aiecs/domain/knowledge_graph/models/path.py +179 -0
  125. aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
  126. aiecs/domain/knowledge_graph/models/query.py +272 -0
  127. aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
  128. aiecs/domain/knowledge_graph/models/relation.py +136 -0
  129. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  130. aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
  131. aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
  132. aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
  133. aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
  134. aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
  135. aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
  136. aiecs/domain/task/__init__.py +13 -0
  137. aiecs/domain/task/dsl_processor.py +613 -0
  138. aiecs/domain/task/model.py +62 -0
  139. aiecs/domain/task/task_context.py +268 -0
  140. aiecs/infrastructure/__init__.py +24 -0
  141. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  142. aiecs/infrastructure/graph_storage/base.py +601 -0
  143. aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
  144. aiecs/infrastructure/graph_storage/cache.py +429 -0
  145. aiecs/infrastructure/graph_storage/distributed.py +226 -0
  146. aiecs/infrastructure/graph_storage/error_handling.py +390 -0
  147. aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
  148. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  149. aiecs/infrastructure/graph_storage/in_memory.py +514 -0
  150. aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
  151. aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
  152. aiecs/infrastructure/graph_storage/metrics.py +357 -0
  153. aiecs/infrastructure/graph_storage/migration.py +413 -0
  154. aiecs/infrastructure/graph_storage/pagination.py +471 -0
  155. aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
  156. aiecs/infrastructure/graph_storage/postgres.py +871 -0
  157. aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
  158. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  159. aiecs/infrastructure/graph_storage/sqlite.py +623 -0
  160. aiecs/infrastructure/graph_storage/streaming.py +495 -0
  161. aiecs/infrastructure/messaging/__init__.py +13 -0
  162. aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
  163. aiecs/infrastructure/messaging/websocket_manager.py +298 -0
  164. aiecs/infrastructure/monitoring/__init__.py +34 -0
  165. aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
  166. aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
  167. aiecs/infrastructure/monitoring/structured_logger.py +48 -0
  168. aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
  169. aiecs/infrastructure/persistence/__init__.py +24 -0
  170. aiecs/infrastructure/persistence/context_engine_client.py +187 -0
  171. aiecs/infrastructure/persistence/database_manager.py +333 -0
  172. aiecs/infrastructure/persistence/file_storage.py +754 -0
  173. aiecs/infrastructure/persistence/redis_client.py +220 -0
  174. aiecs/llm/__init__.py +86 -0
  175. aiecs/llm/callbacks/__init__.py +11 -0
  176. aiecs/llm/callbacks/custom_callbacks.py +264 -0
  177. aiecs/llm/client_factory.py +420 -0
  178. aiecs/llm/clients/__init__.py +33 -0
  179. aiecs/llm/clients/base_client.py +193 -0
  180. aiecs/llm/clients/googleai_client.py +181 -0
  181. aiecs/llm/clients/openai_client.py +131 -0
  182. aiecs/llm/clients/vertex_client.py +437 -0
  183. aiecs/llm/clients/xai_client.py +184 -0
  184. aiecs/llm/config/__init__.py +51 -0
  185. aiecs/llm/config/config_loader.py +275 -0
  186. aiecs/llm/config/config_validator.py +236 -0
  187. aiecs/llm/config/model_config.py +151 -0
  188. aiecs/llm/utils/__init__.py +10 -0
  189. aiecs/llm/utils/validate_config.py +91 -0
  190. aiecs/main.py +363 -0
  191. aiecs/scripts/__init__.py +3 -0
  192. aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
  193. aiecs/scripts/aid/__init__.py +19 -0
  194. aiecs/scripts/aid/version_manager.py +215 -0
  195. aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
  196. aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
  197. aiecs/scripts/dependance_check/__init__.py +17 -0
  198. aiecs/scripts/dependance_check/dependency_checker.py +938 -0
  199. aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
  200. aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
  201. aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
  202. aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
  203. aiecs/scripts/dependance_patch/__init__.py +7 -0
  204. aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
  205. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  206. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
  207. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
  208. aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
  209. aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
  210. aiecs/scripts/tools_develop/README.md +449 -0
  211. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  212. aiecs/scripts/tools_develop/__init__.py +21 -0
  213. aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
  214. aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
  215. aiecs/scripts/tools_develop/verify_tools.py +356 -0
  216. aiecs/tasks/__init__.py +1 -0
  217. aiecs/tasks/worker.py +172 -0
  218. aiecs/tools/__init__.py +299 -0
  219. aiecs/tools/apisource/__init__.py +99 -0
  220. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  221. aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
  222. aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
  223. aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
  224. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  225. aiecs/tools/apisource/monitoring/metrics.py +303 -0
  226. aiecs/tools/apisource/providers/__init__.py +115 -0
  227. aiecs/tools/apisource/providers/base.py +664 -0
  228. aiecs/tools/apisource/providers/census.py +401 -0
  229. aiecs/tools/apisource/providers/fred.py +564 -0
  230. aiecs/tools/apisource/providers/newsapi.py +412 -0
  231. aiecs/tools/apisource/providers/worldbank.py +357 -0
  232. aiecs/tools/apisource/reliability/__init__.py +12 -0
  233. aiecs/tools/apisource/reliability/error_handler.py +375 -0
  234. aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
  235. aiecs/tools/apisource/tool.py +850 -0
  236. aiecs/tools/apisource/utils/__init__.py +9 -0
  237. aiecs/tools/apisource/utils/validators.py +338 -0
  238. aiecs/tools/base_tool.py +201 -0
  239. aiecs/tools/docs/__init__.py +121 -0
  240. aiecs/tools/docs/ai_document_orchestrator.py +599 -0
  241. aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
  242. aiecs/tools/docs/content_insertion_tool.py +1333 -0
  243. aiecs/tools/docs/document_creator_tool.py +1317 -0
  244. aiecs/tools/docs/document_layout_tool.py +1166 -0
  245. aiecs/tools/docs/document_parser_tool.py +994 -0
  246. aiecs/tools/docs/document_writer_tool.py +1818 -0
  247. aiecs/tools/knowledge_graph/__init__.py +17 -0
  248. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
  249. aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
  250. aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
  251. aiecs/tools/langchain_adapter.py +542 -0
  252. aiecs/tools/schema_generator.py +275 -0
  253. aiecs/tools/search_tool/__init__.py +100 -0
  254. aiecs/tools/search_tool/analyzers.py +589 -0
  255. aiecs/tools/search_tool/cache.py +260 -0
  256. aiecs/tools/search_tool/constants.py +128 -0
  257. aiecs/tools/search_tool/context.py +216 -0
  258. aiecs/tools/search_tool/core.py +749 -0
  259. aiecs/tools/search_tool/deduplicator.py +123 -0
  260. aiecs/tools/search_tool/error_handler.py +271 -0
  261. aiecs/tools/search_tool/metrics.py +371 -0
  262. aiecs/tools/search_tool/rate_limiter.py +178 -0
  263. aiecs/tools/search_tool/schemas.py +277 -0
  264. aiecs/tools/statistics/__init__.py +80 -0
  265. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
  266. aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
  267. aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
  268. aiecs/tools/statistics/data_loader_tool.py +564 -0
  269. aiecs/tools/statistics/data_profiler_tool.py +658 -0
  270. aiecs/tools/statistics/data_transformer_tool.py +573 -0
  271. aiecs/tools/statistics/data_visualizer_tool.py +495 -0
  272. aiecs/tools/statistics/model_trainer_tool.py +487 -0
  273. aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
  274. aiecs/tools/task_tools/__init__.py +86 -0
  275. aiecs/tools/task_tools/chart_tool.py +732 -0
  276. aiecs/tools/task_tools/classfire_tool.py +922 -0
  277. aiecs/tools/task_tools/image_tool.py +447 -0
  278. aiecs/tools/task_tools/office_tool.py +684 -0
  279. aiecs/tools/task_tools/pandas_tool.py +635 -0
  280. aiecs/tools/task_tools/report_tool.py +635 -0
  281. aiecs/tools/task_tools/research_tool.py +392 -0
  282. aiecs/tools/task_tools/scraper_tool.py +715 -0
  283. aiecs/tools/task_tools/stats_tool.py +688 -0
  284. aiecs/tools/temp_file_manager.py +130 -0
  285. aiecs/tools/tool_executor/__init__.py +37 -0
  286. aiecs/tools/tool_executor/tool_executor.py +881 -0
  287. aiecs/utils/LLM_output_structor.py +445 -0
  288. aiecs/utils/__init__.py +34 -0
  289. aiecs/utils/base_callback.py +47 -0
  290. aiecs/utils/cache_provider.py +695 -0
  291. aiecs/utils/execution_utils.py +184 -0
  292. aiecs/utils/logging.py +1 -0
  293. aiecs/utils/prompt_loader.py +14 -0
  294. aiecs/utils/token_usage_repository.py +323 -0
  295. aiecs/ws/__init__.py +0 -0
  296. aiecs/ws/socket_server.py +52 -0
  297. aiecs-1.5.1.dist-info/METADATA +608 -0
  298. aiecs-1.5.1.dist-info/RECORD +302 -0
  299. aiecs-1.5.1.dist-info/WHEEL +5 -0
  300. aiecs-1.5.1.dist-info/entry_points.txt +10 -0
  301. aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
  302. aiecs-1.5.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,732 @@
1
+ import os
2
+ import json
3
+ import csv
4
+ import tempfile
5
+ import logging
6
+ from typing import Dict, Any, List, Optional, Union, Tuple
7
+ from enum import Enum
8
+
9
+ from pydantic import BaseModel, Field, field_validator, ConfigDict
10
+ import numpy as np
11
+ import pandas as pd
12
+ import matplotlib.pyplot as plt
13
+ import seaborn as sns
14
+
15
+ from aiecs.tools import register_tool
16
+ from aiecs.tools.base_tool import BaseTool
17
+ from aiecs.tools.tool_executor import measure_execution_time
18
+
19
+ # Enums for configuration options
20
+
21
+
22
class ExportFormat(str, Enum):
    """File formats that results can be exported to.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase value (for example ``ExportFormat.JSON == "json"``).
    """

    JSON = "json"
    CSV = "csv"
    HTML = "html"
    EXCEL = "excel"
    MARKDOWN = "markdown"
28
+
29
+
30
class VisualizationType(str, Enum):
    """Kinds of plot the chart tool can be asked to draw.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase value (for example ``VisualizationType.BAR == "bar"``).
    """

    HISTOGRAM = "histogram"
    BOXPLOT = "boxplot"
    SCATTER = "scatter"
    BAR = "bar"
    LINE = "line"
    HEATMAP = "heatmap"
    PAIR = "pair"
38
+
39
+
40
+ @register_tool("chart")
41
+ class ChartTool(BaseTool):
42
+ """Chart and visualization tool: creates charts and exports data in various formats."""
43
+
44
+ # Configuration schema
45
class Config(BaseModel):
    """Settings controlling where the chart tool writes files and how plots render."""

    # NOTE(review): ``env_prefix`` is a pydantic-settings option; on a plain
    # ``BaseModel`` it is presumably ignored - confirm whether ``BaseSettings``
    # was intended before relying on CHART_TOOL_* environment variables.
    model_config = ConfigDict(env_prefix="CHART_TOOL_")

    # Defaults to a "chart_exports" folder under the system temp directory.
    export_dir: str = Field(
        description="Directory to export files to",
        default=os.path.join(tempfile.gettempdir(), "chart_exports"),
    )
    plot_dpi: int = Field(description="DPI for plot exports", default=100)
    plot_figsize: Tuple[int, int] = Field(
        description="Default figure size (width, height) in inches",
        default=(10, 6),
    )
    allowed_extensions: List[str] = Field(
        description="Allowed file extensions",
        default=[
            ".csv",
            ".xlsx",
            ".xls",
            ".json",
            ".parquet",
            ".feather",
            ".sav",
            ".sas7bdat",
            ".por",
        ],
    )
73
+
74
+ # Input schemas for operations
75
class ReadDataSchema(BaseModel):
    """Schema for reading data files.

    ``file_path`` must point at an existing file, and ``export_path`` is only
    accepted together with a non-null ``export_format``.
    """

    file_path: str = Field(description="Path to the data file")
    nrows: Optional[int] = Field(default=None, description="Number of rows to read")
    sheet_name: Optional[Union[str, int]] = Field(
        default=0, description="Sheet name or index for Excel files"
    )
    export_format: Optional[ExportFormat] = Field(
        default=None, description="Format to export results in"
    )
    export_path: Optional[str] = Field(default=None, description="Path to export results to")

    @field_validator("file_path")
    @classmethod
    def validate_file_path(cls, v):
        """Reject paths that do not point at an existing regular file."""
        if not os.path.isfile(v):
            raise ValueError(f"File not found: {v}")
        return v

    @field_validator("export_path")
    @classmethod
    def validate_export_path(cls, v, info):
        """Require ``export_format`` whenever ``export_path`` is supplied."""
        # A defaulted ``export_format`` can be present in ``info.data`` with
        # the value None, so a key-presence test lets the missing format slip
        # through. Compare the value instead.
        if v and info.data.get("export_format") is None:
            raise ValueError("export_format must be specified when export_path is provided")
        return v
101
+
102
class VisualizationSchema(BaseModel):
    """Schema for data visualization.

    ``file_path`` must point at an existing file, and ``export_path`` is only
    accepted together with a non-null ``export_format``.
    """

    file_path: str = Field(description="Path to the data file")
    plot_type: VisualizationType = Field(description="Type of visualization to create")
    x: Optional[str] = Field(default=None, description="Column to use for x-axis")
    y: Optional[str] = Field(default=None, description="Column to use for y-axis")
    hue: Optional[str] = Field(default=None, description="Column to use for color encoding")
    variables: Optional[List[str]] = Field(
        default=None,
        description="List of variables to include in the visualization",
    )
    title: Optional[str] = Field(default=None, description="Title for the visualization")
    figsize: Optional[Tuple[int, int]] = Field(
        default=None, description="Figure size (width, height) in inches"
    )
    output_path: Optional[str] = Field(
        default=None, description="Path to save the visualization"
    )
    dpi: Optional[int] = Field(default=None, description="DPI for the visualization")
    export_format: Optional[ExportFormat] = Field(
        default=None, description="Format to export results in"
    )
    export_path: Optional[str] = Field(default=None, description="Path to export results to")

    @field_validator("file_path")
    @classmethod
    def validate_file_path(cls, v):
        """Reject paths that do not point at an existing regular file."""
        if not os.path.isfile(v):
            raise ValueError(f"File not found: {v}")
        return v

    @field_validator("export_path")
    @classmethod
    def validate_export_path(cls, v, info):
        """Require ``export_format`` whenever ``export_path`` is supplied."""
        # A defaulted ``export_format`` can be present in ``info.data`` with
        # the value None, so a key-presence test lets the missing format slip
        # through. Compare the value instead.
        if v and info.data.get("export_format") is None:
            raise ValueError("export_format must be specified when export_path is provided")
        return v
140
+
141
class ExportDataSchema(BaseModel):
    """Schema for exporting data.

    ``file_path`` must point at an existing file. ``format`` is required and
    names the format the data is exported in; ``export_path`` optionally says
    where to write it.
    """

    file_path: str = Field(description="Path to the data file")
    variables: Optional[List[str]] = Field(
        default=None,
        description="List of variables to include in the export",
    )
    format: ExportFormat = Field(description="Format to export data in")
    export_path: Optional[str] = Field(
        default=None, description="Path to save the exported data"
    )
    export_format: Optional[ExportFormat] = Field(
        default=None, description="Format to export results in"
    )

    @field_validator("file_path")
    @classmethod
    def validate_file_path(cls, v):
        """Reject paths that do not point at an existing regular file."""
        if not os.path.isfile(v):
            raise ValueError(f"File not found: {v}")
        return v

    @field_validator("export_path")
    @classmethod
    def validate_export_path(cls, v, info):
        """Require a target format whenever ``export_path`` is supplied."""
        # Field validators only see fields declared before the one being
        # validated. ``export_format`` is declared after ``export_path``, so
        # testing for it here rejected every non-empty export_path. The
        # required ``format`` field is declared earlier and already names the
        # target format, so accept either.
        has_format = (
            info.data.get("format") is not None
            or info.data.get("export_format") is not None
        )
        if v and not has_format:
            raise ValueError("export_format must be specified when export_path is provided")
        return v
170
+
171
def __init__(self, config: Optional[Dict[str, Any]] = None):
    """
    Initialize the chart tool

    Validates the configuration, creates the export directory, sets up the
    logger and selects the Matplotlib plotting style.

    Args:
        config: Optional configuration for the tool; its keys are passed to
            the nested ``Config`` model as keyword arguments.
    """
    super().__init__(config)

    # Parse configuration
    self.config = self.Config(**(config or {}))

    # Create export directory if it doesn't exist
    os.makedirs(self.config.export_dir, exist_ok=True)

    # Set up logger
    self.logger = logging.getLogger(__name__)

    # Set default matplotlib style. The "seaborn-v0_8-" prefix is not known to
    # every Matplotlib release, so try the un-prefixed name before giving up
    # instead of failing construction with OSError.
    for style_name in ("seaborn-v0_8-whitegrid", "seaborn-whitegrid"):
        try:
            plt.style.use(style_name)
        except OSError:
            continue
        break
    else:
        self.logger.warning(
            "Matplotlib style 'seaborn-v0_8-whitegrid' is unavailable; "
            "keeping the current style"
        )
191
+
192
+ def _load_data(
193
+ self,
194
+ file_path: str,
195
+ nrows: Optional[int] = None,
196
+ sheet_name: Optional[Union[str, int]] = 0,
197
+ ) -> pd.DataFrame:
198
+ """
199
+ Load data from various file formats into a pandas DataFrame
200
+
201
+ Args:
202
+ file_path: Path to the data file
203
+ nrows: Number of rows to read
204
+ sheet_name: Sheet name or index for Excel files
205
+
206
+ Returns:
207
+ Loaded DataFrame
208
+ """
209
+ # Determine file type and read accordingly
210
+ ext = os.path.splitext(file_path)[1].lower()
211
+
212
+ try:
213
+ if ext == ".sav":
214
+ import pyreadstat
215
+
216
+ df, meta = pyreadstat.read_sav(file_path)
217
+ return df
218
+ elif ext == ".sas7bdat":
219
+ import pyreadstat
220
+
221
+ df, meta = pyreadstat.read_sas7bdat(file_path)
222
+ return df
223
+ elif ext == ".por":
224
+ import pyreadstat
225
+
226
+ df, meta = pyreadstat.read_por(file_path)
227
+ return df
228
+ elif ext == ".csv":
229
+ return pd.read_csv(file_path, nrows=nrows)
230
+ elif ext in [".xlsx", ".xls"]:
231
+ return pd.read_excel(file_path, sheet_name=sheet_name, nrows=nrows)
232
+ elif ext == ".json":
233
+ return pd.read_json(file_path)
234
+ elif ext == ".parquet":
235
+ return pd.read_parquet(file_path)
236
+ elif ext == ".feather":
237
+ return pd.read_feather(file_path)
238
+ else:
239
+ raise ValueError(f"Unsupported file format: {ext}")
240
+ except Exception as e:
241
+ raise ValueError(f"Error reading file {file_path}: {str(e)}")
242
+
243
def _export_result(self, result: Dict[str, Any], path: str, format: ExportFormat) -> str:
    """
    Export results to the specified format

    Args:
        result: Result to export, keyed by section name
        path: Path to save the exported result; missing parent directories
            are created
        format: Format to export in

    Returns:
        The path the result was written to

    Raises:
        ValueError: If the format is unsupported or writing fails; the
            original exception is chained as the cause.
    """
    import html  # local import: only needed to escape HTML output

    def as_frame(mapping):
        # A dict of scalars cannot be turned into a frame column-wise, so
        # fall back to a single-row frame in that case.
        try:
            return pd.DataFrame(mapping)
        except ValueError:
            return pd.DataFrame([mapping])

    os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)

    try:
        if format == ExportFormat.JSON:
            # Convert numpy types to Python native types
            def json_serialize(obj):
                if isinstance(obj, np.integer):
                    return int(obj)
                if isinstance(obj, np.floating):
                    return float(obj)
                if isinstance(obj, np.ndarray):
                    return obj.tolist()
                if isinstance(obj, pd.DataFrame):
                    return obj.to_dict(orient="records")
                return str(obj)

            with open(path, "w", encoding="utf-8") as f:
                json.dump(result, f, default=json_serialize, indent=2)

        elif format == ExportFormat.CSV:
            # Find the first non-empty dict or DataFrame in the result
            data_to_export = None
            for value in result.values():
                if isinstance(value, dict) and value:
                    data_to_export = as_frame(value)
                    break
                if isinstance(value, pd.DataFrame):
                    data_to_export = value
                    break

            if data_to_export is not None:
                data_to_export.to_csv(path, index=False)
            else:
                # Fallback: write the scalar entries as one header row and
                # one value row
                flat_data = {
                    k: v
                    for k, v in result.items()
                    if not isinstance(v, (dict, list, pd.DataFrame))
                }
                with open(path, "w", newline="", encoding="utf-8") as f:
                    writer = csv.writer(f)
                    writer.writerow(flat_data.keys())
                    writer.writerow(flat_data.values())

        elif format == ExportFormat.HTML:
            # Keys and values may come from user data, so escape everything
            # interpolated into markup. DataFrame.to_html escapes by default.
            parts = ["<html><body><h1>Chart Results</h1>"]
            for key, value in result.items():
                parts.append(f"<h2>{html.escape(str(key))}</h2>")
                if isinstance(value, pd.DataFrame):
                    parts.append(value.to_html())
                elif isinstance(value, dict):
                    parts.append("<table border='1'><tr><th>Parameter</th><th>Value</th></tr>")
                    parts.extend(
                        f"<tr><td>{html.escape(str(k))}</td>"
                        f"<td>{html.escape(str(v))}</td></tr>"
                        for k, v in value.items()
                    )
                    parts.append("</table>")
                else:
                    parts.append(f"<p>{html.escape(str(value))}</p>")
            parts.append("</body></html>")

            with open(path, "w", encoding="utf-8") as f:
                f.write("".join(parts))

        elif format == ExportFormat.EXCEL:
            # Scalars are gathered into one "Summary" sheet; writing each one
            # to the same sheet separately would overwrite the previous one.
            summary = {}
            with pd.ExcelWriter(path) as writer:
                for key, value in result.items():
                    # Excel sheet names limited to 31 chars
                    sheet = str(key)[:31]
                    if isinstance(value, pd.DataFrame):
                        value.to_excel(writer, sheet_name=sheet)
                    elif isinstance(value, dict):
                        as_frame(value).to_excel(writer, sheet_name=sheet)
                    else:
                        summary[key] = [value]
                if summary:
                    pd.DataFrame(summary).to_excel(writer, sheet_name="Summary")

        elif format == ExportFormat.MARKDOWN:
            with open(path, "w", encoding="utf-8") as f:
                f.write("# Chart Results\n\n")
                for key, value in result.items():
                    f.write(f"## {key}\n\n")
                    if isinstance(value, pd.DataFrame):
                        # Blank line after the table so the next heading is
                        # not glued to its last row.
                        f.write(value.to_markdown())
                        f.write("\n\n")
                    elif isinstance(value, dict):
                        f.write("| Parameter | Value |\n|-----------|-------|\n")
                        for k, v in value.items():
                            f.write(f"| {k} | {v} |\n")
                        f.write("\n")
                    else:
                        f.write(f"{value}\n\n")

        else:
            # Previously an unknown format wrote nothing and still returned
            # the path as if the export had succeeded.
            raise ValueError(f"Unsupported export format: {format}")

        return path
    except Exception as e:
        raise ValueError(f"Error exporting to {format}: {str(e)}") from e
345
+
346
def _create_visualization(
    self,
    df: pd.DataFrame,
    plot_type: VisualizationType,
    x: Optional[str] = None,
    y: Optional[str] = None,
    hue: Optional[str] = None,
    variables: Optional[List[str]] = None,
    title: Optional[str] = None,
    figsize: Optional[Tuple[int, int]] = None,
    output_path: Optional[str] = None,
    dpi: Optional[int] = None,
) -> str:
    """
    Create a visualization based on the parameters and return the path to the saved image

    Args:
        df: DataFrame to visualize
        plot_type: Type of visualization to create
        x: Column to use for x-axis
        y: Column to use for y-axis
        hue: Column to use for color encoding
        variables: List of variables to include in the visualization
        title: Title for the visualization
        figsize: Figure size (width, height) in inches; falls back to the
            configured ``plot_figsize``
        output_path: Path to save the visualization. A relative path is
            placed under the configured export directory; when omitted a
            random ``plot_<hex>.png`` name in that directory is used
        dpi: DPI for the visualization; falls back to the configured
            ``plot_dpi``

    Returns:
        Path to the saved visualization

    Raises:
        ValueError: If the plot type is unsupported or plotting/saving fails;
            the original exception is chained as the cause.
    """
    if not output_path:
        output_path = os.path.join(self.config.export_dir, f"plot_{os.urandom(4).hex()}.png")
    elif not os.path.isabs(output_path):
        output_path = os.path.join(self.config.export_dir, output_path)

    os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)

    # Remember which figures already existed so that only figures opened by
    # this call are closed afterwards, on success and on failure alike.
    figures_before = set(plt.get_fignums())

    try:
        figsize = figsize or self.config.plot_figsize
        dpi = dpi or self.config.plot_dpi

        if plot_type == VisualizationType.PAIR:
            # pairplot opens its own figure, so no blank figure is created
            # beforehand for this plot type.
            if variables:
                plot_vars = list(variables)
                # Add the hue column only once; a duplicated column label
                # would make the selection ambiguous.
                if hue and hue not in plot_vars:
                    plot_vars.append(hue)
                sns.pairplot(df[plot_vars], hue=hue)
            else:
                sns.pairplot(df, hue=hue)
        else:
            plt.figure(figsize=figsize)

            if plot_type == VisualizationType.HISTOGRAM:
                if variables:
                    for var in variables:
                        sns.histplot(data=df, x=var, kde=True, label=var)
                    plt.legend()
                else:
                    sns.histplot(data=df, x=x, hue=hue)

            elif plot_type == VisualizationType.BOXPLOT:
                sns.boxplot(data=df, x=x, y=y, hue=hue)

            elif plot_type == VisualizationType.SCATTER:
                sns.scatterplot(data=df, x=x, y=y, hue=hue)

            elif plot_type == VisualizationType.BAR:
                sns.barplot(data=df, x=x, y=y, hue=hue)

            elif plot_type == VisualizationType.LINE:
                sns.lineplot(data=df, x=x, y=y, hue=hue)

            elif plot_type == VisualizationType.HEATMAP:
                if variables:
                    corr = df[variables].corr()
                else:
                    # Correlate only numeric/boolean columns; text columns
                    # make corr() raise on recent pandas versions.
                    corr = df.select_dtypes(include=["number", "bool"]).corr()
                sns.heatmap(corr, annot=True, cmap="coolwarm", fmt=".2f")

            else:
                raise ValueError(f"Unsupported plot type: {plot_type}")

        if title:
            plt.title(title)

        plt.tight_layout()
        plt.savefig(output_path, dpi=dpi)

        return output_path
    except Exception as e:
        raise ValueError(f"Error creating visualization: {str(e)}") from e
    finally:
        for fig_num in set(plt.get_fignums()) - figures_before:
            plt.close(fig_num)
434
+
435
+ def _validate_variables(self, df: pd.DataFrame, variables: List[str]) -> None:
436
+ """
437
+ Validate that variables exist in the DataFrame
438
+
439
+ Args:
440
+ df: DataFrame to check
441
+ variables: List of variables to validate
442
+
443
+ Raises:
444
+ ValueError: If any variables are not found in the DataFrame
445
+ """
446
+ if not variables:
447
+ return
448
+
449
+ available_columns = set(df.columns)
450
+ missing = [col for col in variables if col not in available_columns]
451
+ if missing:
452
+ raise ValueError(
453
+ f"Variables not found in dataset: {', '.join(missing)}. Available columns: {list(available_columns)}"
454
+ )
455
+
456
+ def _to_json_serializable(
457
+ self, result: Union[pd.DataFrame, pd.Series, Dict]
458
+ ) -> Union[List[Dict], Dict]:
459
+ """
460
+ Convert result to JSON serializable format
461
+
462
+ Args:
463
+ result: Result to convert
464
+
465
+ Returns:
466
+ JSON serializable result
467
+ """
468
+ if isinstance(result, pd.DataFrame):
469
+ # Handle datetime columns
470
+ for col in result.select_dtypes(include=["datetime64"]).columns:
471
+ result[col] = result[col].dt.strftime("%Y-%m-%d %H:%M:%S")
472
+ return result.to_dict(orient="records")
473
+ elif isinstance(result, pd.Series):
474
+ if pd.api.types.is_datetime64_any_dtype(result):
475
+ result = result.dt.strftime("%Y-%m-%d %H:%M:%S")
476
+ return result.to_dict()
477
+ elif isinstance(result, dict):
478
+ # Handle numpy types and datetime objects
479
+ def convert_value(v):
480
+ if isinstance(v, (np.floating, np.integer)):
481
+ return float(v)
482
+ elif isinstance(v, np.bool_):
483
+ return bool(v)
484
+ elif isinstance(v, (pd.Timestamp, np.datetime64)):
485
+ return str(v)
486
+ elif isinstance(v, np.ndarray):
487
+ return v.tolist()
488
+ elif pd.isna(v):
489
+ return None
490
+ return v
491
+
492
+ return {k: convert_value(v) for k, v in result.items()}
493
+ return result
494
+
495
@measure_execution_time
def read_data(
    self,
    file_path: str,
    nrows: Optional[int] = None,
    sheet_name: Optional[Union[str, int]] = 0,
    export_format: Optional[ExportFormat] = None,
    export_path: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Load a data file and summarise its contents.

    Args:
        file_path: Path to the data file
        nrows: Number of rows to read
        sheet_name: Sheet name or index for Excel files
        export_format: Format to export results in
        export_path: Path to export results to; a relative path is placed
            under the configured export directory

    Returns:
        Dictionary with the column names, row count, per-column dtypes,
        memory usage in MB and a preview of the first five rows. When both
        export_format and export_path are given, the summary is exported and
        the destination is recorded under "exported_to".

    Raises:
        ValueError: If the file does not exist or its extension is not in
            the configured allowed extensions
    """
    # Reject paths that do not point at an existing file.
    if not os.path.isfile(file_path):
        raise ValueError(f"File not found: {file_path}")

    # Only configured extensions may be loaded (compared case-insensitively).
    _, suffix = os.path.splitext(file_path)
    ext = suffix.lower()
    if ext not in self.config.allowed_extensions:
        raise ValueError(
            f"Extension '{ext}' not allowed. Supported formats: {', '.join(self.config.allowed_extensions)}"
        )

    df = self._load_data(file_path, nrows, sheet_name)

    dtype_names = {}
    for col, dtype in df.dtypes.items():
        dtype_names[col] = str(dtype)
    bytes_in_memory = df.memory_usage(deep=True).sum()

    result = {
        "variables": df.columns.tolist(),
        "observations": len(df),
        "dtypes": dtype_names,
        # MB
        "memory_usage": bytes_in_memory / (1024 * 1024),
        "preview": df.head(5).to_dict(orient="records"),
    }

    # Export is optional and needs both a format and a destination.
    if not (export_format and export_path):
        return result

    if os.path.isabs(export_path):
        destination = export_path
    else:
        destination = os.path.join(self.config.export_dir, export_path)

    self._export_result(result, destination, export_format)
    result["exported_to"] = destination
    return result
550
+
551
@measure_execution_time
def visualize(
    self,
    file_path: str,
    plot_type: VisualizationType,
    x: Optional[str] = None,
    y: Optional[str] = None,
    hue: Optional[str] = None,
    variables: Optional[List[str]] = None,
    title: Optional[str] = None,
    figsize: Optional[Tuple[int, int]] = None,
    output_path: Optional[str] = None,
    dpi: Optional[int] = None,
    export_format: Optional[ExportFormat] = None,
    export_path: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Load a data file, render a plot from it and describe the outcome.

    Args:
        file_path: Path to the data file
        plot_type: Type of visualization to create
        x: Column to use for x-axis
        y: Column to use for y-axis
        hue: Column to use for color encoding
        variables: List of variables to include in the visualization
        title: Title for the visualization
        figsize: Figure size (width, height) in inches
        output_path: Path to save the visualization
        dpi: DPI for the visualization
        export_format: Format to export results in
        export_path: Path to export results to; a relative path is placed
            under the configured export directory

    Returns:
        Dictionary with the plot type, the saved image path, the variables
        used and a title. The reported title falls back to
        "<Plot type> Plot" when none was supplied; that fallback is only
        reported here, the plot itself receives the title as given.

    Raises:
        ValueError: If the file does not exist, its extension is not
            allowed, or a requested column is missing from the data
    """
    # Reject paths that do not point at an existing file.
    if not os.path.isfile(file_path):
        raise ValueError(f"File not found: {file_path}")

    # Only configured extensions may be loaded (compared case-insensitively).
    _, suffix = os.path.splitext(file_path)
    ext = suffix.lower()
    if ext not in self.config.allowed_extensions:
        raise ValueError(
            f"Extension '{ext}' not allowed. Supported formats: {', '.join(self.config.allowed_extensions)}"
        )

    df = self._load_data(file_path)

    # Every column named by the caller must exist before plotting.
    vars_to_check = list(variables) if variables else []
    vars_to_check += [column for column in (x, y, hue) if column]
    self._validate_variables(df, vars_to_check)

    output_path = self._create_visualization(
        df, plot_type, x, y, hue, variables, title, figsize, output_path, dpi
    )

    reported_variables = variables if variables else [x, y, hue]
    reported_title = title if title else f"{plot_type.capitalize()} Plot"
    result = {
        "plot_type": plot_type,
        "output_path": output_path,
        "variables": reported_variables,
        "title": reported_title,
    }

    # Export is optional and needs both a format and a destination.
    if not (export_format and export_path):
        return result

    if os.path.isabs(export_path):
        destination = export_path
    else:
        destination = os.path.join(self.config.export_dir, export_path)

    self._export_result(result, destination, export_format)
    result["exported_to"] = destination
    return result
645
+
646
@measure_execution_time
def export_data(
    self,
    file_path: str,
    format: ExportFormat,
    variables: Optional[List[str]] = None,
    export_path: Optional[str] = None,
    export_format: Optional[ExportFormat] = None,
) -> Dict[str, Any]:
    """
    Export data to various formats

    Args:
        file_path: Path to the data file
        format: Format to export data in
        variables: List of variables to include in the export
        export_path: Path to save the exported data. When omitted, a
            randomly named file is created in the configured export
            directory; a relative path is placed under that directory.
        export_format: Format to export results in. When given, the result
            summary is written next to the data file as
            "<data file stem>_summary.<ext>" so it never overwrites the
            exported data.

    Returns:
        Dictionary with export details (format, data file path, row and
        column counts, column names, and "exported_to" when the summary
        was exported)

    Raises:
        ValueError: If the file does not exist, its extension is not
            allowed, a requested variable is missing, the format is not
            supported, or writing the export fails
    """
    # Validate file path
    if not os.path.isfile(file_path):
        raise ValueError(f"File not found: {file_path}")

    # Check file extension
    ext = os.path.splitext(file_path)[1].lower()
    if ext not in self.config.allowed_extensions:
        raise ValueError(
            f"Extension '{ext}' not allowed. Supported formats: {', '.join(self.config.allowed_extensions)}"
        )

    # Load data
    df = self._load_data(file_path)

    # Validate variables
    if variables:
        self._validate_variables(df, variables)
        df = df[variables]

    # Determine export path
    if not export_path:
        ext = ".xlsx" if format == ExportFormat.EXCEL else "." + format.value
        export_path = os.path.join(self.config.export_dir, f"export_{os.urandom(4).hex()}{ext}")
    elif not os.path.isabs(export_path):
        export_path = os.path.join(self.config.export_dir, export_path)

    # Create export directory if it doesn't exist
    os.makedirs(os.path.dirname(os.path.abspath(export_path)), exist_ok=True)

    # Export data
    try:
        if format == ExportFormat.JSON:
            df.to_json(export_path, orient="records", indent=2)
        elif format == ExportFormat.CSV:
            df.to_csv(export_path, index=False)
        elif format == ExportFormat.HTML:
            df.to_html(export_path)
        elif format == ExportFormat.EXCEL:
            df.to_excel(export_path, index=False)
        elif format == ExportFormat.MARKDOWN:
            # Explicit encoding so non-ASCII cell values are written the
            # same way regardless of the platform's locale default.
            with open(export_path, "w", encoding="utf-8") as f:
                f.write(df.to_markdown())
        else:
            # Without this branch an unhandled format wrote no file at all
            # while the method still reported a successful export.
            raise ValueError("Unsupported export format")
    except Exception as e:
        raise ValueError(f"Error exporting to {format}: {str(e)}") from e

    # Create result
    result = {
        "format": format,
        "path": export_path,
        "rows": len(df),
        "columns": len(df.columns),
        "variables": df.columns.tolist(),
    }

    # Handle export of the result summary if requested. export_path already
    # holds the data file at this point, so the summary goes to a sibling
    # path instead of clobbering the data that was just written.
    if export_format:
        stem = os.path.splitext(export_path)[0]
        if export_format == ExportFormat.EXCEL:
            summary_ext = ".xlsx"
        else:
            summary_ext = "." + export_format.value
        summary_path = f"{stem}_summary{summary_ext}"

        self._export_result(result, summary_path, export_format)
        result["exported_to"] = summary_path

    return result