aiecs 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. aiecs/__init__.py +72 -0
  2. aiecs/__main__.py +41 -0
  3. aiecs/aiecs_client.py +469 -0
  4. aiecs/application/__init__.py +10 -0
  5. aiecs/application/executors/__init__.py +10 -0
  6. aiecs/application/executors/operation_executor.py +363 -0
  7. aiecs/application/knowledge_graph/__init__.py +7 -0
  8. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
  11. aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
  12. aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
  13. aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
  14. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  15. aiecs/application/knowledge_graph/extractors/base.py +100 -0
  16. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
  17. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
  18. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
  19. aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
  20. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
  21. aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
  22. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
  23. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
  24. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  25. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
  26. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
  27. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  28. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
  29. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  30. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  31. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
  32. aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
  33. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
  34. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  35. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  36. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
  37. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
  38. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
  39. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
  40. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
  41. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
  42. aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
  43. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
  44. aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
  45. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
  46. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  47. aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
  48. aiecs/application/knowledge_graph/search/reranker.py +295 -0
  49. aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
  50. aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
  51. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  52. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
  53. aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
  54. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  55. aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
  56. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  57. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
  58. aiecs/common/__init__.py +9 -0
  59. aiecs/common/knowledge_graph/__init__.py +17 -0
  60. aiecs/common/knowledge_graph/runnable.py +484 -0
  61. aiecs/config/__init__.py +16 -0
  62. aiecs/config/config.py +498 -0
  63. aiecs/config/graph_config.py +137 -0
  64. aiecs/config/registry.py +23 -0
  65. aiecs/core/__init__.py +46 -0
  66. aiecs/core/interface/__init__.py +34 -0
  67. aiecs/core/interface/execution_interface.py +152 -0
  68. aiecs/core/interface/storage_interface.py +171 -0
  69. aiecs/domain/__init__.py +289 -0
  70. aiecs/domain/agent/__init__.py +189 -0
  71. aiecs/domain/agent/base_agent.py +697 -0
  72. aiecs/domain/agent/exceptions.py +103 -0
  73. aiecs/domain/agent/graph_aware_mixin.py +559 -0
  74. aiecs/domain/agent/hybrid_agent.py +490 -0
  75. aiecs/domain/agent/integration/__init__.py +26 -0
  76. aiecs/domain/agent/integration/context_compressor.py +222 -0
  77. aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
  78. aiecs/domain/agent/integration/retry_policy.py +219 -0
  79. aiecs/domain/agent/integration/role_config.py +213 -0
  80. aiecs/domain/agent/knowledge_aware_agent.py +646 -0
  81. aiecs/domain/agent/lifecycle.py +296 -0
  82. aiecs/domain/agent/llm_agent.py +300 -0
  83. aiecs/domain/agent/memory/__init__.py +12 -0
  84. aiecs/domain/agent/memory/conversation.py +197 -0
  85. aiecs/domain/agent/migration/__init__.py +14 -0
  86. aiecs/domain/agent/migration/conversion.py +160 -0
  87. aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
  88. aiecs/domain/agent/models.py +317 -0
  89. aiecs/domain/agent/observability.py +407 -0
  90. aiecs/domain/agent/persistence.py +289 -0
  91. aiecs/domain/agent/prompts/__init__.py +29 -0
  92. aiecs/domain/agent/prompts/builder.py +161 -0
  93. aiecs/domain/agent/prompts/formatters.py +189 -0
  94. aiecs/domain/agent/prompts/template.py +255 -0
  95. aiecs/domain/agent/registry.py +260 -0
  96. aiecs/domain/agent/tool_agent.py +257 -0
  97. aiecs/domain/agent/tools/__init__.py +12 -0
  98. aiecs/domain/agent/tools/schema_generator.py +221 -0
  99. aiecs/domain/community/__init__.py +155 -0
  100. aiecs/domain/community/agent_adapter.py +477 -0
  101. aiecs/domain/community/analytics.py +481 -0
  102. aiecs/domain/community/collaborative_workflow.py +642 -0
  103. aiecs/domain/community/communication_hub.py +645 -0
  104. aiecs/domain/community/community_builder.py +320 -0
  105. aiecs/domain/community/community_integration.py +800 -0
  106. aiecs/domain/community/community_manager.py +813 -0
  107. aiecs/domain/community/decision_engine.py +879 -0
  108. aiecs/domain/community/exceptions.py +225 -0
  109. aiecs/domain/community/models/__init__.py +33 -0
  110. aiecs/domain/community/models/community_models.py +268 -0
  111. aiecs/domain/community/resource_manager.py +457 -0
  112. aiecs/domain/community/shared_context_manager.py +603 -0
  113. aiecs/domain/context/__init__.py +58 -0
  114. aiecs/domain/context/context_engine.py +989 -0
  115. aiecs/domain/context/conversation_models.py +354 -0
  116. aiecs/domain/context/graph_memory.py +467 -0
  117. aiecs/domain/execution/__init__.py +12 -0
  118. aiecs/domain/execution/model.py +57 -0
  119. aiecs/domain/knowledge_graph/__init__.py +19 -0
  120. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  121. aiecs/domain/knowledge_graph/models/entity.py +130 -0
  122. aiecs/domain/knowledge_graph/models/evidence.py +194 -0
  123. aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
  124. aiecs/domain/knowledge_graph/models/path.py +179 -0
  125. aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
  126. aiecs/domain/knowledge_graph/models/query.py +272 -0
  127. aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
  128. aiecs/domain/knowledge_graph/models/relation.py +136 -0
  129. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  130. aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
  131. aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
  132. aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
  133. aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
  134. aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
  135. aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
  136. aiecs/domain/task/__init__.py +13 -0
  137. aiecs/domain/task/dsl_processor.py +613 -0
  138. aiecs/domain/task/model.py +62 -0
  139. aiecs/domain/task/task_context.py +268 -0
  140. aiecs/infrastructure/__init__.py +24 -0
  141. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  142. aiecs/infrastructure/graph_storage/base.py +601 -0
  143. aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
  144. aiecs/infrastructure/graph_storage/cache.py +429 -0
  145. aiecs/infrastructure/graph_storage/distributed.py +226 -0
  146. aiecs/infrastructure/graph_storage/error_handling.py +390 -0
  147. aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
  148. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  149. aiecs/infrastructure/graph_storage/in_memory.py +514 -0
  150. aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
  151. aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
  152. aiecs/infrastructure/graph_storage/metrics.py +357 -0
  153. aiecs/infrastructure/graph_storage/migration.py +413 -0
  154. aiecs/infrastructure/graph_storage/pagination.py +471 -0
  155. aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
  156. aiecs/infrastructure/graph_storage/postgres.py +871 -0
  157. aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
  158. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  159. aiecs/infrastructure/graph_storage/sqlite.py +623 -0
  160. aiecs/infrastructure/graph_storage/streaming.py +495 -0
  161. aiecs/infrastructure/messaging/__init__.py +13 -0
  162. aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
  163. aiecs/infrastructure/messaging/websocket_manager.py +298 -0
  164. aiecs/infrastructure/monitoring/__init__.py +34 -0
  165. aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
  166. aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
  167. aiecs/infrastructure/monitoring/structured_logger.py +48 -0
  168. aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
  169. aiecs/infrastructure/persistence/__init__.py +24 -0
  170. aiecs/infrastructure/persistence/context_engine_client.py +187 -0
  171. aiecs/infrastructure/persistence/database_manager.py +333 -0
  172. aiecs/infrastructure/persistence/file_storage.py +754 -0
  173. aiecs/infrastructure/persistence/redis_client.py +220 -0
  174. aiecs/llm/__init__.py +86 -0
  175. aiecs/llm/callbacks/__init__.py +11 -0
  176. aiecs/llm/callbacks/custom_callbacks.py +264 -0
  177. aiecs/llm/client_factory.py +420 -0
  178. aiecs/llm/clients/__init__.py +33 -0
  179. aiecs/llm/clients/base_client.py +193 -0
  180. aiecs/llm/clients/googleai_client.py +181 -0
  181. aiecs/llm/clients/openai_client.py +131 -0
  182. aiecs/llm/clients/vertex_client.py +437 -0
  183. aiecs/llm/clients/xai_client.py +184 -0
  184. aiecs/llm/config/__init__.py +51 -0
  185. aiecs/llm/config/config_loader.py +275 -0
  186. aiecs/llm/config/config_validator.py +236 -0
  187. aiecs/llm/config/model_config.py +151 -0
  188. aiecs/llm/utils/__init__.py +10 -0
  189. aiecs/llm/utils/validate_config.py +91 -0
  190. aiecs/main.py +363 -0
  191. aiecs/scripts/__init__.py +3 -0
  192. aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
  193. aiecs/scripts/aid/__init__.py +19 -0
  194. aiecs/scripts/aid/version_manager.py +215 -0
  195. aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
  196. aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
  197. aiecs/scripts/dependance_check/__init__.py +17 -0
  198. aiecs/scripts/dependance_check/dependency_checker.py +938 -0
  199. aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
  200. aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
  201. aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
  202. aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
  203. aiecs/scripts/dependance_patch/__init__.py +7 -0
  204. aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
  205. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  206. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
  207. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
  208. aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
  209. aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
  210. aiecs/scripts/tools_develop/README.md +449 -0
  211. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  212. aiecs/scripts/tools_develop/__init__.py +21 -0
  213. aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
  214. aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
  215. aiecs/scripts/tools_develop/verify_tools.py +356 -0
  216. aiecs/tasks/__init__.py +1 -0
  217. aiecs/tasks/worker.py +172 -0
  218. aiecs/tools/__init__.py +299 -0
  219. aiecs/tools/apisource/__init__.py +99 -0
  220. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  221. aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
  222. aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
  223. aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
  224. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  225. aiecs/tools/apisource/monitoring/metrics.py +303 -0
  226. aiecs/tools/apisource/providers/__init__.py +115 -0
  227. aiecs/tools/apisource/providers/base.py +664 -0
  228. aiecs/tools/apisource/providers/census.py +401 -0
  229. aiecs/tools/apisource/providers/fred.py +564 -0
  230. aiecs/tools/apisource/providers/newsapi.py +412 -0
  231. aiecs/tools/apisource/providers/worldbank.py +357 -0
  232. aiecs/tools/apisource/reliability/__init__.py +12 -0
  233. aiecs/tools/apisource/reliability/error_handler.py +375 -0
  234. aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
  235. aiecs/tools/apisource/tool.py +850 -0
  236. aiecs/tools/apisource/utils/__init__.py +9 -0
  237. aiecs/tools/apisource/utils/validators.py +338 -0
  238. aiecs/tools/base_tool.py +201 -0
  239. aiecs/tools/docs/__init__.py +121 -0
  240. aiecs/tools/docs/ai_document_orchestrator.py +599 -0
  241. aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
  242. aiecs/tools/docs/content_insertion_tool.py +1333 -0
  243. aiecs/tools/docs/document_creator_tool.py +1317 -0
  244. aiecs/tools/docs/document_layout_tool.py +1166 -0
  245. aiecs/tools/docs/document_parser_tool.py +994 -0
  246. aiecs/tools/docs/document_writer_tool.py +1818 -0
  247. aiecs/tools/knowledge_graph/__init__.py +17 -0
  248. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
  249. aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
  250. aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
  251. aiecs/tools/langchain_adapter.py +542 -0
  252. aiecs/tools/schema_generator.py +275 -0
  253. aiecs/tools/search_tool/__init__.py +100 -0
  254. aiecs/tools/search_tool/analyzers.py +589 -0
  255. aiecs/tools/search_tool/cache.py +260 -0
  256. aiecs/tools/search_tool/constants.py +128 -0
  257. aiecs/tools/search_tool/context.py +216 -0
  258. aiecs/tools/search_tool/core.py +749 -0
  259. aiecs/tools/search_tool/deduplicator.py +123 -0
  260. aiecs/tools/search_tool/error_handler.py +271 -0
  261. aiecs/tools/search_tool/metrics.py +371 -0
  262. aiecs/tools/search_tool/rate_limiter.py +178 -0
  263. aiecs/tools/search_tool/schemas.py +277 -0
  264. aiecs/tools/statistics/__init__.py +80 -0
  265. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
  266. aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
  267. aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
  268. aiecs/tools/statistics/data_loader_tool.py +564 -0
  269. aiecs/tools/statistics/data_profiler_tool.py +658 -0
  270. aiecs/tools/statistics/data_transformer_tool.py +573 -0
  271. aiecs/tools/statistics/data_visualizer_tool.py +495 -0
  272. aiecs/tools/statistics/model_trainer_tool.py +487 -0
  273. aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
  274. aiecs/tools/task_tools/__init__.py +86 -0
  275. aiecs/tools/task_tools/chart_tool.py +732 -0
  276. aiecs/tools/task_tools/classfire_tool.py +922 -0
  277. aiecs/tools/task_tools/image_tool.py +447 -0
  278. aiecs/tools/task_tools/office_tool.py +684 -0
  279. aiecs/tools/task_tools/pandas_tool.py +635 -0
  280. aiecs/tools/task_tools/report_tool.py +635 -0
  281. aiecs/tools/task_tools/research_tool.py +392 -0
  282. aiecs/tools/task_tools/scraper_tool.py +715 -0
  283. aiecs/tools/task_tools/stats_tool.py +688 -0
  284. aiecs/tools/temp_file_manager.py +130 -0
  285. aiecs/tools/tool_executor/__init__.py +37 -0
  286. aiecs/tools/tool_executor/tool_executor.py +881 -0
  287. aiecs/utils/LLM_output_structor.py +445 -0
  288. aiecs/utils/__init__.py +34 -0
  289. aiecs/utils/base_callback.py +47 -0
  290. aiecs/utils/cache_provider.py +695 -0
  291. aiecs/utils/execution_utils.py +184 -0
  292. aiecs/utils/logging.py +1 -0
  293. aiecs/utils/prompt_loader.py +14 -0
  294. aiecs/utils/token_usage_repository.py +323 -0
  295. aiecs/ws/__init__.py +0 -0
  296. aiecs/ws/socket_server.py +52 -0
  297. aiecs-1.5.1.dist-info/METADATA +608 -0
  298. aiecs-1.5.1.dist-info/RECORD +302 -0
  299. aiecs-1.5.1.dist-info/WHEEL +5 -0
  300. aiecs-1.5.1.dist-info/entry_points.txt +10 -0
  301. aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
  302. aiecs-1.5.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,9 @@
1
+ """
2
+ Utils Module
3
+
4
+ Contains shared validation and utility functions.
5
+ """
6
+
7
+ from aiecs.tools.apisource.utils.validators import DataValidator
8
+
9
+ __all__ = ["DataValidator"]
@@ -0,0 +1,338 @@
1
+ """
2
+ Shared Validation Utilities for API Providers
3
+
4
+ Common validation functions for data quality assessment:
5
+ - Detect outliers in numeric data
6
+ - Find gaps in time series
7
+ - Check data completeness
8
+ - Validate data types and ranges
9
+ """
10
+
11
+ import logging
12
+ from datetime import datetime
13
+ from typing import Any, Dict, List, Optional
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class DataValidator:
    """
    Provides common data validation methods for API providers.

    Every public method is a ``@staticmethod`` operating on plain Python
    containers (lists of numbers or lists of dicts), so the class is used
    purely as a namespace and never needs to be instantiated.
    """

    # Values treated as "no data" when the caller supplies no explicit list.
    _DEFAULT_MISSING_INDICATORS = (".", None, "NA", "N/A", "", "null")

    # (upper bound in days, frequency name) pairs, checked in ascending order.
    _FREQUENCY_THRESHOLDS = (
        (2, "daily"),
        (10, "weekly"),
        (40, "monthly"),
        (120, "quarterly"),
    )

    # Nominal spacing, in days, between observations for each frequency.
    _FREQUENCY_GAP_DAYS = {
        "daily": 1,
        "weekly": 7,
        "monthly": 31,
        "quarterly": 92,
        "annual": 365,
    }

    @staticmethod
    def _parse_date(raw: Any) -> Optional[datetime]:
        """
        Parse a date-like value into a ``datetime``.

        Strings containing ``"T"`` are parsed as ISO-8601 (a trailing ``Z``
        is rewritten to ``+00:00``); anything else is parsed from its first
        ten characters as ``YYYY-MM-DD``.

        Returns:
            The parsed datetime, or None when the value cannot be parsed.
        """
        try:
            date_str = str(raw)
            if "T" in date_str:
                return datetime.fromisoformat(date_str.replace("Z", "+00:00"))
            return datetime.strptime(date_str[:10], "%Y-%m-%d")
        except (ValueError, TypeError):
            return None

    @staticmethod
    def _as_naive_utc(moment: datetime) -> datetime:
        """
        Return a naive datetime usable as an ordering key.

        Aware datetimes are shifted to UTC and stripped of tzinfo; naive
        datetimes are returned unchanged. Python refuses to compare naive
        with aware datetimes, and a single series may contain both (plain
        dates parse as naive, ``...Z`` timestamps parse as aware).
        """
        offset = moment.utcoffset()
        if offset is None:
            return moment
        return (moment - offset).replace(tzinfo=None)

    @staticmethod
    def _classify_interval(interval_days: float) -> Optional[str]:
        """
        Map an interval in days onto 'daily'/'weekly'/'monthly'/'quarterly'.

        Returns:
            The frequency name, or None when the interval exceeds 120 days
            (callers decide how to label longer intervals).
        """
        for limit, name in DataValidator._FREQUENCY_THRESHOLDS:
            if interval_days <= limit:
                return name
        return None

    @staticmethod
    def _is_missing(value: Any, missing_indicators: Any) -> bool:
        """Return True if *value*, or its stripped form for strings, is a missing marker."""
        if value in missing_indicators:
            return True
        return isinstance(value, str) and value.strip() in missing_indicators

    @staticmethod
    def detect_outliers(
        values: List[float], method: str = "iqr", threshold: float = 1.5
    ) -> List[int]:
        """
        Detect outliers in numeric data.

        Args:
            values: List of numeric values
            method: Detection method ('iqr' or 'zscore')
            threshold: Threshold for outlier detection
                - For IQR: typically 1.5 or 3.0
                - For Z-score: typically 2.0 or 3.0

        Returns:
            List of indices (into ``values``) where outliers were detected.
            Empty when fewer than four values are given, when the data has
            zero standard deviation (zscore), or when ``method`` is not one
            of the two supported names.
        """
        if not values or len(values) < 4:
            return []

        if method == "iqr":
            # Quartiles are taken by position in the sorted data (no
            # interpolation between neighbouring values).
            sorted_values = sorted(values)
            n = len(sorted_values)
            q1 = sorted_values[n // 4]
            q3 = sorted_values[3 * n // 4]
            iqr = q3 - q1

            lower_bound = q1 - threshold * iqr
            upper_bound = q3 + threshold * iqr
            return [
                i
                for i, value in enumerate(values)
                if value < lower_bound or value > upper_bound
            ]

        if method == "zscore":
            # Population (not sample) standard deviation.
            mean = sum(values) / len(values)
            variance = sum((x - mean) ** 2 for x in values) / len(values)
            std_dev = variance**0.5

            if std_dev == 0:
                # All values identical: nothing can be an outlier.
                return []

            return [
                i
                for i, value in enumerate(values)
                if abs((value - mean) / std_dev) > threshold
            ]

        # Unsupported method names yield no outliers rather than an error.
        return []

    @staticmethod
    def detect_time_gaps(
        data: List[Dict[str, Any]],
        date_field: str = "date",
        expected_frequency: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """
        Detect gaps in time series data.

        Args:
            data: List of data items with date fields
            date_field: Name of the date field
            expected_frequency: Expected frequency ('daily', 'weekly',
                'monthly', 'quarterly', 'annual'). When None it is estimated
                from the first (up to three) intervals if at least three
                dates parse; otherwise a 31-day spacing is assumed.

        Returns:
            List of gap information dictionaries with keys ``start_index``,
            ``end_index``, ``start_date``, ``end_date``, ``gap_days`` and
            ``expected_days``. A gap is reported when consecutive dates are
            more than 1.5x the expected spacing apart.
        """
        if len(data) < 2:
            return []

        # (original index, parsed datetime, naive-UTC ordering key)
        parsed = []
        for i, item in enumerate(data):
            if date_field in item:
                date_obj = DataValidator._parse_date(item[date_field])
                if date_obj is not None:
                    parsed.append((i, date_obj, DataValidator._as_naive_utc(date_obj)))

        if len(parsed) < 2:
            return []

        # Order on the normalized key so that a mix of naive and aware
        # datetimes does not raise TypeError.
        parsed.sort(key=lambda entry: entry[2])

        if expected_frequency is None and len(parsed) >= 3:
            # Estimate from the first few intervals.
            sample = parsed[:4]
            intervals = [
                (later[2] - earlier[2]).days for earlier, later in zip(sample, sample[1:])
            ]
            avg_interval = sum(intervals) / len(intervals)
            expected_frequency = DataValidator._classify_interval(avg_interval) or "annual"

        # Unknown or still-undetermined frequencies fall back to 31 days.
        expected_gap_days = DataValidator._FREQUENCY_GAP_DAYS.get(expected_frequency, 31)
        tolerance = expected_gap_days * 0.5  # 50% tolerance

        gaps = []
        for (idx1, date1, key1), (idx2, date2, key2) in zip(parsed, parsed[1:]):
            gap_days = (key2 - key1).days
            if gap_days > expected_gap_days + tolerance:
                gaps.append(
                    {
                        "start_index": idx1,
                        "end_index": idx2,
                        "start_date": date1.isoformat(),
                        "end_date": date2.isoformat(),
                        "gap_days": gap_days,
                        "expected_days": expected_gap_days,
                    }
                )

        return gaps

    @staticmethod
    def check_data_completeness(
        data: List[Dict[str, Any]],
        value_field: str = "value",
        missing_indicators: Optional[List[Any]] = None,
    ) -> Dict[str, Any]:
        """
        Check completeness of data.

        A record counts as missing when ``value_field`` is absent, when its
        value is one of ``missing_indicators``, or when it is a string whose
        stripped form is one of them.

        Args:
            data: List of data items
            value_field: Name of the value field to check
            missing_indicators: Values that indicate missing data
                (e.g., ['.', None, 'NA']). Defaults to
                ``[".", None, "NA", "N/A", "", "null"]``.

        Returns:
            Completeness statistics dictionary with ``total_records``,
            ``missing_count``, ``present_count``, ``completeness`` (rounded
            to 4 places; 1.0 for empty input) and ``missing_indices``
            (first 10 only).
        """
        if missing_indicators is None:
            missing_indicators = list(DataValidator._DEFAULT_MISSING_INDICATORS)

        total_records = len(data)
        if total_records == 0:
            return {
                "total_records": 0,
                "missing_count": 0,
                # Same key set as the non-empty result.
                "present_count": 0,
                "completeness": 1.0,
                "missing_indices": [],
            }

        missing_indices = [
            i
            for i, item in enumerate(data)
            if value_field not in item
            or DataValidator._is_missing(item[value_field], missing_indicators)
        ]
        missing_count = len(missing_indices)
        completeness = (total_records - missing_count) / total_records

        return {
            "total_records": total_records,
            "missing_count": missing_count,
            "present_count": total_records - missing_count,
            "completeness": round(completeness, 4),
            "missing_indices": missing_indices[:10],  # Limit to first 10
        }

    @staticmethod
    def calculate_value_range(
        data: List[Dict[str, Any]],
        value_field: str = "value",
        missing_indicators: Optional[List[Any]] = None,
    ) -> Optional[Dict[str, float]]:
        """
        Calculate min, max, mean of numeric values.

        Ints and floats are used directly; strings are stripped, have commas
        removed, and are converted with ``float()``. Values of any other
        type, and strings that fail to convert, are skipped.

        Args:
            data: List of data items
            value_field: Name of the value field
            missing_indicators: Values to skip. Defaults to
                ``[".", None, "NA", "N/A", "", "null"]``.

        Returns:
            Dictionary with ``min``, ``max``, ``mean`` and ``count``, or
            None if no valid data
        """
        if missing_indicators is None:
            missing_indicators = list(DataValidator._DEFAULT_MISSING_INDICATORS)

        numeric_values = []

        for item in data:
            if value_field not in item:
                continue
            value = item[value_field]

            # Skip missing indicators
            if value in missing_indicators:
                continue

            try:
                if isinstance(value, (int, float)):
                    numeric_values.append(float(value))
                elif isinstance(value, str):
                    # Clean string (remove thousands separators, padding)
                    cleaned = value.strip().replace(",", "")
                    if cleaned and cleaned not in missing_indicators:
                        numeric_values.append(float(cleaned))
            except (ValueError, TypeError):
                continue

        if not numeric_values:
            return None

        return {
            "min": min(numeric_values),
            "max": max(numeric_values),
            "mean": sum(numeric_values) / len(numeric_values),
            "count": len(numeric_values),
        }

    @staticmethod
    def infer_data_frequency(data: List[Dict[str, Any]], date_field: str = "date") -> Optional[str]:
        """
        Infer the frequency of time series data.

        The classification is based on the (upper) median interval, in whole
        days, between consecutive parsed dates.

        Args:
            data: List of data items with dates
            date_field: Name of the date field

        Returns:
            'daily', 'weekly', 'monthly', 'quarterly', 'annual' or
            'irregular'; None when fewer than three dates can be parsed.
        """
        if len(data) < 3:
            return None

        moments = []
        for item in data:
            if date_field in item:
                date_obj = DataValidator._parse_date(item[date_field])
                if date_obj is not None:
                    # Normalize so naive and aware values can be ordered together.
                    moments.append(DataValidator._as_naive_utc(date_obj))

        if len(moments) < 3:
            return None

        moments.sort()

        intervals = sorted(
            (later - earlier).days for earlier, later in zip(moments, moments[1:])
        )
        median_interval = intervals[len(intervals) // 2]

        frequency = DataValidator._classify_interval(median_interval)
        if frequency is not None:
            return frequency
        return "annual" if median_interval <= 400 else "irregular"
@@ -0,0 +1,201 @@
1
+ import inspect
2
+ import logging
3
+ from typing import Any, Dict, List, Optional, Type
4
+
5
+ from pydantic import BaseModel, ValidationError
6
+ import re
7
+
8
+ from aiecs.tools.tool_executor import (
9
+ InputValidationError,
10
+ SecurityError,
11
+ get_executor,
12
+ )
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class BaseTool:
    """
    Base class for all tools, providing common functionality:
    - Input validation with Pydantic schemas
    - Caching with TTL and content-based keys
    - Concurrency with async/sync execution
    - Error handling with retries and context
    - Performance optimization with metrics
    - Logging with structured output

    Tools inheriting from this class focus on business logic, leveraging
    the executor's cross-cutting concerns.

    Example:
        class MyTool(BaseTool):
            class ReadSchema(BaseModel):
                path: str

            @validate_input(ReadSchema)
            @cache_result(ttl=300)
            @run_in_executor
            @measure_execution_time
            @sanitize_input
            def read(self, path: str):
                # Implementation
                pass
    """

    # Compiled once for the class instead of being looked up on every string
    # argument. Matches SQL keywords SELECT/INSERT, SQL comment openers
    # ("--", "/*") and any semicolon, case-insensitively.
    _INJECTION_PATTERN = re.compile(r"(\bSELECT\b|\bINSERT\b|--|;|/\*)", re.IGNORECASE)

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """
        Initialize the tool with optional configuration.

        Obtains an executor via ``get_executor(config)`` and then discovers
        the subclass's inner schema classes and public coroutine methods.

        Args:
            config (Dict[str, Any], optional): Tool-specific configuration.

        Raises:
            ValueError: Possibly, if config is invalid. NOTE(review): nothing
                in this class raises it; it would have to come from
                ``get_executor`` -- confirm against that function.
        """
        self._executor = get_executor(config)
        self._config = config or {}
        self._schemas: Dict[str, Type[BaseModel]] = {}
        self._async_methods: List[str] = []
        self._register_schemas()
        self._register_async_methods()

    def _register_schemas(self) -> None:
        """
        Register Pydantic schemas for operations by inspecting inner Schema classes.

        Every class attribute that is a ``BaseModel`` subclass whose name
        ends in ``Schema`` is registered under its name with ``Schema``
        removed and lower-cased.

        Example:
            class MyTool(BaseTool):
                class ReadSchema(BaseModel):
                    path: str
                def read(self, path: str):
                    pass
            # Registers 'read' -> ReadSchema
        """
        for attr_name in dir(self.__class__):
            attr = getattr(self.__class__, attr_name)
            if (
                isinstance(attr, type)
                and issubclass(attr, BaseModel)
                and attr.__name__.endswith("Schema")
            ):
                # NOTE: the key is fully lower-cased, so 'ReadFileSchema'
                # registers as 'readfile', not 'read_file'.
                op_name = attr.__name__.replace("Schema", "").lower()
                self._schemas[op_name] = attr

    def _register_async_methods(self) -> None:
        """
        Register async methods for proper execution handling.

        Records the name of every coroutine function on the class whose name
        does not start with an underscore.
        """
        for attr_name in dir(self.__class__):
            attr = getattr(self.__class__, attr_name)
            if inspect.iscoroutinefunction(attr) and not attr_name.startswith("_"):
                self._async_methods.append(attr_name)

    def _sanitize_kwargs(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
        """
        Sanitize keyword arguments to prevent injection attacks.

        Only top-level string values are inspected; values are passed
        through unmodified when they do not match the pattern.

        Args:
            kwargs (Dict[str, Any]): Input keyword arguments.

        Returns:
            Dict[str, Any]: A new dict with the same keys and values.

        Raises:
            SecurityError: If a string value matches the injection pattern.
        """
        sanitized = {}
        for k, v in kwargs.items():
            if isinstance(v, str) and self._INJECTION_PATTERN.search(v):
                raise SecurityError(f"Input parameter '{k}' contains potentially malicious content")
            sanitized[k] = v
        return sanitized

    def _prepare_kwargs(self, op: str, kwargs: Dict[str, Any]) -> Dict[str, Any]:
        """
        Validate ``kwargs`` against the schema registered for ``op`` (if any),
        then sanitize them. Shared by ``run`` and ``run_async``.

        Raises:
            InputValidationError: If schema validation fails; the original
                pydantic ``ValidationError`` is attached as ``__cause__``.
            SecurityError: If inputs contain malicious content.
        """
        schema_class = self._schemas.get(op)
        if schema_class:
            try:
                schema = schema_class(**kwargs)
            except ValidationError as e:
                raise InputValidationError(f"Invalid input parameters: {e}") from e
            # Only forward fields the caller actually supplied.
            kwargs = schema.model_dump(exclude_unset=True)
        return self._sanitize_kwargs(kwargs)

    def run(self, op: str, **kwargs) -> Any:
        """
        Execute a synchronous operation with parameters.

        Args:
            op (str): The name of the operation to execute.
            **kwargs: The parameters to pass to the operation.

        Returns:
            Any: The result of the operation.

        Raises:
            ToolExecutionError: If the operation fails (raised by the
                executor, not by this method).
            InputValidationError: If input parameters are invalid.
            SecurityError: If inputs contain malicious content.
        """
        kwargs = self._prepare_kwargs(op, kwargs)
        return self._executor.execute(self, op, **kwargs)

    async def run_async(self, op: str, **kwargs) -> Any:
        """
        Execute an asynchronous operation with parameters.

        Args:
            op (str): The name of the operation to execute.
            **kwargs: The parameters to pass to the operation.

        Returns:
            Any: The result of the operation.

        Raises:
            ToolExecutionError: If the operation fails (raised by the
                executor, not by this method).
            InputValidationError: If input parameters are invalid.
            SecurityError: If inputs contain malicious content.
        """
        kwargs = self._prepare_kwargs(op, kwargs)
        return await self._executor.execute_async(self, op, **kwargs)

    async def run_batch(self, operations: List[Dict[str, Any]]) -> List[Any]:
        """
        Execute multiple operations in parallel.

        The list is handed to the executor as-is; no schema validation or
        sanitization is performed in this method.

        Args:
            operations (List[Dict[str, Any]]): List of operation dictionaries with 'op' and 'kwargs'.

        Returns:
            List[Any]: List of operation results.

        Raises:
            ToolExecutionError: If any operation fails.
            InputValidationError: If input parameters are invalid.
        """
        return await self._executor.execute_batch(self, operations)

    def _get_method_schema(self, method_name: str) -> Optional[Type[BaseModel]]:
        """
        Get the schema for a method if it exists.

        Looks in the registered schemas first, then falls back to a class
        attribute named ``<Method_name>Schema`` (first letter upper-cased).

        Args:
            method_name (str): The name of the method.

        Returns:
            Optional[Type[BaseModel]]: The schema class or None.
        """
        if method_name in self._schemas:
            return self._schemas[method_name]
        if not method_name:
            # No name to derive a schema class name from.
            return None
        schema_name = method_name[0].upper() + method_name[1:] + "Schema"
        attr = getattr(self.__class__, schema_name, None)
        if isinstance(attr, type) and issubclass(attr, BaseModel):
            return attr
        return None
@@ -0,0 +1,121 @@
1
+ # python-middleware/aiecs/tools/docs/__init__.py
2
+
3
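"""
Document Tools Module

This module lazily exposes the specialized tools for document processing
and analysis that live in this package:
- document_parser_tool: Modern high-performance document parsing with AI
- ai_document_orchestrator: AI-powered document processing orchestrator
- document_writer_tool
- ai_document_writer_orchestrator
- document_creator_tool
- document_layout_tool
- content_insertion_tool

Any additional ``*_tool.py`` module found in this directory is added to the
list of available tools when the package is imported.
"""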
10
+
11
+ # Lazy import all document tools to avoid heavy dependencies at import time
12
+ import os
13
+
14
# Names of the document tool submodules that can be imported on demand.
# Kept as a list because _discover_doc_tools() appends to it at import time.
_AVAILABLE_DOC_TOOLS: list = [
    "document_parser_tool",
    "ai_document_orchestrator",
    "document_writer_tool",
    "ai_document_writer_orchestrator",
    "document_creator_tool",
    "document_layout_tool",
    "content_insertion_tool",
]

# Names of the tools whose import has been started or completed.
_LOADED_DOC_TOOLS: set = set()
27
+
28
+
29
def _lazy_load_doc_tool(tool_name: str):
    """Lazy load a specific document tool module.

    The submodule ``<this package>.<tool_name>`` is imported and bound as a
    module-level global so later attribute access finds it directly. Every
    name in ``_AVAILABLE_DOC_TOOLS`` can be loaded, including names added by
    directory discovery; previously only a fixed set of seven names was
    importable and discovered tools were marked loaded without being imported.

    Args:
        tool_name (str): Name of the tool submodule to import. Names that are
            already loaded, or not listed in ``_AVAILABLE_DOC_TOOLS``, are
            ignored.

    Returns:
        None. On import failure a warning is printed and the tool is left
        unloaded so a later call can retry.
    """
    import importlib  # local import keeps the module's import block untouched

    if tool_name in _LOADED_DOC_TOOLS or tool_name not in _AVAILABLE_DOC_TOOLS:
        return

    # Mark as loading to prevent infinite recursion
    _LOADED_DOC_TOOLS.add(tool_name)

    try:
        module = importlib.import_module(f".{tool_name}", __name__)
    except ImportError as e:
        # Remove from loaded set if import failed
        _LOADED_DOC_TOOLS.discard(tool_name)
        print(f"Warning: Could not import {tool_name}: {e}")
        return

    globals()[tool_name] = module
71
+
72
+
73
def __getattr__(name: str):
    """
    Module-level attribute hook that imports document tools on first access.

    It makes statements such as the following work without importing every
    tool eagerly:
        from aiecs.tools.docs import document_parser_tool

    Raises:
        AttributeError: If ``name`` is not a known tool, or the tool is still
            not present in the module globals after the load attempt.
    """
    known_tool = name in _AVAILABLE_DOC_TOOLS
    if known_tool:
        _lazy_load_doc_tool(name)

    namespace = globals()
    if known_tool and name in namespace:
        return namespace[name]

    raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
86
+
87
+
88
def list_doc_tools():
    """Return a new list holding the names of all available document tools."""
    return list(_AVAILABLE_DOC_TOOLS)
91
+
92
+
93
def load_all_doc_tools():
    """Request a lazy load for every name in the available document tools list."""
    for doc_tool in _AVAILABLE_DOC_TOOLS:
        _lazy_load_doc_tool(doc_tool)
97
+
98
+
99
+ # Auto-discovery of tool modules in this directory
100
+
101
+
102
def _discover_doc_tools():
    """Discover document tool modules in the current directory.

    Every file next to this module whose name ends in ``_tool.py`` (and does
    not start with ``__``) is appended, without the ``.py`` suffix, to
    ``_AVAILABLE_DOC_TOOLS`` unless it is already listed.

    The directory listing is sorted so newly discovered tools are appended in
    a deterministic order. If the directory cannot be listed, discovery is
    skipped and the existing list is left untouched; this function runs at
    import time, so a listing error must not make the package unimportable.
    """
    current_dir = os.path.dirname(__file__)
    if not current_dir:
        return

    try:
        filenames = sorted(os.listdir(current_dir))
    except OSError:
        # e.g. the package is not backed by a plain directory on disk
        return

    for filename in filenames:
        if filename.endswith("_tool.py") and not filename.startswith("__"):
            tool_name = filename[:-3]  # Remove .py extension
            if tool_name not in _AVAILABLE_DOC_TOOLS:
                _AVAILABLE_DOC_TOOLS.append(tool_name)
113
+
114
+
115
# Run discovery at import time so the exported names below include any
# tool modules found in this directory.
_discover_doc_tools()

# Public API: the two helper functions plus every available tool name.
__all__ = ["list_doc_tools", "load_all_doc_tools", *_AVAILABLE_DOC_TOOLS]