aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic. Click here for more details.

Files changed (340)
  1. aiecs/__init__.py +13 -16
  2. aiecs/__main__.py +7 -7
  3. aiecs/aiecs_client.py +269 -75
  4. aiecs/application/executors/operation_executor.py +79 -54
  5. aiecs/application/knowledge_graph/__init__.py +7 -0
  6. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  7. aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
  8. aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
  11. aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
  12. aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
  13. aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
  14. aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
  15. aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
  16. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  17. aiecs/application/knowledge_graph/extractors/base.py +98 -0
  18. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
  19. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
  20. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
  21. aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
  22. aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
  23. aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
  24. aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
  25. aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
  26. aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
  27. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
  28. aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
  29. aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
  30. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
  31. aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
  32. aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
  33. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
  34. aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
  35. aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
  36. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  37. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
  38. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
  39. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  40. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
  41. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  42. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  43. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
  44. aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
  45. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
  46. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  47. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  48. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
  49. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
  50. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
  51. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
  52. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
  53. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
  54. aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
  55. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
  56. aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
  57. aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
  58. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
  59. aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
  60. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  61. aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
  62. aiecs/application/knowledge_graph/search/reranker.py +293 -0
  63. aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
  64. aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
  65. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  66. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
  67. aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
  68. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  69. aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
  70. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  71. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
  72. aiecs/common/__init__.py +9 -0
  73. aiecs/common/knowledge_graph/__init__.py +17 -0
  74. aiecs/common/knowledge_graph/runnable.py +471 -0
  75. aiecs/config/__init__.py +20 -5
  76. aiecs/config/config.py +762 -31
  77. aiecs/config/graph_config.py +131 -0
  78. aiecs/config/tool_config.py +399 -0
  79. aiecs/core/__init__.py +29 -13
  80. aiecs/core/interface/__init__.py +2 -2
  81. aiecs/core/interface/execution_interface.py +22 -22
  82. aiecs/core/interface/storage_interface.py +37 -88
  83. aiecs/core/registry/__init__.py +31 -0
  84. aiecs/core/registry/service_registry.py +92 -0
  85. aiecs/domain/__init__.py +270 -1
  86. aiecs/domain/agent/__init__.py +191 -0
  87. aiecs/domain/agent/base_agent.py +3870 -0
  88. aiecs/domain/agent/exceptions.py +99 -0
  89. aiecs/domain/agent/graph_aware_mixin.py +569 -0
  90. aiecs/domain/agent/hybrid_agent.py +1435 -0
  91. aiecs/domain/agent/integration/__init__.py +29 -0
  92. aiecs/domain/agent/integration/context_compressor.py +216 -0
  93. aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
  94. aiecs/domain/agent/integration/protocols.py +281 -0
  95. aiecs/domain/agent/integration/retry_policy.py +218 -0
  96. aiecs/domain/agent/integration/role_config.py +213 -0
  97. aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
  98. aiecs/domain/agent/lifecycle.py +291 -0
  99. aiecs/domain/agent/llm_agent.py +692 -0
  100. aiecs/domain/agent/memory/__init__.py +12 -0
  101. aiecs/domain/agent/memory/conversation.py +1124 -0
  102. aiecs/domain/agent/migration/__init__.py +14 -0
  103. aiecs/domain/agent/migration/conversion.py +163 -0
  104. aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
  105. aiecs/domain/agent/models.py +884 -0
  106. aiecs/domain/agent/observability.py +479 -0
  107. aiecs/domain/agent/persistence.py +449 -0
  108. aiecs/domain/agent/prompts/__init__.py +29 -0
  109. aiecs/domain/agent/prompts/builder.py +159 -0
  110. aiecs/domain/agent/prompts/formatters.py +187 -0
  111. aiecs/domain/agent/prompts/template.py +255 -0
  112. aiecs/domain/agent/registry.py +253 -0
  113. aiecs/domain/agent/tool_agent.py +444 -0
  114. aiecs/domain/agent/tools/__init__.py +15 -0
  115. aiecs/domain/agent/tools/schema_generator.py +364 -0
  116. aiecs/domain/community/__init__.py +155 -0
  117. aiecs/domain/community/agent_adapter.py +469 -0
  118. aiecs/domain/community/analytics.py +432 -0
  119. aiecs/domain/community/collaborative_workflow.py +648 -0
  120. aiecs/domain/community/communication_hub.py +634 -0
  121. aiecs/domain/community/community_builder.py +320 -0
  122. aiecs/domain/community/community_integration.py +796 -0
  123. aiecs/domain/community/community_manager.py +803 -0
  124. aiecs/domain/community/decision_engine.py +849 -0
  125. aiecs/domain/community/exceptions.py +231 -0
  126. aiecs/domain/community/models/__init__.py +33 -0
  127. aiecs/domain/community/models/community_models.py +234 -0
  128. aiecs/domain/community/resource_manager.py +461 -0
  129. aiecs/domain/community/shared_context_manager.py +589 -0
  130. aiecs/domain/context/__init__.py +40 -10
  131. aiecs/domain/context/context_engine.py +1910 -0
  132. aiecs/domain/context/conversation_models.py +87 -53
  133. aiecs/domain/context/graph_memory.py +582 -0
  134. aiecs/domain/execution/model.py +12 -4
  135. aiecs/domain/knowledge_graph/__init__.py +19 -0
  136. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  137. aiecs/domain/knowledge_graph/models/entity.py +148 -0
  138. aiecs/domain/knowledge_graph/models/evidence.py +178 -0
  139. aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
  140. aiecs/domain/knowledge_graph/models/path.py +171 -0
  141. aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
  142. aiecs/domain/knowledge_graph/models/query.py +261 -0
  143. aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
  144. aiecs/domain/knowledge_graph/models/relation.py +202 -0
  145. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  146. aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
  147. aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
  148. aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
  149. aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
  150. aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
  151. aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
  152. aiecs/domain/task/dsl_processor.py +172 -56
  153. aiecs/domain/task/model.py +20 -8
  154. aiecs/domain/task/task_context.py +27 -24
  155. aiecs/infrastructure/__init__.py +0 -2
  156. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  157. aiecs/infrastructure/graph_storage/base.py +837 -0
  158. aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
  159. aiecs/infrastructure/graph_storage/cache.py +424 -0
  160. aiecs/infrastructure/graph_storage/distributed.py +223 -0
  161. aiecs/infrastructure/graph_storage/error_handling.py +380 -0
  162. aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
  163. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  164. aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
  165. aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
  166. aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
  167. aiecs/infrastructure/graph_storage/metrics.py +344 -0
  168. aiecs/infrastructure/graph_storage/migration.py +400 -0
  169. aiecs/infrastructure/graph_storage/pagination.py +483 -0
  170. aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
  171. aiecs/infrastructure/graph_storage/postgres.py +1563 -0
  172. aiecs/infrastructure/graph_storage/property_storage.py +353 -0
  173. aiecs/infrastructure/graph_storage/protocols.py +76 -0
  174. aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
  175. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  176. aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
  177. aiecs/infrastructure/graph_storage/streaming.py +487 -0
  178. aiecs/infrastructure/graph_storage/tenant.py +412 -0
  179. aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
  180. aiecs/infrastructure/messaging/websocket_manager.py +51 -35
  181. aiecs/infrastructure/monitoring/__init__.py +22 -0
  182. aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
  183. aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
  184. aiecs/infrastructure/monitoring/structured_logger.py +3 -7
  185. aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
  186. aiecs/infrastructure/persistence/__init__.py +14 -1
  187. aiecs/infrastructure/persistence/context_engine_client.py +184 -0
  188. aiecs/infrastructure/persistence/database_manager.py +67 -43
  189. aiecs/infrastructure/persistence/file_storage.py +180 -103
  190. aiecs/infrastructure/persistence/redis_client.py +74 -21
  191. aiecs/llm/__init__.py +73 -25
  192. aiecs/llm/callbacks/__init__.py +11 -0
  193. aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
  194. aiecs/llm/client_factory.py +224 -36
  195. aiecs/llm/client_resolver.py +155 -0
  196. aiecs/llm/clients/__init__.py +38 -0
  197. aiecs/llm/clients/base_client.py +324 -0
  198. aiecs/llm/clients/google_function_calling_mixin.py +457 -0
  199. aiecs/llm/clients/googleai_client.py +241 -0
  200. aiecs/llm/clients/openai_client.py +158 -0
  201. aiecs/llm/clients/openai_compatible_mixin.py +367 -0
  202. aiecs/llm/clients/vertex_client.py +897 -0
  203. aiecs/llm/clients/xai_client.py +201 -0
  204. aiecs/llm/config/__init__.py +51 -0
  205. aiecs/llm/config/config_loader.py +272 -0
  206. aiecs/llm/config/config_validator.py +206 -0
  207. aiecs/llm/config/model_config.py +143 -0
  208. aiecs/llm/protocols.py +149 -0
  209. aiecs/llm/utils/__init__.py +10 -0
  210. aiecs/llm/utils/validate_config.py +89 -0
  211. aiecs/main.py +140 -121
  212. aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
  213. aiecs/scripts/aid/__init__.py +19 -0
  214. aiecs/scripts/aid/module_checker.py +499 -0
  215. aiecs/scripts/aid/version_manager.py +235 -0
  216. aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
  217. aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
  218. aiecs/scripts/dependance_check/__init__.py +15 -0
  219. aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
  220. aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
  221. aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
  222. aiecs/scripts/dependance_patch/__init__.py +7 -0
  223. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  224. aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
  225. aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
  226. aiecs/scripts/knowledge_graph/__init__.py +3 -0
  227. aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
  228. aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
  229. aiecs/scripts/tools_develop/README.md +671 -0
  230. aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
  231. aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
  232. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  233. aiecs/scripts/tools_develop/__init__.py +21 -0
  234. aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
  235. aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
  236. aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
  237. aiecs/scripts/tools_develop/schema_coverage.py +511 -0
  238. aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
  239. aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
  240. aiecs/scripts/tools_develop/verify_tools.py +352 -0
  241. aiecs/tasks/__init__.py +0 -1
  242. aiecs/tasks/worker.py +115 -47
  243. aiecs/tools/__init__.py +194 -72
  244. aiecs/tools/apisource/__init__.py +99 -0
  245. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  246. aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
  247. aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
  248. aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
  249. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  250. aiecs/tools/apisource/monitoring/metrics.py +330 -0
  251. aiecs/tools/apisource/providers/__init__.py +112 -0
  252. aiecs/tools/apisource/providers/base.py +671 -0
  253. aiecs/tools/apisource/providers/census.py +397 -0
  254. aiecs/tools/apisource/providers/fred.py +535 -0
  255. aiecs/tools/apisource/providers/newsapi.py +409 -0
  256. aiecs/tools/apisource/providers/worldbank.py +352 -0
  257. aiecs/tools/apisource/reliability/__init__.py +12 -0
  258. aiecs/tools/apisource/reliability/error_handler.py +363 -0
  259. aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
  260. aiecs/tools/apisource/tool.py +832 -0
  261. aiecs/tools/apisource/utils/__init__.py +9 -0
  262. aiecs/tools/apisource/utils/validators.py +334 -0
  263. aiecs/tools/base_tool.py +415 -21
  264. aiecs/tools/docs/__init__.py +121 -0
  265. aiecs/tools/docs/ai_document_orchestrator.py +607 -0
  266. aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
  267. aiecs/tools/docs/content_insertion_tool.py +1320 -0
  268. aiecs/tools/docs/document_creator_tool.py +1323 -0
  269. aiecs/tools/docs/document_layout_tool.py +1160 -0
  270. aiecs/tools/docs/document_parser_tool.py +1011 -0
  271. aiecs/tools/docs/document_writer_tool.py +1829 -0
  272. aiecs/tools/knowledge_graph/__init__.py +17 -0
  273. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
  274. aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
  275. aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
  276. aiecs/tools/langchain_adapter.py +300 -138
  277. aiecs/tools/schema_generator.py +455 -0
  278. aiecs/tools/search_tool/__init__.py +100 -0
  279. aiecs/tools/search_tool/analyzers.py +581 -0
  280. aiecs/tools/search_tool/cache.py +264 -0
  281. aiecs/tools/search_tool/constants.py +128 -0
  282. aiecs/tools/search_tool/context.py +224 -0
  283. aiecs/tools/search_tool/core.py +778 -0
  284. aiecs/tools/search_tool/deduplicator.py +119 -0
  285. aiecs/tools/search_tool/error_handler.py +242 -0
  286. aiecs/tools/search_tool/metrics.py +343 -0
  287. aiecs/tools/search_tool/rate_limiter.py +172 -0
  288. aiecs/tools/search_tool/schemas.py +275 -0
  289. aiecs/tools/statistics/__init__.py +80 -0
  290. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
  291. aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
  292. aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
  293. aiecs/tools/statistics/data_loader_tool.py +555 -0
  294. aiecs/tools/statistics/data_profiler_tool.py +638 -0
  295. aiecs/tools/statistics/data_transformer_tool.py +580 -0
  296. aiecs/tools/statistics/data_visualizer_tool.py +498 -0
  297. aiecs/tools/statistics/model_trainer_tool.py +507 -0
  298. aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
  299. aiecs/tools/task_tools/__init__.py +49 -36
  300. aiecs/tools/task_tools/chart_tool.py +200 -184
  301. aiecs/tools/task_tools/classfire_tool.py +268 -267
  302. aiecs/tools/task_tools/image_tool.py +175 -131
  303. aiecs/tools/task_tools/office_tool.py +226 -146
  304. aiecs/tools/task_tools/pandas_tool.py +477 -121
  305. aiecs/tools/task_tools/report_tool.py +390 -142
  306. aiecs/tools/task_tools/research_tool.py +149 -79
  307. aiecs/tools/task_tools/scraper_tool.py +339 -145
  308. aiecs/tools/task_tools/stats_tool.py +448 -209
  309. aiecs/tools/temp_file_manager.py +26 -24
  310. aiecs/tools/tool_executor/__init__.py +18 -16
  311. aiecs/tools/tool_executor/tool_executor.py +364 -52
  312. aiecs/utils/LLM_output_structor.py +74 -48
  313. aiecs/utils/__init__.py +14 -3
  314. aiecs/utils/base_callback.py +0 -3
  315. aiecs/utils/cache_provider.py +696 -0
  316. aiecs/utils/execution_utils.py +50 -31
  317. aiecs/utils/prompt_loader.py +1 -0
  318. aiecs/utils/token_usage_repository.py +37 -11
  319. aiecs/ws/socket_server.py +14 -4
  320. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
  321. aiecs-1.7.6.dist-info/RECORD +337 -0
  322. aiecs-1.7.6.dist-info/entry_points.txt +13 -0
  323. aiecs/config/registry.py +0 -19
  324. aiecs/domain/context/content_engine.py +0 -982
  325. aiecs/llm/base_client.py +0 -99
  326. aiecs/llm/openai_client.py +0 -125
  327. aiecs/llm/vertex_client.py +0 -186
  328. aiecs/llm/xai_client.py +0 -184
  329. aiecs/scripts/dependency_checker.py +0 -857
  330. aiecs/scripts/quick_dependency_check.py +0 -269
  331. aiecs/tools/task_tools/search_api.py +0 -7
  332. aiecs-1.0.1.dist-info/RECORD +0 -90
  333. aiecs-1.0.1.dist-info/entry_points.txt +0 -7
  334. /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
  335. /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
  336. /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
  337. /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
  338. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
  339. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
  340. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  import asyncio
2
2
  import logging
3
- from typing import Dict, List, Any, Optional
3
+ from typing import Dict, List, Any
4
4
  from aiecs.tools import get_tool
5
5
  from aiecs.tools.tool_executor import ToolExecutor
6
6
  from aiecs.utils.execution_utils import ExecutionUtils
@@ -14,27 +14,33 @@ class OperationExecutor:
14
14
  Core logic for handling operation execution
15
15
  """
16
16
 
17
- def __init__(self, tool_executor: ToolExecutor, execution_utils: ExecutionUtils, config: Dict[str, Any]):
17
+ def __init__(
18
+ self,
19
+ tool_executor: ToolExecutor,
20
+ execution_utils: ExecutionUtils,
21
+ config: Dict[str, Any],
22
+ ):
18
23
  self.tool_executor = tool_executor
19
24
  self.execution_utils = execution_utils
20
25
  self.config = config
21
- self._tool_instances = {}
22
- self.semaphore = asyncio.Semaphore(config.get('rate_limit_requests_per_second', 5))
26
+ self._tool_instances: Dict[str, Any] = {}
27
+ self.semaphore = asyncio.Semaphore(config.get("rate_limit_requests_per_second", 5))
23
28
 
24
29
  def _filter_tool_params(self, params: Dict[str, Any]) -> Dict[str, Any]:
25
30
  """
26
31
  Filter out system-related parameters, keeping only parameters needed by tool methods
27
32
  """
28
33
  # System-related parameters that should not be passed to tool methods
29
- system_params = {'user_id', 'task_id', 'op'}
34
+ system_params = {"user_id", "task_id", "op"}
30
35
  return {k: v for k, v in params.items() if k not in system_params}
31
36
 
32
37
  def _filter_tool_call_params(self, params: Dict[str, Any]) -> Dict[str, Any]:
33
38
  """
34
39
  Filter out system-related parameters in tool calls, but keep 'op' parameter (needed by BaseTool.run())
35
40
  """
36
- # Only filter user and task IDs, keep 'op' parameter for BaseTool.run() to use
37
- system_params = {'user_id', 'task_id'}
41
+ # Only filter user and task IDs, keep 'op' parameter for BaseTool.run()
42
+ # to use
43
+ system_params = {"user_id", "task_id"}
38
44
  return {k: v for k, v in params.items() if k not in system_params}
39
45
 
40
46
  async def execute_operation(self, operation_spec: str, params: Dict[str, Any]) -> Any:
@@ -44,7 +50,9 @@ class OperationExecutor:
44
50
  if "." not in operation_spec:
45
51
  raise ValueError(f"Invalid operation spec: {operation_spec}, expected 'tool_name.operation_name'")
46
52
 
47
- tool_name, operation_name = operation_spec.split(".", 1)
53
+ parts = operation_spec.split(".", 1)
54
+ tool_name: str = parts[0]
55
+ operation_name: str = parts[1]
48
56
 
49
57
  # Get or create tool instance
50
58
  if tool_name not in self._tool_instances:
@@ -69,29 +77,37 @@ class OperationExecutor:
69
77
  Batch execute operations with rate limiting
70
78
  """
71
79
  results = []
72
- batch_size = self.config.get('batch_size', 10)
73
- rate_limit = self.config.get('rate_limit_requests_per_second', 5)
80
+ batch_size = self.config.get("batch_size", 10)
81
+ rate_limit = self.config.get("rate_limit_requests_per_second", 5)
74
82
 
75
83
  for i in range(0, len(operations), batch_size):
76
- batch = operations[i:i + batch_size]
84
+ batch = operations[i : i + batch_size]
77
85
  batch_results = await asyncio.gather(
78
86
  *[self.execute_operation(op["operation"], op.get("params", {})) for op in batch],
79
- return_exceptions=True
87
+ return_exceptions=True,
80
88
  )
81
89
  results.extend(batch_results)
82
90
  await asyncio.sleep(1.0 / rate_limit)
83
91
 
84
92
  return results
85
93
 
86
- async def execute_operations_sequence(self, operations: List[Dict[str, Any]], user_id: str, task_id: str,
87
- stop_on_failure: bool = False, save_callback=None) -> List[TaskStepResult]:
94
+ async def execute_operations_sequence(
95
+ self,
96
+ operations: List[Dict[str, Any]],
97
+ user_id: str,
98
+ task_id: str,
99
+ stop_on_failure: bool = False,
100
+ save_callback=None,
101
+ ) -> List[TaskStepResult]:
88
102
  """
89
103
  Execute operations sequence sequentially, with option to stop on failure
90
104
  """
91
- results = []
105
+ results: List[TaskStepResult] = []
92
106
 
93
107
  for step, op_info in enumerate(operations):
94
108
  operation_spec = op_info.get("operation")
109
+ if not isinstance(operation_spec, str):
110
+ raise ValueError(f"Invalid operation spec: {operation_spec}, expected string")
95
111
  params = op_info.get("params", {})
96
112
 
97
113
  # Process parameter references
@@ -104,7 +120,7 @@ class OperationExecutor:
104
120
  result=result,
105
121
  completed=True,
106
122
  message=f"Completed operation {operation_spec}",
107
- status=TaskStatus.COMPLETED.value
123
+ status=TaskStatus.COMPLETED.value,
108
124
  )
109
125
  except Exception as e:
110
126
  step_result = TaskStepResult(
@@ -114,7 +130,7 @@ class OperationExecutor:
114
130
  message=f"Failed to execute {operation_spec}",
115
131
  status=TaskStatus.FAILED.value,
116
132
  error_code=ErrorCode.EXECUTION_ERROR.value,
117
- error_message=str(e)
133
+ error_message=str(e),
118
134
  )
119
135
 
120
136
  if stop_on_failure:
@@ -138,9 +154,9 @@ class OperationExecutor:
138
154
  processed = {}
139
155
 
140
156
  for name, value in params.items():
141
- if isinstance(value, str) and value.startswith('$result['):
157
+ if isinstance(value, str) and value.startswith("$result["):
142
158
  try:
143
- ref_parts = value[8:].split(']', 1)
159
+ ref_parts = value[8:].split("]", 1)
144
160
  idx = int(ref_parts[0])
145
161
 
146
162
  if idx >= len(results):
@@ -148,9 +164,10 @@ class OperationExecutor:
148
164
 
149
165
  ref_value = results[idx].result
150
166
 
151
- # Handle nested attribute access, such as $result[0].data.field
152
- if len(ref_parts) > 1 and ref_parts[1].startswith('.'):
153
- for attr in ref_parts[1][1:].split('.'):
167
+ # Handle nested attribute access, such as
168
+ # $result[0].data.field
169
+ if len(ref_parts) > 1 and ref_parts[1].startswith("."):
170
+ for attr in ref_parts[1][1:].split("."):
154
171
  if attr:
155
172
  if isinstance(ref_value, dict):
156
173
  ref_value = ref_value.get(attr)
@@ -171,14 +188,14 @@ class OperationExecutor:
171
188
  Execute batch tool calls with rate limiting
172
189
  """
173
190
  results = []
174
- batch_size = self.config.get('batch_size', 10)
175
- rate_limit = self.config.get('rate_limit_requests_per_second', 5)
191
+ batch_size = self.config.get("batch_size", 10)
192
+ rate_limit = self.config.get("rate_limit_requests_per_second", 5)
176
193
 
177
194
  for i in range(0, len(tool_calls), batch_size):
178
- batch = tool_calls[i:i + batch_size]
195
+ batch = tool_calls[i : i + batch_size]
179
196
  batch_results = await asyncio.gather(
180
197
  *[self._execute_tool_call(call, tool_executor_func) for call in batch],
181
- return_exceptions=True
198
+ return_exceptions=True,
182
199
  )
183
200
  results.extend(batch_results)
184
201
  await asyncio.sleep(1.0 / rate_limit)
@@ -190,11 +207,14 @@ class OperationExecutor:
190
207
  Execute a single tool call with rate limiting
191
208
  """
192
209
  async with self.semaphore:
193
- tool_name = call.get("tool")
210
+ tool_name_raw = call.get("tool")
211
+ if not isinstance(tool_name_raw, str):
212
+ raise ValueError(f"Invalid tool name: {tool_name_raw}, expected string")
213
+ tool_name: str = tool_name_raw
194
214
  params = call.get("params", {})
195
215
 
196
216
  # Use context-aware caching
197
- if self.config.get('enable_cache', True):
217
+ if self.config.get("enable_cache", True):
198
218
  user_id = params.get("user_id", "anonymous")
199
219
  task_id = params.get("task_id", "none")
200
220
  cache_key = self.execution_utils.generate_cache_key("tool_call", user_id, task_id, (), params)
@@ -211,14 +231,16 @@ class OperationExecutor:
211
231
  if tool_name not in self._tool_instances:
212
232
  self._tool_instances[tool_name] = get_tool(tool_name)
213
233
  tool = self._tool_instances[tool_name]
214
-
215
- # Filter parameters, remove system-related parameters (but keep 'op' parameter)
234
+
235
+ # Filter parameters, remove system-related parameters (but keep
236
+ # 'op' parameter)
216
237
  tool_params = self._filter_tool_call_params(params)
217
- # Execute through BaseTool.run method, passing filtered parameters
238
+ # Execute through BaseTool.run method, passing filtered
239
+ # parameters
218
240
  result = await self.tool_executor.execute_async(tool, "run", **tool_params)
219
241
 
220
242
  # Cache result
221
- if self.config.get('enable_cache', True):
243
+ if self.config.get("enable_cache", True):
222
244
  self.execution_utils.add_to_cache(cache_key, result)
223
245
 
224
246
  return result
@@ -230,7 +252,7 @@ class OperationExecutor:
230
252
  import re
231
253
 
232
254
  tool_calls = []
233
- tool_pattern = r'\{\{(\w+)\((.*?)\)\}\}'
255
+ tool_pattern = r"\{\{(\w+)\((.*?)\)\}\}"
234
256
  matches = re.finditer(tool_pattern, description)
235
257
 
236
258
  for match in matches:
@@ -256,10 +278,7 @@ class OperationExecutor:
256
278
 
257
279
  params[param_name] = param_value
258
280
 
259
- tool_calls.append({
260
- "tool": tool_name,
261
- "params": params
262
- })
281
+ tool_calls.append({"tool": tool_name, "params": params})
263
282
 
264
283
  return tool_calls
265
284
 
@@ -271,9 +290,11 @@ class OperationExecutor:
271
290
 
272
291
  for i, op_info in enumerate(operations):
273
292
  operation_spec = op_info.get("operation")
293
+ if not isinstance(operation_spec, str):
294
+ raise ValueError(f"Invalid operation spec: {operation_spec}, expected string")
274
295
  params = op_info.get("params", {})
275
296
 
276
- async def execute_single_op(spec, p, index):
297
+ async def execute_single_op(spec: str, p: Dict[str, Any], index: int) -> TaskStepResult:
277
298
  try:
278
299
  result = await self.execute_operation(spec, p)
279
300
  return TaskStepResult(
@@ -281,7 +302,7 @@ class OperationExecutor:
281
302
  result=result,
282
303
  completed=True,
283
304
  message=f"Completed parallel operation {spec}",
284
- status=TaskStatus.COMPLETED.value
305
+ status=TaskStatus.COMPLETED.value,
285
306
  )
286
307
  except Exception as e:
287
308
  return TaskStepResult(
@@ -291,7 +312,7 @@ class OperationExecutor:
291
312
  message=f"Failed parallel operation {spec}",
292
313
  status=TaskStatus.FAILED.value,
293
314
  error_code=ErrorCode.EXECUTION_ERROR.value,
294
- error_message=str(e)
315
+ error_message=str(e),
295
316
  )
296
317
 
297
318
  tasks.append(execute_single_op(operation_spec, params, i))
@@ -299,19 +320,23 @@ class OperationExecutor:
299
320
  results = await asyncio.gather(*tasks, return_exceptions=True)
300
321
 
301
322
  # Handle exception results
302
- processed_results = []
323
+ processed_results: List[TaskStepResult] = []
303
324
  for i, result in enumerate(results):
304
325
  if isinstance(result, Exception):
305
- processed_results.append(TaskStepResult(
306
- step=f"parallel_{i}_error",
307
- result=None,
308
- completed=False,
309
- message=f"Parallel operation failed with exception",
310
- status=TaskStatus.FAILED.value,
311
- error_code=ErrorCode.EXECUTION_ERROR.value,
312
- error_message=str(result)
313
- ))
326
+ processed_results.append(
327
+ TaskStepResult(
328
+ step=f"parallel_{i}_error",
329
+ result=None,
330
+ completed=False,
331
+ message="Parallel operation failed with exception",
332
+ status=TaskStatus.FAILED.value,
333
+ error_code=ErrorCode.EXECUTION_ERROR.value,
334
+ error_message=str(result),
335
+ )
336
+ )
314
337
  else:
338
+ # result is TaskStepResult here because execute_single_op always returns TaskStepResult
339
+ assert isinstance(result, TaskStepResult), f"Expected TaskStepResult, got {type(result)}"
315
340
  processed_results.append(result)
316
341
 
317
342
  return processed_results
@@ -334,8 +359,8 @@ class OperationExecutor:
334
359
  "tool_names": list(self._tool_instances.keys()),
335
360
  "semaphore_value": self.semaphore._value,
336
361
  "config": {
337
- "batch_size": self.config.get('batch_size', 10),
338
- "rate_limit": self.config.get('rate_limit_requests_per_second', 5),
339
- "enable_cache": self.config.get('enable_cache', True)
340
- }
362
+ "batch_size": self.config.get("batch_size", 10),
363
+ "rate_limit": self.config.get("rate_limit_requests_per_second", 5),
364
+ "enable_cache": self.config.get("enable_cache", True),
365
+ },
341
366
  }
@@ -0,0 +1,7 @@
1
+ """
2
+ Knowledge Graph Application Layer
3
+
4
+ This module contains application services and use cases for knowledge graph operations.
5
+ """
6
+
7
+ __all__ = []
@@ -0,0 +1,37 @@
1
+ """
2
+ Knowledge Graph Builder Pipeline
3
+
4
+ Orchestrates document-to-graph conversion workflow.
5
+ """
6
+
7
+ from aiecs.application.knowledge_graph.builder.graph_builder import (
8
+ GraphBuilder,
9
+ )
10
+ from aiecs.application.knowledge_graph.builder.document_builder import (
11
+ DocumentGraphBuilder,
12
+ )
13
+ from aiecs.application.knowledge_graph.builder.text_chunker import TextChunker
14
+ from aiecs.application.knowledge_graph.builder.schema_mapping import (
15
+ SchemaMapping,
16
+ EntityMapping,
17
+ RelationMapping,
18
+ PropertyTransformation,
19
+ TransformationType,
20
+ )
21
+ from aiecs.application.knowledge_graph.builder.structured_pipeline import (
22
+ StructuredDataPipeline,
23
+ ImportResult,
24
+ )
25
+
26
+ __all__ = [
27
+ "GraphBuilder",
28
+ "DocumentGraphBuilder",
29
+ "TextChunker",
30
+ "SchemaMapping",
31
+ "EntityMapping",
32
+ "RelationMapping",
33
+ "PropertyTransformation",
34
+ "TransformationType",
35
+ "StructuredDataPipeline",
36
+ "ImportResult",
37
+ ]
@@ -0,0 +1,302 @@
1
+ """
2
+ Data Quality Validation for Knowledge Graph Import
3
+
4
+ Provides validation capabilities to ensure data quality during import,
5
+ including range validation, outlier detection, completeness checks, and
6
+ type consistency validation.
7
+ """
8
+
9
+ from typing import Dict, List, Optional, Any, Set, Union
10
+ from dataclasses import dataclass, field
11
+ from enum import Enum
12
+ import logging
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ # Check for pandas and numpy availability
17
+ try:
18
+ import pandas as pd
19
+ import numpy as np
20
+ PANDAS_AVAILABLE = True
21
+ except ImportError:
22
+ PANDAS_AVAILABLE = False
23
+
24
+
25
class ViolationType(Enum):
    """Categories of data quality problems recorded in a validation report."""

    # A numeric value fell below a configured minimum or above a maximum
    RANGE_VIOLATION = "range_violation"
    # A numeric value lies more than three standard deviations from the mean
    OUTLIER = "outlier"
    # A required property is absent from the dataset or null in a row
    MISSING_VALUE = "missing_value"
    # NOTE(review): tallied by QualityReport, but no validator visible in
    # this module emits it -- confirm where type checks are performed
    TYPE_MISMATCH = "type_mismatch"
31
+
32
+
33
@dataclass
class ValidationViolation:
    """
    A single data quality finding.

    Attributes:
        violation_type: Category of the finding
        property_name: Name of the property (column) the finding concerns
        row_id: Identifier of the offending row; the validator in this
            module uses "ALL" when a whole property is missing
        value: The offending value (None for missing values)
        expected: Description of the constraint that was not met
        message: Human-readable explanation of the finding
    """

    violation_type: ViolationType
    property_name: str
    row_id: Any
    value: Any
    expected: Any
    message: str
52
+
53
+
54
@dataclass
class QualityReport:
    """
    Outcome of a data quality validation run.

    Attributes:
        total_rows: Number of rows that were validated
        violations: Every violation recorded, in insertion order
        completeness: Fraction of non-null values per property
        outlier_count: Outlier tally per property
        range_violations: Range violation tally per property
        type_violations: Type violation tally per property
        passed: Whether validation passed; starts True and is changed by
            the caller that owns the report
    """

    total_rows: int
    violations: List[ValidationViolation] = field(default_factory=list)
    completeness: Dict[str, float] = field(default_factory=dict)
    outlier_count: Dict[str, int] = field(default_factory=dict)
    range_violations: Dict[str, int] = field(default_factory=dict)
    type_violations: Dict[str, int] = field(default_factory=dict)
    passed: bool = True

    def add_violation(self, violation: ValidationViolation):
        """
        Record a violation and increment the matching per-property tally.

        Missing-value violations are stored in ``violations`` but have no
        dedicated tally.
        """
        self.violations.append(violation)

        # Pair each tallied violation type with the counter it feeds
        tallied = (
            (ViolationType.RANGE_VIOLATION, self.range_violations),
            (ViolationType.OUTLIER, self.outlier_count),
            (ViolationType.TYPE_MISMATCH, self.type_violations),
        )
        for kind, counter in tallied:
            if violation.violation_type == kind:
                prop = violation.property_name
                counter[prop] = counter.get(prop, 0) + 1
                break

    def get_summary(self) -> Dict[str, Any]:
        """Return aggregate counts, completeness and pass/fail as a dict."""
        range_total = sum(self.range_violations.values())
        outlier_total = sum(self.outlier_count.values())
        type_total = sum(self.type_violations.values())

        summary: Dict[str, Any] = {}
        summary["total_rows"] = self.total_rows
        summary["total_violations"] = len(self.violations)
        summary["range_violations"] = range_total
        summary["outliers"] = outlier_total
        summary["type_violations"] = type_total
        summary["completeness"] = self.completeness
        summary["passed"] = self.passed
        return summary
102
+
103
+
104
@dataclass
class RangeRule:
    """
    Numeric bounds for a property.

    A bound left as None is not enforced. The validator in this module
    flags values strictly below ``min_value`` or strictly above
    ``max_value``, so values equal to a bound are accepted.
    """

    min_value: Optional[float] = None
    max_value: Optional[float] = None
109
+
110
+
111
@dataclass
class ValidationConfig:
    """
    Settings that control a data quality validation run.

    Attributes:
        range_rules: RangeRule to apply, keyed by property name
        required_properties: Properties that must exist and be non-null
        detect_outliers: Enable the three-standard-deviation outlier check
        fail_on_violations: When True, a report is marked as failed once
            the violation rate exceeds ``max_violation_rate``
        max_violation_rate: Tolerated ratio of violations to rows (0.0-1.0)
    """

    range_rules: Dict[str, RangeRule] = field(default_factory=dict)
    required_properties: Set[str] = field(default_factory=set)
    detect_outliers: bool = False
    fail_on_violations: bool = False
    # Default tolerance: 10% of rows
    max_violation_rate: float = 0.1
128
+
129
+
130
class DataQualityValidator:
    """
    Validates data quality during knowledge graph import

    Measures per-column completeness, checks required properties, validates
    numeric ranges and optionally flags outliers in a pandas DataFrame,
    collecting every finding in a QualityReport.

    Offending rows are addressed by integer position throughout, so
    DataFrames with a non-default index (filtered, string-labelled,
    non-contiguous) are reported against the correct row and value.
    """

    def __init__(self, config: Optional[ValidationConfig] = None):
        """
        Initialize validator with configuration

        Args:
            config: Validation configuration; a default ValidationConfig is
                used when omitted
        """
        self.config = config or ValidationConfig()
        # Mean and standard deviation per numeric column, recorded by
        # _detect_outliers for the columns it actually analyses
        self._property_stats: Dict[str, Dict[str, float]] = {}

    def validate_dataframe(self, df: 'pd.DataFrame', id_column: Optional[str] = None) -> QualityReport:
        """
        Validate a pandas DataFrame

        Args:
            df: DataFrame to validate
            id_column: Column to use as row identifier; the DataFrame index
                is used when this is omitted or is not a column of df

        Returns:
            QualityReport with validation results

        Raises:
            ImportError: If pandas/numpy could not be imported
        """
        if not PANDAS_AVAILABLE:
            raise ImportError("pandas and numpy are required for data quality validation")

        report = QualityReport(total_rows=len(df))

        # Use index as row ID if no usable id_column was specified
        row_ids = df[id_column] if id_column and id_column in df.columns else df.index

        self._check_completeness(df, report)
        self._check_required_properties(df, row_ids, report)
        self._validate_ranges(df, row_ids, report)
        if self.config.detect_outliers:
            self._detect_outliers(df, row_ids, report)

        # The report only fails when failing is enabled AND the ratio of
        # violations to rows exceeds the configured threshold
        violation_rate = len(report.violations) / max(report.total_rows, 1)
        if self.config.fail_on_violations and violation_rate > self.config.max_violation_rate:
            report.passed = False

        return report

    @staticmethod
    def _flagged_positions(mask: 'pd.Series') -> 'np.ndarray':
        """Return the integer positions at which a boolean mask is True"""
        # Masks built from nullable dtypes may contain <NA>; count those as
        # not flagged, which is how pandas boolean indexing treats them.
        return np.flatnonzero(mask.fillna(False).to_numpy(dtype=bool))

    @staticmethod
    def _row_id_at(row_ids: Any, position: int) -> Any:
        """Return the row identifier stored at an integer position"""
        # A Series (id column) needs .iloc for positional access; an Index
        # (the DataFrame index) is already positional under plain [].
        if hasattr(row_ids, 'iloc'):
            return row_ids.iloc[position]
        return row_ids[position]

    def _check_completeness(self, df: 'pd.DataFrame', report: QualityReport):
        """Record the fraction of non-null values for every column"""
        total = len(df)
        for col in df.columns:
            non_null_count = df[col].notna().sum()
            # An empty frame has no data to be complete: report 0.0
            report.completeness[col] = non_null_count / total if total > 0 else 0.0

    def _check_required_properties(self, df: 'pd.DataFrame', row_ids: Any, report: QualityReport):
        """Check that required properties are present and non-null"""
        for prop in self.config.required_properties:
            if prop not in df.columns:
                # Property missing entirely: one dataset-wide violation
                report.add_violation(
                    ValidationViolation(
                        violation_type=ViolationType.MISSING_VALUE,
                        property_name=prop,
                        row_id="ALL",
                        value=None,
                        expected="required property",
                        message=f"Required property '{prop}' is missing from dataset",
                    )
                )
                continue

            # One violation per row whose required value is null
            for pos in self._flagged_positions(df[prop].isna()):
                row_id = self._row_id_at(row_ids, pos)
                report.add_violation(
                    ValidationViolation(
                        violation_type=ViolationType.MISSING_VALUE,
                        property_name=prop,
                        row_id=row_id,
                        value=None,
                        expected="non-null value",
                        message=f"Required property '{prop}' is null in row {row_id}",
                    )
                )

    def _validate_ranges(self, df: 'pd.DataFrame', row_ids: Any, report: QualityReport):
        """Validate numeric properties are within specified ranges"""
        for prop, rule in self.config.range_rules.items():
            if prop not in df.columns:
                continue

            column = df[prop]

            # Only validate numeric columns
            if not pd.api.types.is_numeric_dtype(column):
                continue

            # (mask, expected text, relation wording, bound); minimum
            # violations are reported before maximum violations
            checks = []
            if rule.min_value is not None:
                checks.append((column < rule.min_value, f">= {rule.min_value}", "below minimum", rule.min_value))
            if rule.max_value is not None:
                checks.append((column > rule.max_value, f"<= {rule.max_value}", "above maximum", rule.max_value))

            for mask, expected, relation, bound in checks:
                for pos in self._flagged_positions(mask):
                    row_id = self._row_id_at(row_ids, pos)
                    value = column.iloc[pos]
                    report.add_violation(
                        ValidationViolation(
                            violation_type=ViolationType.RANGE_VIOLATION,
                            property_name=prop,
                            row_id=row_id,
                            value=value,
                            expected=expected,
                            message=f"Value {value} is {relation} {bound} for property '{prop}' in row {row_id}",
                        )
                    )

    def _detect_outliers(self, df: 'pd.DataFrame', row_ids: Any, report: QualityReport):
        """Detect outliers using 3 standard deviations rule"""
        numeric_cols = df.select_dtypes(include=[np.number]).columns

        for col in numeric_cols:
            column = df[col]

            # Skip if all values are null
            if column.isna().all():
                continue

            mean = column.mean()
            std = column.std()

            # Skip if std is 0 or NaN: no spread to measure against
            if pd.isna(std) or std == 0:
                continue

            # Store stats for later use
            self._property_stats[col] = {"mean": mean, "std": std}

            # Outliers lie strictly beyond 3 standard deviations of the mean
            lower_bound = mean - 3 * std
            upper_bound = mean + 3 * std
            outliers_mask = (column < lower_bound) | (column > upper_bound)

            for pos in self._flagged_positions(outliers_mask):
                row_id = self._row_id_at(row_ids, pos)
                value = column.iloc[pos]
                report.add_violation(
                    ValidationViolation(
                        violation_type=ViolationType.OUTLIER,
                        property_name=col,
                        row_id=row_id,
                        value=value,
                        expected=f"within [{lower_bound:.2f}, {upper_bound:.2f}]",
                        message=f"Value {value} is an outlier (>3 std devs) for property '{col}' in row {row_id}",
                    )
                )
302
+