aiecs 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. aiecs/__init__.py +72 -0
  2. aiecs/__main__.py +41 -0
  3. aiecs/aiecs_client.py +469 -0
  4. aiecs/application/__init__.py +10 -0
  5. aiecs/application/executors/__init__.py +10 -0
  6. aiecs/application/executors/operation_executor.py +363 -0
  7. aiecs/application/knowledge_graph/__init__.py +7 -0
  8. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
  11. aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
  12. aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
  13. aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
  14. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  15. aiecs/application/knowledge_graph/extractors/base.py +100 -0
  16. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
  17. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
  18. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
  19. aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
  20. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
  21. aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
  22. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
  23. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
  24. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  25. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
  26. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
  27. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  28. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
  29. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  30. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  31. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
  32. aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
  33. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
  34. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  35. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  36. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
  37. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
  38. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
  39. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
  40. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
  41. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
  42. aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
  43. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
  44. aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
  45. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
  46. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  47. aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
  48. aiecs/application/knowledge_graph/search/reranker.py +295 -0
  49. aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
  50. aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
  51. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  52. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
  53. aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
  54. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  55. aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
  56. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  57. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
  58. aiecs/common/__init__.py +9 -0
  59. aiecs/common/knowledge_graph/__init__.py +17 -0
  60. aiecs/common/knowledge_graph/runnable.py +484 -0
  61. aiecs/config/__init__.py +16 -0
  62. aiecs/config/config.py +498 -0
  63. aiecs/config/graph_config.py +137 -0
  64. aiecs/config/registry.py +23 -0
  65. aiecs/core/__init__.py +46 -0
  66. aiecs/core/interface/__init__.py +34 -0
  67. aiecs/core/interface/execution_interface.py +152 -0
  68. aiecs/core/interface/storage_interface.py +171 -0
  69. aiecs/domain/__init__.py +289 -0
  70. aiecs/domain/agent/__init__.py +189 -0
  71. aiecs/domain/agent/base_agent.py +697 -0
  72. aiecs/domain/agent/exceptions.py +103 -0
  73. aiecs/domain/agent/graph_aware_mixin.py +559 -0
  74. aiecs/domain/agent/hybrid_agent.py +490 -0
  75. aiecs/domain/agent/integration/__init__.py +26 -0
  76. aiecs/domain/agent/integration/context_compressor.py +222 -0
  77. aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
  78. aiecs/domain/agent/integration/retry_policy.py +219 -0
  79. aiecs/domain/agent/integration/role_config.py +213 -0
  80. aiecs/domain/agent/knowledge_aware_agent.py +646 -0
  81. aiecs/domain/agent/lifecycle.py +296 -0
  82. aiecs/domain/agent/llm_agent.py +300 -0
  83. aiecs/domain/agent/memory/__init__.py +12 -0
  84. aiecs/domain/agent/memory/conversation.py +197 -0
  85. aiecs/domain/agent/migration/__init__.py +14 -0
  86. aiecs/domain/agent/migration/conversion.py +160 -0
  87. aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
  88. aiecs/domain/agent/models.py +317 -0
  89. aiecs/domain/agent/observability.py +407 -0
  90. aiecs/domain/agent/persistence.py +289 -0
  91. aiecs/domain/agent/prompts/__init__.py +29 -0
  92. aiecs/domain/agent/prompts/builder.py +161 -0
  93. aiecs/domain/agent/prompts/formatters.py +189 -0
  94. aiecs/domain/agent/prompts/template.py +255 -0
  95. aiecs/domain/agent/registry.py +260 -0
  96. aiecs/domain/agent/tool_agent.py +257 -0
  97. aiecs/domain/agent/tools/__init__.py +12 -0
  98. aiecs/domain/agent/tools/schema_generator.py +221 -0
  99. aiecs/domain/community/__init__.py +155 -0
  100. aiecs/domain/community/agent_adapter.py +477 -0
  101. aiecs/domain/community/analytics.py +481 -0
  102. aiecs/domain/community/collaborative_workflow.py +642 -0
  103. aiecs/domain/community/communication_hub.py +645 -0
  104. aiecs/domain/community/community_builder.py +320 -0
  105. aiecs/domain/community/community_integration.py +800 -0
  106. aiecs/domain/community/community_manager.py +813 -0
  107. aiecs/domain/community/decision_engine.py +879 -0
  108. aiecs/domain/community/exceptions.py +225 -0
  109. aiecs/domain/community/models/__init__.py +33 -0
  110. aiecs/domain/community/models/community_models.py +268 -0
  111. aiecs/domain/community/resource_manager.py +457 -0
  112. aiecs/domain/community/shared_context_manager.py +603 -0
  113. aiecs/domain/context/__init__.py +58 -0
  114. aiecs/domain/context/context_engine.py +989 -0
  115. aiecs/domain/context/conversation_models.py +354 -0
  116. aiecs/domain/context/graph_memory.py +467 -0
  117. aiecs/domain/execution/__init__.py +12 -0
  118. aiecs/domain/execution/model.py +57 -0
  119. aiecs/domain/knowledge_graph/__init__.py +19 -0
  120. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  121. aiecs/domain/knowledge_graph/models/entity.py +130 -0
  122. aiecs/domain/knowledge_graph/models/evidence.py +194 -0
  123. aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
  124. aiecs/domain/knowledge_graph/models/path.py +179 -0
  125. aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
  126. aiecs/domain/knowledge_graph/models/query.py +272 -0
  127. aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
  128. aiecs/domain/knowledge_graph/models/relation.py +136 -0
  129. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  130. aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
  131. aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
  132. aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
  133. aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
  134. aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
  135. aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
  136. aiecs/domain/task/__init__.py +13 -0
  137. aiecs/domain/task/dsl_processor.py +613 -0
  138. aiecs/domain/task/model.py +62 -0
  139. aiecs/domain/task/task_context.py +268 -0
  140. aiecs/infrastructure/__init__.py +24 -0
  141. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  142. aiecs/infrastructure/graph_storage/base.py +601 -0
  143. aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
  144. aiecs/infrastructure/graph_storage/cache.py +429 -0
  145. aiecs/infrastructure/graph_storage/distributed.py +226 -0
  146. aiecs/infrastructure/graph_storage/error_handling.py +390 -0
  147. aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
  148. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  149. aiecs/infrastructure/graph_storage/in_memory.py +514 -0
  150. aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
  151. aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
  152. aiecs/infrastructure/graph_storage/metrics.py +357 -0
  153. aiecs/infrastructure/graph_storage/migration.py +413 -0
  154. aiecs/infrastructure/graph_storage/pagination.py +471 -0
  155. aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
  156. aiecs/infrastructure/graph_storage/postgres.py +871 -0
  157. aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
  158. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  159. aiecs/infrastructure/graph_storage/sqlite.py +623 -0
  160. aiecs/infrastructure/graph_storage/streaming.py +495 -0
  161. aiecs/infrastructure/messaging/__init__.py +13 -0
  162. aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
  163. aiecs/infrastructure/messaging/websocket_manager.py +298 -0
  164. aiecs/infrastructure/monitoring/__init__.py +34 -0
  165. aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
  166. aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
  167. aiecs/infrastructure/monitoring/structured_logger.py +48 -0
  168. aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
  169. aiecs/infrastructure/persistence/__init__.py +24 -0
  170. aiecs/infrastructure/persistence/context_engine_client.py +187 -0
  171. aiecs/infrastructure/persistence/database_manager.py +333 -0
  172. aiecs/infrastructure/persistence/file_storage.py +754 -0
  173. aiecs/infrastructure/persistence/redis_client.py +220 -0
  174. aiecs/llm/__init__.py +86 -0
  175. aiecs/llm/callbacks/__init__.py +11 -0
  176. aiecs/llm/callbacks/custom_callbacks.py +264 -0
  177. aiecs/llm/client_factory.py +420 -0
  178. aiecs/llm/clients/__init__.py +33 -0
  179. aiecs/llm/clients/base_client.py +193 -0
  180. aiecs/llm/clients/googleai_client.py +181 -0
  181. aiecs/llm/clients/openai_client.py +131 -0
  182. aiecs/llm/clients/vertex_client.py +437 -0
  183. aiecs/llm/clients/xai_client.py +184 -0
  184. aiecs/llm/config/__init__.py +51 -0
  185. aiecs/llm/config/config_loader.py +275 -0
  186. aiecs/llm/config/config_validator.py +236 -0
  187. aiecs/llm/config/model_config.py +151 -0
  188. aiecs/llm/utils/__init__.py +10 -0
  189. aiecs/llm/utils/validate_config.py +91 -0
  190. aiecs/main.py +363 -0
  191. aiecs/scripts/__init__.py +3 -0
  192. aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
  193. aiecs/scripts/aid/__init__.py +19 -0
  194. aiecs/scripts/aid/version_manager.py +215 -0
  195. aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
  196. aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
  197. aiecs/scripts/dependance_check/__init__.py +17 -0
  198. aiecs/scripts/dependance_check/dependency_checker.py +938 -0
  199. aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
  200. aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
  201. aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
  202. aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
  203. aiecs/scripts/dependance_patch/__init__.py +7 -0
  204. aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
  205. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  206. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
  207. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
  208. aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
  209. aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
  210. aiecs/scripts/tools_develop/README.md +449 -0
  211. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  212. aiecs/scripts/tools_develop/__init__.py +21 -0
  213. aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
  214. aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
  215. aiecs/scripts/tools_develop/verify_tools.py +356 -0
  216. aiecs/tasks/__init__.py +1 -0
  217. aiecs/tasks/worker.py +172 -0
  218. aiecs/tools/__init__.py +299 -0
  219. aiecs/tools/apisource/__init__.py +99 -0
  220. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  221. aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
  222. aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
  223. aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
  224. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  225. aiecs/tools/apisource/monitoring/metrics.py +303 -0
  226. aiecs/tools/apisource/providers/__init__.py +115 -0
  227. aiecs/tools/apisource/providers/base.py +664 -0
  228. aiecs/tools/apisource/providers/census.py +401 -0
  229. aiecs/tools/apisource/providers/fred.py +564 -0
  230. aiecs/tools/apisource/providers/newsapi.py +412 -0
  231. aiecs/tools/apisource/providers/worldbank.py +357 -0
  232. aiecs/tools/apisource/reliability/__init__.py +12 -0
  233. aiecs/tools/apisource/reliability/error_handler.py +375 -0
  234. aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
  235. aiecs/tools/apisource/tool.py +850 -0
  236. aiecs/tools/apisource/utils/__init__.py +9 -0
  237. aiecs/tools/apisource/utils/validators.py +338 -0
  238. aiecs/tools/base_tool.py +201 -0
  239. aiecs/tools/docs/__init__.py +121 -0
  240. aiecs/tools/docs/ai_document_orchestrator.py +599 -0
  241. aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
  242. aiecs/tools/docs/content_insertion_tool.py +1333 -0
  243. aiecs/tools/docs/document_creator_tool.py +1317 -0
  244. aiecs/tools/docs/document_layout_tool.py +1166 -0
  245. aiecs/tools/docs/document_parser_tool.py +994 -0
  246. aiecs/tools/docs/document_writer_tool.py +1818 -0
  247. aiecs/tools/knowledge_graph/__init__.py +17 -0
  248. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
  249. aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
  250. aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
  251. aiecs/tools/langchain_adapter.py +542 -0
  252. aiecs/tools/schema_generator.py +275 -0
  253. aiecs/tools/search_tool/__init__.py +100 -0
  254. aiecs/tools/search_tool/analyzers.py +589 -0
  255. aiecs/tools/search_tool/cache.py +260 -0
  256. aiecs/tools/search_tool/constants.py +128 -0
  257. aiecs/tools/search_tool/context.py +216 -0
  258. aiecs/tools/search_tool/core.py +749 -0
  259. aiecs/tools/search_tool/deduplicator.py +123 -0
  260. aiecs/tools/search_tool/error_handler.py +271 -0
  261. aiecs/tools/search_tool/metrics.py +371 -0
  262. aiecs/tools/search_tool/rate_limiter.py +178 -0
  263. aiecs/tools/search_tool/schemas.py +277 -0
  264. aiecs/tools/statistics/__init__.py +80 -0
  265. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
  266. aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
  267. aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
  268. aiecs/tools/statistics/data_loader_tool.py +564 -0
  269. aiecs/tools/statistics/data_profiler_tool.py +658 -0
  270. aiecs/tools/statistics/data_transformer_tool.py +573 -0
  271. aiecs/tools/statistics/data_visualizer_tool.py +495 -0
  272. aiecs/tools/statistics/model_trainer_tool.py +487 -0
  273. aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
  274. aiecs/tools/task_tools/__init__.py +86 -0
  275. aiecs/tools/task_tools/chart_tool.py +732 -0
  276. aiecs/tools/task_tools/classfire_tool.py +922 -0
  277. aiecs/tools/task_tools/image_tool.py +447 -0
  278. aiecs/tools/task_tools/office_tool.py +684 -0
  279. aiecs/tools/task_tools/pandas_tool.py +635 -0
  280. aiecs/tools/task_tools/report_tool.py +635 -0
  281. aiecs/tools/task_tools/research_tool.py +392 -0
  282. aiecs/tools/task_tools/scraper_tool.py +715 -0
  283. aiecs/tools/task_tools/stats_tool.py +688 -0
  284. aiecs/tools/temp_file_manager.py +130 -0
  285. aiecs/tools/tool_executor/__init__.py +37 -0
  286. aiecs/tools/tool_executor/tool_executor.py +881 -0
  287. aiecs/utils/LLM_output_structor.py +445 -0
  288. aiecs/utils/__init__.py +34 -0
  289. aiecs/utils/base_callback.py +47 -0
  290. aiecs/utils/cache_provider.py +695 -0
  291. aiecs/utils/execution_utils.py +184 -0
  292. aiecs/utils/logging.py +1 -0
  293. aiecs/utils/prompt_loader.py +14 -0
  294. aiecs/utils/token_usage_repository.py +323 -0
  295. aiecs/ws/__init__.py +0 -0
  296. aiecs/ws/socket_server.py +52 -0
  297. aiecs-1.5.1.dist-info/METADATA +608 -0
  298. aiecs-1.5.1.dist-info/RECORD +302 -0
  299. aiecs-1.5.1.dist-info/WHEEL +5 -0
  300. aiecs-1.5.1.dist-info/entry_points.txt +10 -0
  301. aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
  302. aiecs-1.5.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,635 @@
1
+ """
2
+ Query Optimizer
3
+
4
+ Advanced query optimization for knowledge graph queries.
5
+ """
6
+
7
+ from typing import List, Dict, Any, Optional, Set, Tuple
8
+ from dataclasses import dataclass, field
9
+ from enum import Enum
10
+ import logging
11
+
12
+ from aiecs.domain.knowledge_graph.models.query_plan import QueryPlan, QueryStep
13
+ from aiecs.domain.knowledge_graph.models.query import QueryType
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class OptimizationRule(str, Enum):
    """
    Names of the optimization rules a QueryOptimizer can be asked to apply.

    Members are also plain strings (the class mixes in ``str``), so a member
    compares equal to its lowercase value, e.g.
    ``OptimizationRule.COST_BASED == "cost_based"``.
    """

    # Move property filters onto an earlier step of the plan.
    PREDICATE_PUSHDOWN = "predicate_pushdown"
    # Reorder multi-hop steps so the most selective ones run first.
    JOIN_REORDERING = "join_reordering"
    # Drop steps that repeat an operation already present in the plan.
    REDUNDANT_ELIMINATION = "redundant_elimination"
    # NOTE(review): no code visible in this module acts on this rule;
    # confirm where (or whether) it is consumed.
    FILTER_EARLY = "filter_early"
    # Reorder steps by estimated execution cost.
    COST_BASED = "cost_based"
26
+
27
+
28
@dataclass
class QueryStatistics:
    """
    Query execution statistics for cost estimation

    Attributes:
        entity_count: Estimated number of entities in graph
        relation_count: Estimated number of relations in graph
        avg_degree: Average node degree (connections per entity)
        entity_type_counts: Count of entities per type
        relation_type_counts: Count of relations per type
    """

    entity_count: int = 1000
    relation_count: int = 5000
    avg_degree: float = 5.0
    entity_type_counts: Dict[str, int] = field(default_factory=dict)
    relation_type_counts: Dict[str, int] = field(default_factory=dict)

    def get_selectivity(self, entity_type: Optional[str] = None) -> float:
        """
        Estimate selectivity (fraction of entities matching a type filter)

        Args:
            entity_type: Entity type filter. ``None``, an empty string, or a
                type with no recorded count means "no usable filter".

        Returns:
            Selectivity estimate, always within 0.0-1.0. Returns 1.0 when
            there is no usable filter.
        """
        if not entity_type:
            return 1.0  # No filter = all entities

        type_count = self.entity_type_counts.get(entity_type)
        if type_count is None:
            return 1.0  # Unknown type: assume nothing is filtered out

        # max(..., 1) guards against division by zero on an empty graph.
        ratio = type_count / max(self.entity_count, 1)

        # The per-type counts and entity_count are independent estimates and
        # may disagree (e.g. a stale total), so clamp to the documented range
        # instead of returning a "fraction" above 1.0 or below 0.0.
        return min(1.0, max(0.0, ratio))
60
+
61
+
62
@dataclass
class OptimizationResult:
    """
    Outcome of one optimization pass over a query plan

    Attributes:
        original_plan: The plan that was handed to the optimizer
        optimized_plan: The plan produced by the optimizer
        rules_applied: Labels of the optimization rules that were applied
        estimated_cost_reduction: Estimated relative cost saving
            (intended range 0.0-1.0)
        explanation: Human-readable summary of the optimizations
    """

    # Required fields: both plans must always be supplied.
    original_plan: QueryPlan
    optimized_plan: QueryPlan

    # Optional fields: defaults describe "nothing was changed".
    rules_applied: List[str] = field(default_factory=list)
    estimated_cost_reduction: float = 0.0
    explanation: str = ""
80
+
81
+
82
+ class QueryOptimizer:
83
+ """
84
+ Advanced Query Optimizer
85
+
86
+ Optimizes query execution plans using various optimization techniques:
87
+ - Predicate push-down: Move filters earlier in execution
88
+ - Join reordering: Reorder multi-hop queries for efficiency
89
+ - Redundant operation elimination: Remove duplicate operations
90
+ - Cost-based optimization: Choose execution order based on cost estimates
91
+
92
+ Example:
93
+ ```python
94
+ optimizer = QueryOptimizer(statistics=stats)
95
+
96
+ # Optimize a query plan
97
+ result = optimizer.optimize(plan)
98
+
99
+ print(f"Cost reduction: {result.estimated_cost_reduction:.1%}")
100
+ print(f"Rules applied: {result.rules_applied}")
101
+ ```
102
+ """
103
+
104
+ def __init__(
105
+ self,
106
+ statistics: Optional[QueryStatistics] = None,
107
+ enable_rules: Optional[List[OptimizationRule]] = None,
108
+ ):
109
+ """
110
+ Initialize query optimizer
111
+
112
+ Args:
113
+ statistics: Query statistics for cost estimation
114
+ enable_rules: List of optimization rules to enable (None = all)
115
+ """
116
+ self.statistics = statistics or QueryStatistics()
117
+ self.enable_rules = enable_rules or list(OptimizationRule)
118
+ self._optimization_count = 0
119
+
120
+ def optimize(self, plan: QueryPlan) -> OptimizationResult:
121
+ """
122
+ Optimize a query execution plan
123
+
124
+ Args:
125
+ plan: Original query plan
126
+
127
+ Returns:
128
+ Optimization result with optimized plan
129
+ """
130
+ if plan.optimized:
131
+ logger.debug(f"Plan {plan.plan_id} already optimized")
132
+ return OptimizationResult(
133
+ original_plan=plan,
134
+ optimized_plan=plan,
135
+ explanation="Plan already optimized",
136
+ )
137
+
138
+ original_cost = plan.total_estimated_cost
139
+ optimized_steps = list(plan.steps)
140
+ rules_applied = []
141
+
142
+ # Apply optimization rules in order
143
+ if OptimizationRule.REDUNDANT_ELIMINATION in self.enable_rules:
144
+ optimized_steps, eliminated = self._eliminate_redundant_operations(optimized_steps)
145
+ if eliminated > 0:
146
+ rules_applied.append(f"redundant_elimination (removed {eliminated} ops)")
147
+
148
+ if OptimizationRule.PREDICATE_PUSHDOWN in self.enable_rules:
149
+ optimized_steps, pushed = self._push_down_predicates(optimized_steps)
150
+ if pushed > 0:
151
+ rules_applied.append(f"predicate_pushdown (pushed {pushed} filters)")
152
+
153
+ if OptimizationRule.JOIN_REORDERING in self.enable_rules:
154
+ optimized_steps = self._reorder_joins(optimized_steps)
155
+ rules_applied.append("join_reordering")
156
+
157
+ if OptimizationRule.COST_BASED in self.enable_rules:
158
+ optimized_steps = self._cost_based_reordering(optimized_steps)
159
+ rules_applied.append("cost_based_reordering")
160
+
161
+ # Create optimized plan
162
+ optimized_plan = QueryPlan(
163
+ plan_id=plan.plan_id,
164
+ original_query=plan.original_query,
165
+ steps=optimized_steps,
166
+ optimized=True,
167
+ explanation=plan.explanation,
168
+ metadata=plan.metadata,
169
+ )
170
+ optimized_plan.total_estimated_cost = optimized_plan.calculate_total_cost()
171
+
172
+ # Calculate cost reduction
173
+ cost_reduction = 0.0
174
+ if original_cost > 0:
175
+ cost_reduction = (original_cost - optimized_plan.total_estimated_cost) / original_cost
176
+
177
+ self._optimization_count += 1
178
+
179
+ explanation = self._generate_explanation(
180
+ plan, optimized_plan, rules_applied, cost_reduction
181
+ )
182
+
183
+ return OptimizationResult(
184
+ original_plan=plan,
185
+ optimized_plan=optimized_plan,
186
+ rules_applied=rules_applied,
187
+ estimated_cost_reduction=cost_reduction,
188
+ explanation=explanation,
189
+ )
190
+
191
+ def _eliminate_redundant_operations(
192
+ self, steps: List[QueryStep]
193
+ ) -> Tuple[List[QueryStep], int]:
194
+ """
195
+ Eliminate redundant operations
196
+
197
+ Args:
198
+ steps: Query steps
199
+
200
+ Returns:
201
+ Tuple of (optimized steps, number of operations eliminated)
202
+ """
203
+ seen_operations: Dict[str, QueryStep] = {}
204
+ optimized = []
205
+ eliminated = 0
206
+
207
+ for step in steps:
208
+ # Create a signature for this operation
209
+ signature = self._get_operation_signature(step)
210
+
211
+ if signature in seen_operations:
212
+ # Redundant operation - update dependencies to point to
213
+ # original
214
+ original_step = seen_operations[signature]
215
+
216
+ # Update other steps that depend on this redundant step
217
+ for other_step in steps:
218
+ if step.step_id in other_step.depends_on:
219
+ # Replace dependency with original step
220
+ other_step.depends_on = [
221
+ (original_step.step_id if dep == step.step_id else dep)
222
+ for dep in other_step.depends_on
223
+ ]
224
+
225
+ eliminated += 1
226
+ logger.debug(
227
+ f"Eliminated redundant operation: {step.step_id} -> {original_step.step_id}"
228
+ )
229
+ else:
230
+ seen_operations[signature] = step
231
+ optimized.append(step)
232
+
233
+ return optimized, eliminated
234
+
235
+ def _get_operation_signature(self, step: QueryStep) -> str:
236
+ """
237
+ Get a signature for an operation to detect duplicates
238
+
239
+ Args:
240
+ step: Query step
241
+
242
+ Returns:
243
+ Signature string
244
+ """
245
+ query = step.query
246
+ parts = [
247
+ str(step.operation),
248
+ str(query.query_type),
249
+ str(query.entity_id or ""),
250
+ str(query.entity_type or ""),
251
+ str(query.relation_type or ""),
252
+ str(sorted(query.properties.items()) if query.properties else ""),
253
+ ]
254
+ return "|".join(parts)
255
+
256
+ def _push_down_predicates(self, steps: List[QueryStep]) -> Tuple[List[QueryStep], int]:
257
+ """
258
+ Push predicates (filters) earlier in execution
259
+
260
+ Strategy: Move property filters to the earliest possible step
261
+
262
+ Args:
263
+ steps: Query steps
264
+
265
+ Returns:
266
+ Tuple of (optimized steps, number of predicates pushed)
267
+ """
268
+ pushed_count = 0
269
+
270
+ # Find filter steps
271
+ for i, step in enumerate(steps):
272
+ if not step.query.properties:
273
+ continue
274
+
275
+ # Check if we can push this filter to an earlier step
276
+ for j in range(i):
277
+ earlier_step = steps[j]
278
+
279
+ # Can only push to steps this one depends on
280
+ if earlier_step.step_id not in step.depends_on:
281
+ continue
282
+
283
+ # Check if filter is applicable to earlier step
284
+ if self._can_apply_filter(earlier_step, step.query.properties):
285
+ # Move filter to earlier step
286
+ earlier_step.query.properties.update(step.query.properties)
287
+ step.query.properties = {}
288
+ pushed_count += 1
289
+ logger.debug(f"Pushed filter from {step.step_id} to {earlier_step.step_id}")
290
+ break
291
+
292
+ return steps, pushed_count
293
+
294
+ def _can_apply_filter(self, step: QueryStep, properties: Dict[str, Any]) -> bool:
295
+ """
296
+ Check if a filter can be applied to a step
297
+
298
+ Args:
299
+ step: Query step
300
+ properties: Property filters
301
+
302
+ Returns:
303
+ True if filter can be applied
304
+ """
305
+ # Can apply filters to entity lookup and vector search
306
+ return step.query.query_type in [
307
+ QueryType.ENTITY_LOOKUP,
308
+ QueryType.VECTOR_SEARCH,
309
+ QueryType.TRAVERSAL,
310
+ ]
311
+
312
+ def _reorder_joins(self, steps: List[QueryStep]) -> List[QueryStep]:
313
+ """
314
+ Reorder join operations (multi-hop queries) for efficiency
315
+
316
+ Strategy: Execute most selective operations first
317
+
318
+ Args:
319
+ steps: Query steps
320
+
321
+ Returns:
322
+ Reordered steps
323
+ """
324
+ # Group steps by dependency level
325
+ levels = self._get_dependency_levels(steps)
326
+
327
+ reordered = []
328
+ for level_steps in levels:
329
+ # Sort by selectivity (most selective first)
330
+ sorted_level = sorted(level_steps, key=lambda s: self._estimate_selectivity(s))
331
+ reordered.extend(sorted_level)
332
+
333
+ return reordered
334
+
335
+ def _estimate_selectivity(self, step: QueryStep) -> float:
336
+ """
337
+ Estimate selectivity of a query step (fraction of results returned)
338
+
339
+ Lower selectivity = fewer results = should execute first
340
+
341
+ Args:
342
+ step: Query step
343
+
344
+ Returns:
345
+ Selectivity estimate (0.0-1.0)
346
+ """
347
+ query = step.query
348
+ selectivity = 1.0
349
+
350
+ # Entity type filter
351
+ if query.entity_type:
352
+ selectivity *= self.statistics.get_selectivity(query.entity_type)
353
+
354
+ # Property filters
355
+ if query.properties:
356
+ # Each property filter reduces selectivity
357
+ selectivity *= 0.5 ** len(query.properties)
358
+
359
+ # Score threshold
360
+ if query.score_threshold > 0:
361
+ selectivity *= 1.0 - query.score_threshold
362
+
363
+ # Max results limit
364
+ if query.max_results:
365
+ # Estimate based on total entity count
366
+ limit_selectivity = query.max_results / max(self.statistics.entity_count, 1)
367
+ selectivity = min(selectivity, limit_selectivity)
368
+
369
+ return selectivity
370
+
371
+ def _cost_based_reordering(self, steps: List[QueryStep]) -> List[QueryStep]:
372
+ """
373
+ Reorder steps based on estimated cost
374
+
375
+ Strategy: Execute cheaper operations first within each dependency level
376
+
377
+ Args:
378
+ steps: Query steps
379
+
380
+ Returns:
381
+ Reordered steps
382
+ """
383
+ levels = self._get_dependency_levels(steps)
384
+
385
+ reordered = []
386
+ for level_steps in levels:
387
+ # Sort by estimated cost (ascending)
388
+ sorted_level = sorted(level_steps, key=lambda s: self._estimate_step_cost(s))
389
+ reordered.extend(sorted_level)
390
+
391
+ return reordered
392
+
393
+ def _estimate_step_cost(self, step: QueryStep) -> float:
394
+ """
395
+ Estimate execution cost of a query step
396
+
397
+ Args:
398
+ step: Query step
399
+
400
+ Returns:
401
+ Estimated cost (higher = more expensive)
402
+ """
403
+ query = step.query
404
+ base_cost = step.estimated_cost
405
+
406
+ # Adjust based on query type
407
+ if query.query_type == QueryType.VECTOR_SEARCH:
408
+ # Vector search is expensive
409
+ base_cost *= 2.0
410
+ elif query.query_type == QueryType.PATH_FINDING:
411
+ # Path finding is very expensive
412
+ base_cost *= 3.0
413
+ elif query.query_type == QueryType.TRAVERSAL:
414
+ # Traversal cost depends on depth
415
+ base_cost *= 1.0 + query.max_depth * 0.5
416
+
417
+ # Adjust based on expected result size
418
+ selectivity = self._estimate_selectivity(step)
419
+ expected_results = selectivity * self.statistics.entity_count
420
+
421
+ # More results = higher cost
422
+ base_cost *= 1.0 + expected_results / 1000.0
423
+
424
+ return base_cost
425
+
426
+ def _get_dependency_levels(self, steps: List[QueryStep]) -> List[List[QueryStep]]:
427
+ """
428
+ Group steps by dependency level
429
+
430
+ Args:
431
+ steps: Query steps
432
+
433
+ Returns:
434
+ List of lists, where each inner list contains steps at the same dependency level
435
+ """
436
+ levels: List[List[QueryStep]] = []
437
+ remaining = list(steps)
438
+ completed: Set[str] = set()
439
+
440
+ while remaining:
441
+ # Find steps with all dependencies satisfied
442
+ current_level = [
443
+ step for step in remaining if all(dep in completed for dep in step.depends_on)
444
+ ]
445
+
446
+ if not current_level:
447
+ # Circular dependency or error
448
+ logger.warning("Circular dependency detected in query plan")
449
+ break
450
+
451
+ levels.append(current_level)
452
+
453
+ # Mark these steps as completed
454
+ for step in current_level:
455
+ completed.add(step.step_id)
456
+ remaining.remove(step)
457
+
458
+ return levels
459
+
460
+ def _generate_explanation(
461
+ self,
462
+ original_plan: QueryPlan,
463
+ optimized_plan: QueryPlan,
464
+ rules_applied: List[str],
465
+ cost_reduction: float,
466
+ ) -> str:
467
+ """
468
+ Generate human-readable explanation of optimizations
469
+
470
+ Args:
471
+ original_plan: Original query plan
472
+ optimized_plan: Optimized query plan
473
+ rules_applied: List of rules applied
474
+ cost_reduction: Estimated cost reduction
475
+
476
+ Returns:
477
+ Explanation string
478
+ """
479
+ parts = [
480
+ f"Optimized query plan {original_plan.plan_id}:",
481
+ f"- Original cost: {original_plan.total_estimated_cost:.3f}",
482
+ f"- Optimized cost: {optimized_plan.total_estimated_cost:.3f}",
483
+ f"- Cost reduction: {cost_reduction:.1%}",
484
+ f"- Steps: {len(original_plan.steps)} -> {len(optimized_plan.steps)}",
485
+ ]
486
+
487
+ if rules_applied:
488
+ parts.append(f"- Rules applied: {', '.join(rules_applied)}")
489
+
490
+ return "\n".join(parts)
491
+
492
def update_statistics(self, statistics: QueryStatistics) -> None:
    """
    Replace the statistics held by this instance

    Stores ``statistics`` on ``self.statistics`` and then logs its entity
    and relation counts at INFO level.

    Args:
        statistics: New query statistics
    """
    self.statistics = statistics

    summary = f"Updated query statistics: {statistics.entity_count} entities, {statistics.relation_count} relations"
    logger.info(summary)
503
+
504
def get_optimization_count(self) -> int:
    """
    Report the value of the internal optimization counter

    Returns:
        Number of optimizations performed
    """
    performed = self._optimization_count
    return performed
507
+
508
+ def __repr__(self) -> str:
509
+ return f"QueryOptimizer(rules={len(self.enable_rules)}, optimizations={self._optimization_count})"
510
+
511
+
512
class QueryStatisticsCollector:
    """
    Collects query execution statistics for cost estimation

    Tracks:
    - Entity and relation counts
    - Entity/relation type distributions
    - Average node degree
    - Query execution times

    Example:
    ```python
    collector = QueryStatisticsCollector()

    # Collect from graph store
    stats = collector.collect_from_graph_store(graph_store)

    # Use for optimization
    optimizer = QueryOptimizer(statistics=stats)
    ```
    """

    # Number of most recent execution times kept by record_execution_time().
    _MAX_EXECUTION_TIMES = 1000

    def __init__(self):
        """Initialize statistics collector"""
        self._execution_times: List[float] = []

    def collect_from_graph_store(self, graph_store) -> QueryStatistics:
        """
        Collect statistics from a graph store

        Reads ``graph_store.entities`` and ``graph_store.relations`` to count
        entities and relations overall and per type, and calls
        ``get_outgoing_relations`` / ``get_incoming_relations`` for every
        entity id to compute the average degree.

        Args:
            graph_store: Graph store instance

        Returns:
            Query statistics. If ``graph_store`` is not a ``GraphStore``, a
            warning is logged and a default-constructed ``QueryStatistics``
            is returned.
        """
        # Imported at call time, as in the original code.
        from aiecs.domain.knowledge_graph.graph_store import GraphStore

        if not isinstance(graph_store, GraphStore):
            logger.warning("Invalid graph store type")
            return QueryStatistics()

        # Count entities and relations
        entity_count = len(graph_store.entities)
        relation_count = len(graph_store.relations)

        # Count by type
        entity_type_counts: Dict[str, int] = {}
        for entity in graph_store.entities.values():
            entity_type = entity.entity_type
            entity_type_counts[entity_type] = entity_type_counts.get(entity_type, 0) + 1

        relation_type_counts: Dict[str, int] = {}
        for relation in graph_store.relations.values():
            relation_type = relation.relation_type
            relation_type_counts[relation_type] = relation_type_counts.get(relation_type, 0) + 1

        # Calculate average degree (outgoing + incoming relations per entity)
        degree_sum = 0
        for entity_id in graph_store.entities:
            outgoing = len(graph_store.get_outgoing_relations(entity_id))
            incoming = len(graph_store.get_incoming_relations(entity_id))
            degree_sum += outgoing + incoming

        # max(..., 1) avoids a division by zero on an empty store.
        avg_degree = degree_sum / max(entity_count, 1)

        stats = QueryStatistics(
            entity_count=entity_count,
            relation_count=relation_count,
            avg_degree=avg_degree,
            entity_type_counts=entity_type_counts,
            relation_type_counts=relation_type_counts,
        )

        logger.info(
            f"Collected statistics: {entity_count} entities, {relation_count} relations, avg degree {avg_degree:.1f}"
        )

        return stats

    def record_execution_time(self, execution_time_ms: float) -> None:
        """
        Record query execution time

        Only the most recent ``_MAX_EXECUTION_TIMES`` (1000) values are kept;
        older ones are discarded.

        Args:
            execution_time_ms: Execution time in milliseconds
        """
        self._execution_times.append(execution_time_ms)

        # Trim the oldest entries in place once the window is exceeded.
        if len(self._execution_times) > self._MAX_EXECUTION_TIMES:
            del self._execution_times[: -self._MAX_EXECUTION_TIMES]

    def get_average_execution_time(self) -> float:
        """
        Get average query execution time

        Returns:
            Average execution time in milliseconds, or 0.0 when nothing has
            been recorded
        """
        if not self._execution_times:
            return 0.0
        return sum(self._execution_times) / len(self._execution_times)

    def get_execution_percentile(self, percentile: float) -> float:
        """
        Get execution time percentile

        The recorded times are sorted and the element at index
        ``int(count * percentile)`` is returned. The index is clamped to the
        valid range, so a percentile below 0.0 yields the smallest recorded
        time and a percentile of 1.0 or above yields the largest.

        Args:
            percentile: Percentile (0.0-1.0)

        Returns:
            Execution time at percentile, or 0.0 when nothing has been
            recorded
        """
        if not self._execution_times:
            return 0.0

        sorted_times = sorted(self._execution_times)
        last_index = len(sorted_times) - 1
        index = int(len(sorted_times) * percentile)
        # Clamp on both sides: without the lower bound a negative percentile
        # becomes a negative index and silently wraps from the end.
        return sorted_times[max(0, min(index, last_index))]

    def reset(self) -> None:
        """Reset collected statistics"""
        self._execution_times = []