PyPI - aiecs - Versions diffs - 1.5.1__py3-none-any.whl - Mend

aiecs 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (302) hide show

aiecs/__init__.py +72 -0
aiecs/__main__.py +41 -0
aiecs/aiecs_client.py +469 -0
aiecs/application/__init__.py +10 -0
aiecs/application/executors/__init__.py +10 -0
aiecs/application/executors/operation_executor.py +363 -0
aiecs/application/knowledge_graph/__init__.py +7 -0
aiecs/application/knowledge_graph/builder/__init__.py +37 -0
aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
aiecs/application/knowledge_graph/extractors/base.py +100 -0
aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
aiecs/application/knowledge_graph/search/__init__.py +59 -0
aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
aiecs/application/knowledge_graph/search/reranker.py +295 -0
aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
aiecs/application/knowledge_graph/validators/__init__.py +13 -0
aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
aiecs/common/__init__.py +9 -0
aiecs/common/knowledge_graph/__init__.py +17 -0
aiecs/common/knowledge_graph/runnable.py +484 -0
aiecs/config/__init__.py +16 -0
aiecs/config/config.py +498 -0
aiecs/config/graph_config.py +137 -0
aiecs/config/registry.py +23 -0
aiecs/core/__init__.py +46 -0
aiecs/core/interface/__init__.py +34 -0
aiecs/core/interface/execution_interface.py +152 -0
aiecs/core/interface/storage_interface.py +171 -0
aiecs/domain/__init__.py +289 -0
aiecs/domain/agent/__init__.py +189 -0
aiecs/domain/agent/base_agent.py +697 -0
aiecs/domain/agent/exceptions.py +103 -0
aiecs/domain/agent/graph_aware_mixin.py +559 -0
aiecs/domain/agent/hybrid_agent.py +490 -0
aiecs/domain/agent/integration/__init__.py +26 -0
aiecs/domain/agent/integration/context_compressor.py +222 -0
aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
aiecs/domain/agent/integration/retry_policy.py +219 -0
aiecs/domain/agent/integration/role_config.py +213 -0
aiecs/domain/agent/knowledge_aware_agent.py +646 -0
aiecs/domain/agent/lifecycle.py +296 -0
aiecs/domain/agent/llm_agent.py +300 -0
aiecs/domain/agent/memory/__init__.py +12 -0
aiecs/domain/agent/memory/conversation.py +197 -0
aiecs/domain/agent/migration/__init__.py +14 -0
aiecs/domain/agent/migration/conversion.py +160 -0
aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
aiecs/domain/agent/models.py +317 -0
aiecs/domain/agent/observability.py +407 -0
aiecs/domain/agent/persistence.py +289 -0
aiecs/domain/agent/prompts/__init__.py +29 -0
aiecs/domain/agent/prompts/builder.py +161 -0
aiecs/domain/agent/prompts/formatters.py +189 -0
aiecs/domain/agent/prompts/template.py +255 -0
aiecs/domain/agent/registry.py +260 -0
aiecs/domain/agent/tool_agent.py +257 -0
aiecs/domain/agent/tools/__init__.py +12 -0
aiecs/domain/agent/tools/schema_generator.py +221 -0
aiecs/domain/community/__init__.py +155 -0
aiecs/domain/community/agent_adapter.py +477 -0
aiecs/domain/community/analytics.py +481 -0
aiecs/domain/community/collaborative_workflow.py +642 -0
aiecs/domain/community/communication_hub.py +645 -0
aiecs/domain/community/community_builder.py +320 -0
aiecs/domain/community/community_integration.py +800 -0
aiecs/domain/community/community_manager.py +813 -0
aiecs/domain/community/decision_engine.py +879 -0
aiecs/domain/community/exceptions.py +225 -0
aiecs/domain/community/models/__init__.py +33 -0
aiecs/domain/community/models/community_models.py +268 -0
aiecs/domain/community/resource_manager.py +457 -0
aiecs/domain/community/shared_context_manager.py +603 -0
aiecs/domain/context/__init__.py +58 -0
aiecs/domain/context/context_engine.py +989 -0
aiecs/domain/context/conversation_models.py +354 -0
aiecs/domain/context/graph_memory.py +467 -0
aiecs/domain/execution/__init__.py +12 -0
aiecs/domain/execution/model.py +57 -0
aiecs/domain/knowledge_graph/__init__.py +19 -0
aiecs/domain/knowledge_graph/models/__init__.py +52 -0
aiecs/domain/knowledge_graph/models/entity.py +130 -0
aiecs/domain/knowledge_graph/models/evidence.py +194 -0
aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
aiecs/domain/knowledge_graph/models/path.py +179 -0
aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
aiecs/domain/knowledge_graph/models/query.py +272 -0
aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
aiecs/domain/knowledge_graph/models/relation.py +136 -0
aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
aiecs/domain/task/__init__.py +13 -0
aiecs/domain/task/dsl_processor.py +613 -0
aiecs/domain/task/model.py +62 -0
aiecs/domain/task/task_context.py +268 -0
aiecs/infrastructure/__init__.py +24 -0
aiecs/infrastructure/graph_storage/__init__.py +11 -0
aiecs/infrastructure/graph_storage/base.py +601 -0
aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
aiecs/infrastructure/graph_storage/cache.py +429 -0
aiecs/infrastructure/graph_storage/distributed.py +226 -0
aiecs/infrastructure/graph_storage/error_handling.py +390 -0
aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
aiecs/infrastructure/graph_storage/health_checks.py +378 -0
aiecs/infrastructure/graph_storage/in_memory.py +514 -0
aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
aiecs/infrastructure/graph_storage/metrics.py +357 -0
aiecs/infrastructure/graph_storage/migration.py +413 -0
aiecs/infrastructure/graph_storage/pagination.py +471 -0
aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
aiecs/infrastructure/graph_storage/postgres.py +871 -0
aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
aiecs/infrastructure/graph_storage/sqlite.py +623 -0
aiecs/infrastructure/graph_storage/streaming.py +495 -0
aiecs/infrastructure/messaging/__init__.py +13 -0
aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
aiecs/infrastructure/messaging/websocket_manager.py +298 -0
aiecs/infrastructure/monitoring/__init__.py +34 -0
aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
aiecs/infrastructure/monitoring/structured_logger.py +48 -0
aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
aiecs/infrastructure/persistence/__init__.py +24 -0
aiecs/infrastructure/persistence/context_engine_client.py +187 -0
aiecs/infrastructure/persistence/database_manager.py +333 -0
aiecs/infrastructure/persistence/file_storage.py +754 -0
aiecs/infrastructure/persistence/redis_client.py +220 -0
aiecs/llm/__init__.py +86 -0
aiecs/llm/callbacks/__init__.py +11 -0
aiecs/llm/callbacks/custom_callbacks.py +264 -0
aiecs/llm/client_factory.py +420 -0
aiecs/llm/clients/__init__.py +33 -0
aiecs/llm/clients/base_client.py +193 -0
aiecs/llm/clients/googleai_client.py +181 -0
aiecs/llm/clients/openai_client.py +131 -0
aiecs/llm/clients/vertex_client.py +437 -0
aiecs/llm/clients/xai_client.py +184 -0
aiecs/llm/config/__init__.py +51 -0
aiecs/llm/config/config_loader.py +275 -0
aiecs/llm/config/config_validator.py +236 -0
aiecs/llm/config/model_config.py +151 -0
aiecs/llm/utils/__init__.py +10 -0
aiecs/llm/utils/validate_config.py +91 -0
aiecs/main.py +363 -0
aiecs/scripts/__init__.py +3 -0
aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
aiecs/scripts/aid/__init__.py +19 -0
aiecs/scripts/aid/version_manager.py +215 -0
aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
aiecs/scripts/dependance_check/__init__.py +17 -0
aiecs/scripts/dependance_check/dependency_checker.py +938 -0
aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
aiecs/scripts/dependance_patch/__init__.py +7 -0
aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
aiecs/scripts/tools_develop/README.md +449 -0
aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
aiecs/scripts/tools_develop/__init__.py +21 -0
aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
aiecs/scripts/tools_develop/verify_tools.py +356 -0
aiecs/tasks/__init__.py +1 -0
aiecs/tasks/worker.py +172 -0
aiecs/tools/__init__.py +299 -0
aiecs/tools/apisource/__init__.py +99 -0
aiecs/tools/apisource/intelligence/__init__.py +19 -0
aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
aiecs/tools/apisource/monitoring/__init__.py +9 -0
aiecs/tools/apisource/monitoring/metrics.py +303 -0
aiecs/tools/apisource/providers/__init__.py +115 -0
aiecs/tools/apisource/providers/base.py +664 -0
aiecs/tools/apisource/providers/census.py +401 -0
aiecs/tools/apisource/providers/fred.py +564 -0
aiecs/tools/apisource/providers/newsapi.py +412 -0
aiecs/tools/apisource/providers/worldbank.py +357 -0
aiecs/tools/apisource/reliability/__init__.py +12 -0
aiecs/tools/apisource/reliability/error_handler.py +375 -0
aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
aiecs/tools/apisource/tool.py +850 -0
aiecs/tools/apisource/utils/__init__.py +9 -0
aiecs/tools/apisource/utils/validators.py +338 -0
aiecs/tools/base_tool.py +201 -0
aiecs/tools/docs/__init__.py +121 -0
aiecs/tools/docs/ai_document_orchestrator.py +599 -0
aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
aiecs/tools/docs/content_insertion_tool.py +1333 -0
aiecs/tools/docs/document_creator_tool.py +1317 -0
aiecs/tools/docs/document_layout_tool.py +1166 -0
aiecs/tools/docs/document_parser_tool.py +994 -0
aiecs/tools/docs/document_writer_tool.py +1818 -0
aiecs/tools/knowledge_graph/__init__.py +17 -0
aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
aiecs/tools/langchain_adapter.py +542 -0
aiecs/tools/schema_generator.py +275 -0
aiecs/tools/search_tool/__init__.py +100 -0
aiecs/tools/search_tool/analyzers.py +589 -0
aiecs/tools/search_tool/cache.py +260 -0
aiecs/tools/search_tool/constants.py +128 -0
aiecs/tools/search_tool/context.py +216 -0
aiecs/tools/search_tool/core.py +749 -0
aiecs/tools/search_tool/deduplicator.py +123 -0
aiecs/tools/search_tool/error_handler.py +271 -0
aiecs/tools/search_tool/metrics.py +371 -0
aiecs/tools/search_tool/rate_limiter.py +178 -0
aiecs/tools/search_tool/schemas.py +277 -0
aiecs/tools/statistics/__init__.py +80 -0
aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
aiecs/tools/statistics/data_loader_tool.py +564 -0
aiecs/tools/statistics/data_profiler_tool.py +658 -0
aiecs/tools/statistics/data_transformer_tool.py +573 -0
aiecs/tools/statistics/data_visualizer_tool.py +495 -0
aiecs/tools/statistics/model_trainer_tool.py +487 -0
aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
aiecs/tools/task_tools/__init__.py +86 -0
aiecs/tools/task_tools/chart_tool.py +732 -0
aiecs/tools/task_tools/classfire_tool.py +922 -0
aiecs/tools/task_tools/image_tool.py +447 -0
aiecs/tools/task_tools/office_tool.py +684 -0
aiecs/tools/task_tools/pandas_tool.py +635 -0
aiecs/tools/task_tools/report_tool.py +635 -0
aiecs/tools/task_tools/research_tool.py +392 -0
aiecs/tools/task_tools/scraper_tool.py +715 -0
aiecs/tools/task_tools/stats_tool.py +688 -0
aiecs/tools/temp_file_manager.py +130 -0
aiecs/tools/tool_executor/__init__.py +37 -0
aiecs/tools/tool_executor/tool_executor.py +881 -0
aiecs/utils/LLM_output_structor.py +445 -0
aiecs/utils/__init__.py +34 -0
aiecs/utils/base_callback.py +47 -0
aiecs/utils/cache_provider.py +695 -0
aiecs/utils/execution_utils.py +184 -0
aiecs/utils/logging.py +1 -0
aiecs/utils/prompt_loader.py +14 -0
aiecs/utils/token_usage_repository.py +323 -0
aiecs/ws/__init__.py +0 -0
aiecs/ws/socket_server.py +52 -0
aiecs-1.5.1.dist-info/METADATA +608 -0
aiecs-1.5.1.dist-info/RECORD +302 -0
aiecs-1.5.1.dist-info/WHEEL +5 -0
aiecs-1.5.1.dist-info/entry_points.txt +10 -0
aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
aiecs-1.5.1.dist-info/top_level.txt +1 -0

aiecs/infrastructure/graph_storage/query_optimizer.py ADDED Viewed

@@ -0,0 +1,635 @@
+"""
+Query Optimizer
+Advanced query optimization for knowledge graph queries.
+"""
+from typing import List, Dict, Any, Optional, Set, Tuple
+from dataclasses import dataclass, field
+from enum import Enum
+import logging
+from aiecs.domain.knowledge_graph.models.query_plan import QueryPlan, QueryStep
+from aiecs.domain.knowledge_graph.models.query import QueryType
+logger = logging.getLogger(__name__)
+class OptimizationRule(str, Enum):
+    """
+    Names of the optimization passes a QueryOptimizer can be configured with
+    Members also subclass ``str``, so each member compares equal to its
+    plain string value.
+    """
+    # Move property filters from a step to a step it depends on
+    PREDICATE_PUSHDOWN = "predicate_pushdown"
+    # Within each dependency level, run the most selective steps first
+    JOIN_REORDERING = "join_reordering"
+    # Drop steps whose operation signature repeats an earlier step
+    REDUNDANT_ELIMINATION = "redundant_elimination"
+    # NOTE(review): QueryOptimizer.optimize() does not check this member in
+    # the code visible here - confirm whether it is used anywhere
+    FILTER_EARLY = "filter_early"
+    # Within each dependency level, run the cheapest steps first
+    COST_BASED = "cost_based"
+@dataclass
+class QueryStatistics:
+    """
+    Query execution statistics for cost estimation
+    Attributes:
+        entity_count: Estimated number of entities in graph
+        relation_count: Estimated number of relations in graph
+        avg_degree: Average node degree (connections per entity)
+        entity_type_counts: Count of entities per type
+        relation_type_counts: Count of relations per type
+    """
+    entity_count: int = 1000
+    relation_count: int = 5000
+    avg_degree: float = 5.0
+    entity_type_counts: Dict[str, int] = field(default_factory=dict)
+    relation_type_counts: Dict[str, int] = field(default_factory=dict)
+    def get_selectivity(self, entity_type: Optional[str] = None) -> float:
+        """
+        Estimate selectivity (fraction of entities matching filter)
+        Args:
+            entity_type: Entity type filter
+        Returns:
+            Selectivity estimate (0.0-1.0)
+        """
+        if entity_type and entity_type in self.entity_type_counts:
+            return self.entity_type_counts[entity_type] / max(self.entity_count, 1)
+        return 1.0  # No filter = all entities
+@dataclass
+class OptimizationResult:
+    """
+    Outcome of a single QueryOptimizer.optimize() call
+    Attributes:
+        original_plan: Plan that was passed to the optimizer
+        optimized_plan: Plan produced by the optimizer; the very same object
+            as original_plan when the input was already flagged as optimized
+        rules_applied: Human-readable labels of the passes that ran, e.g.
+            "predicate_pushdown (pushed 2 filters)"
+        estimated_cost_reduction: Fraction of the original estimated cost that
+            was saved, computed as (original - optimized) / original. It is
+            0.0 when the original cost is not positive and is negative when
+            the optimized plan is estimated to cost more than the original.
+        explanation: Human-readable explanation of optimizations
+    """
+    original_plan: QueryPlan
+    optimized_plan: QueryPlan
+    rules_applied: List[str] = field(default_factory=list)
+    estimated_cost_reduction: float = 0.0
+    explanation: str = ""
+class QueryOptimizer:
+    """
+    Advanced Query Optimizer
+    Optimizes query execution plans using various optimization techniques:
+    - Predicate push-down: Move filters earlier in execution
+    - Join reordering: Reorder multi-hop queries for efficiency
+    - Redundant operation elimination: Remove duplicate operations
+    - Cost-based optimization: Choose execution order based on cost estimates
+    Example:
+        ```python
+        optimizer = QueryOptimizer(statistics=stats)
+        # Optimize a query plan
+        result = optimizer.optimize(plan)
+        print(f"Cost reduction: {result.estimated_cost_reduction:.1%}")
+        print(f"Rules applied: {result.rules_applied}")
+        ```
+    """
+    def __init__(
+        self,
+        statistics: Optional[QueryStatistics] = None,
+        enable_rules: Optional[List[OptimizationRule]] = None,
+    ):
+        """
+        Initialize query optimizer
+        Args:
+            statistics: Query statistics for cost estimation
+            enable_rules: List of optimization rules to enable (None = all)
+        """
+        self.statistics = statistics or QueryStatistics()
+        self.enable_rules = enable_rules or list(OptimizationRule)
+        self._optimization_count = 0
+    def optimize(self, plan: QueryPlan) -> OptimizationResult:
+        """
+        Optimize a query execution plan
+        Args:
+            plan: Original query plan
+        Returns:
+            Optimization result with optimized plan
+        """
+        if plan.optimized:
+            logger.debug(f"Plan {plan.plan_id} already optimized")
+            return OptimizationResult(
+                original_plan=plan,
+                optimized_plan=plan,
+                explanation="Plan already optimized",
+            )
+        original_cost = plan.total_estimated_cost
+        optimized_steps = list(plan.steps)
+        rules_applied = []
+        # Apply optimization rules in order
+        if OptimizationRule.REDUNDANT_ELIMINATION in self.enable_rules:
+            optimized_steps, eliminated = self._eliminate_redundant_operations(optimized_steps)
+            if eliminated > 0:
+                rules_applied.append(f"redundant_elimination (removed {eliminated} ops)")
+        if OptimizationRule.PREDICATE_PUSHDOWN in self.enable_rules:
+            optimized_steps, pushed = self._push_down_predicates(optimized_steps)
+            if pushed > 0:
+                rules_applied.append(f"predicate_pushdown (pushed {pushed} filters)")
+        if OptimizationRule.JOIN_REORDERING in self.enable_rules:
+            optimized_steps = self._reorder_joins(optimized_steps)
+            rules_applied.append("join_reordering")
+        if OptimizationRule.COST_BASED in self.enable_rules:
+            optimized_steps = self._cost_based_reordering(optimized_steps)
+            rules_applied.append("cost_based_reordering")
+        # Create optimized plan
+        optimized_plan = QueryPlan(
+            plan_id=plan.plan_id,
+            original_query=plan.original_query,
+            steps=optimized_steps,
+            optimized=True,
+            explanation=plan.explanation,
+            metadata=plan.metadata,
+        )
+        optimized_plan.total_estimated_cost = optimized_plan.calculate_total_cost()
+        # Calculate cost reduction
+        cost_reduction = 0.0
+        if original_cost > 0:
+            cost_reduction = (original_cost - optimized_plan.total_estimated_cost) / original_cost
+        self._optimization_count += 1
+        explanation = self._generate_explanation(
+            plan, optimized_plan, rules_applied, cost_reduction
+        )
+        return OptimizationResult(
+            original_plan=plan,
+            optimized_plan=optimized_plan,
+            rules_applied=rules_applied,
+            estimated_cost_reduction=cost_reduction,
+            explanation=explanation,
+        )
+    def _eliminate_redundant_operations(
+        self, steps: List[QueryStep]
+    ) -> Tuple[List[QueryStep], int]:
+        """
+        Eliminate redundant operations
+        Args:
+            steps: Query steps
+        Returns:
+            Tuple of (optimized steps, number of operations eliminated)
+        """
+        seen_operations: Dict[str, QueryStep] = {}
+        optimized = []
+        eliminated = 0
+        for step in steps:
+            # Create a signature for this operation
+            signature = self._get_operation_signature(step)
+            if signature in seen_operations:
+                # Redundant operation - update dependencies to point to
+                # original
+                original_step = seen_operations[signature]
+                # Update other steps that depend on this redundant step
+                for other_step in steps:
+                    if step.step_id in other_step.depends_on:
+                        # Replace dependency with original step
+                        other_step.depends_on = [
+                            (original_step.step_id if dep == step.step_id else dep)
+                            for dep in other_step.depends_on
+                        ]
+                eliminated += 1
+                logger.debug(
+                    f"Eliminated redundant operation: {step.step_id} -> {original_step.step_id}"
+                )
+            else:
+                seen_operations[signature] = step
+                optimized.append(step)
+        return optimized, eliminated
+    def _get_operation_signature(self, step: QueryStep) -> str:
+        """
+        Get a signature for an operation to detect duplicates
+        Args:
+            step: Query step
+        Returns:
+            Signature string
+        """
+        query = step.query
+        parts = [
+            str(step.operation),
+            str(query.query_type),
+            str(query.entity_id or ""),
+            str(query.entity_type or ""),
+            str(query.relation_type or ""),
+            str(sorted(query.properties.items()) if query.properties else ""),
+        ]
+        return "|".join(parts)
+    def _push_down_predicates(self, steps: List[QueryStep]) -> Tuple[List[QueryStep], int]:
+        """
+        Push predicates (filters) earlier in execution
+        Strategy: Move property filters to the earliest possible step
+        Args:
+            steps: Query steps
+        Returns:
+            Tuple of (optimized steps, number of predicates pushed)
+        """
+        pushed_count = 0
+        # Find filter steps
+        for i, step in enumerate(steps):
+            if not step.query.properties:
+                continue
+            # Check if we can push this filter to an earlier step
+            for j in range(i):
+                earlier_step = steps[j]
+                # Can only push to steps this one depends on
+                if earlier_step.step_id not in step.depends_on:
+                    continue
+                # Check if filter is applicable to earlier step
+                if self._can_apply_filter(earlier_step, step.query.properties):
+                    # Move filter to earlier step
+                    earlier_step.query.properties.update(step.query.properties)
+                    step.query.properties = {}
+                    pushed_count += 1
+                    logger.debug(f"Pushed filter from {step.step_id} to {earlier_step.step_id}")
+                    break
+        return steps, pushed_count
+    def _can_apply_filter(self, step: QueryStep, properties: Dict[str, Any]) -> bool:
+        """
+        Check if a filter can be applied to a step
+        Args:
+            step: Query step
+            properties: Property filters
+        Returns:
+            True if filter can be applied
+        """
+        # Can apply filters to entity lookup and vector search
+        return step.query.query_type in [
+            QueryType.ENTITY_LOOKUP,
+            QueryType.VECTOR_SEARCH,
+            QueryType.TRAVERSAL,
+        ]
+    def _reorder_joins(self, steps: List[QueryStep]) -> List[QueryStep]:
+        """
+        Reorder join operations (multi-hop queries) for efficiency
+        Strategy: Execute most selective operations first
+        Args:
+            steps: Query steps
+        Returns:
+            Reordered steps
+        """
+        # Group steps by dependency level
+        levels = self._get_dependency_levels(steps)
+        reordered = []
+        for level_steps in levels:
+            # Sort by selectivity (most selective first)
+            sorted_level = sorted(level_steps, key=lambda s: self._estimate_selectivity(s))
+            reordered.extend(sorted_level)
+        return reordered
+    def _estimate_selectivity(self, step: QueryStep) -> float:
+        """
+        Estimate selectivity of a query step (fraction of results returned)
+        Lower selectivity = fewer results = should execute first
+        Args:
+            step: Query step
+        Returns:
+            Selectivity estimate (0.0-1.0)
+        """
+        query = step.query
+        selectivity = 1.0
+        # Entity type filter
+        if query.entity_type:
+            selectivity *= self.statistics.get_selectivity(query.entity_type)
+        # Property filters
+        if query.properties:
+            # Each property filter reduces selectivity
+            selectivity *= 0.5 ** len(query.properties)
+        # Score threshold
+        if query.score_threshold > 0:
+            selectivity *= 1.0 - query.score_threshold
+        # Max results limit
+        if query.max_results:
+            # Estimate based on total entity count
+            limit_selectivity = query.max_results / max(self.statistics.entity_count, 1)
+            selectivity = min(selectivity, limit_selectivity)
+        return selectivity
+    def _cost_based_reordering(self, steps: List[QueryStep]) -> List[QueryStep]:
+        """
+        Reorder steps based on estimated cost
+        Strategy: Execute cheaper operations first within each dependency level
+        Args:
+            steps: Query steps
+        Returns:
+            Reordered steps
+        """
+        levels = self._get_dependency_levels(steps)
+        reordered = []
+        for level_steps in levels:
+            # Sort by estimated cost (ascending)
+            sorted_level = sorted(level_steps, key=lambda s: self._estimate_step_cost(s))
+            reordered.extend(sorted_level)
+        return reordered
+    def _estimate_step_cost(self, step: QueryStep) -> float:
+        """
+        Estimate execution cost of a query step
+        Args:
+            step: Query step
+        Returns:
+            Estimated cost (higher = more expensive)
+        """
+        query = step.query
+        base_cost = step.estimated_cost
+        # Adjust based on query type
+        if query.query_type == QueryType.VECTOR_SEARCH:
+            # Vector search is expensive
+            base_cost *= 2.0
+        elif query.query_type == QueryType.PATH_FINDING:
+            # Path finding is very expensive
+            base_cost *= 3.0
+        elif query.query_type == QueryType.TRAVERSAL:
+            # Traversal cost depends on depth
+            base_cost *= 1.0 + query.max_depth * 0.5
+        # Adjust based on expected result size
+        selectivity = self._estimate_selectivity(step)
+        expected_results = selectivity * self.statistics.entity_count
+        # More results = higher cost
+        base_cost *= 1.0 + expected_results / 1000.0
+        return base_cost
+    def _get_dependency_levels(self, steps: List[QueryStep]) -> List[List[QueryStep]]:
+        """
+        Group steps by dependency level
+        Args:
+            steps: Query steps
+        Returns:
+            List of lists, where each inner list contains steps at the same dependency level
+        """
+        levels: List[List[QueryStep]] = []
+        remaining = list(steps)
+        completed: Set[str] = set()
+        while remaining:
+            # Find steps with all dependencies satisfied
+            current_level = [
+                step for step in remaining if all(dep in completed for dep in step.depends_on)
+            ]
+            if not current_level:
+                # Circular dependency or error
+                logger.warning("Circular dependency detected in query plan")
+                break
+            levels.append(current_level)
+            # Mark these steps as completed
+            for step in current_level:
+                completed.add(step.step_id)
+                remaining.remove(step)
+        return levels
+    def _generate_explanation(
+        self,
+        original_plan: QueryPlan,
+        optimized_plan: QueryPlan,
+        rules_applied: List[str],
+        cost_reduction: float,
+    ) -> str:
+        """
+        Build a multi-line, human-readable summary of an optimization run
+        The summary reports the original plan id, the estimated cost before and
+        after, the cost reduction and the step counts. A trailing line listing
+        the applied rules is included only when at least one rule was applied.
+        Args:
+            original_plan: Query plan before optimization
+            optimized_plan: Query plan after optimization
+            rules_applied: Names of the rules that were applied
+            cost_reduction: Estimated cost reduction (rendered with percent formatting)
+        Returns:
+            Explanation string with one item per line
+        """
+        summary_lines = (
+            f"Optimized query plan {original_plan.plan_id}:",
+            f"- Original cost: {original_plan.total_estimated_cost:.3f}",
+            f"- Optimized cost: {optimized_plan.total_estimated_cost:.3f}",
+            f"- Cost reduction: {cost_reduction:.1%}",
+            f"- Steps: {len(original_plan.steps)} -> {len(optimized_plan.steps)}",
+        )
+        # The rules line is optional; an empty tuple leaves the summary untouched
+        rules_line = (f"- Rules applied: {', '.join(rules_applied)}",) if rules_applied else ()
+        return "\n".join(summary_lines + rules_line)
+    def update_statistics(self, statistics: QueryStatistics) -> None:
+        """
+        Replace the statistics held by this optimizer
+        The given object is stored on the instance, then its entity and
+        relation counts are reported at info level.
+        Args:
+            statistics: New query statistics
+        """
+        self.statistics = statistics
+        summary = f"Updated query statistics: {statistics.entity_count} entities, {statistics.relation_count} relations"
+        logger.info(summary)
+    def get_optimization_count(self) -> int:
+        """
+        Get number of optimizations performed
+        Returns:
+            Current value of the internal optimization counter
+        """
+        performed = self._optimization_count
+        return performed
+    def __repr__(self) -> str:
+        """Return a short debug representation showing the rule count and the optimization count"""
+        rendered = f"QueryOptimizer(rules={len(self.enable_rules)}, optimizations={self._optimization_count})"
+        return rendered
+class QueryStatisticsCollector:
+    """
+    Collects query execution statistics for cost estimation
+    Tracks:
+    - Entity and relation counts
+    - Entity/relation type distributions
+    - Average node degree
+    - Query execution times (only the most recent ``max_history`` samples)
+    Example:
+        ```python
+        collector = QueryStatisticsCollector()
+        # Collect from graph store
+        stats = collector.collect_from_graph_store(graph_store)
+        # Use for optimization
+        optimizer = QueryOptimizer(statistics=stats)
+        ```
+    """
+    # Class-level fallback so instances created without __init__ keep the historical cap
+    _max_history: int = 1000
+    def __init__(self, max_history: int = 1000):
+        """
+        Initialize statistics collector
+        Args:
+            max_history: Maximum number of most recent execution times to keep
+                (defaults to 1000, the previously hard-coded limit)
+        Raises:
+            ValueError: If max_history is negative
+        """
+        if max_history < 0:
+            raise ValueError("max_history must be non-negative")
+        self._max_history = max_history
+        self._execution_times: List[float] = []
+    def collect_from_graph_store(self, graph_store) -> QueryStatistics:
+        """
+        Collect statistics from a graph store
+        Reads the store's ``entities`` and ``relations`` mappings to count
+        items overall and per type, and sums the outgoing and incoming
+        relations of every entity to derive the average degree.
+        Args:
+            graph_store: Graph store instance
+        Returns:
+            Query statistics; a default-constructed QueryStatistics (after a
+            logged warning) when graph_store is not a GraphStore
+        """
+        from collections import Counter
+        from aiecs.domain.knowledge_graph.graph_store import GraphStore
+        if not isinstance(graph_store, GraphStore):
+            logger.warning("Invalid graph store type")
+            return QueryStatistics()
+        # NOTE(review): assumes the store exposes `entities` / `relations` mappings - confirm for every GraphStore backend
+        entity_count = len(graph_store.entities)
+        relation_count = len(graph_store.relations)
+        # Count by type; dict(Counter) keeps first-seen key order and yields a plain dict
+        entity_type_counts: Dict[str, int] = dict(
+            Counter(entity.entity_type for entity in graph_store.entities.values())
+        )
+        relation_type_counts: Dict[str, int] = dict(
+            Counter(relation.relation_type for relation in graph_store.relations.values())
+        )
+        # Calculate average degree
+        # NOTE(review): assumes both relation lookups return sized collections synchronously - confirm
+        degree_sum = sum(
+            len(graph_store.get_outgoing_relations(entity_id))
+            + len(graph_store.get_incoming_relations(entity_id))
+            for entity_id in graph_store.entities
+        )
+        # max(..., 1) avoids division by zero for an empty store
+        avg_degree = degree_sum / max(entity_count, 1)
+        stats = QueryStatistics(
+            entity_count=entity_count,
+            relation_count=relation_count,
+            avg_degree=avg_degree,
+            entity_type_counts=entity_type_counts,
+            relation_type_counts=relation_type_counts,
+        )
+        logger.info(
+            f"Collected statistics: {entity_count} entities, {relation_count} relations, avg degree {avg_degree:.1f}"
+        )
+        return stats
+    def record_execution_time(self, execution_time_ms: float) -> None:
+        """
+        Record query execution time
+        Only the most recent ``max_history`` samples are kept; older ones are
+        discarded once the limit is exceeded.
+        Args:
+            execution_time_ms: Execution time in milliseconds
+        """
+        self._execution_times.append(execution_time_ms)
+        # Trim from the front in place; computing the overflow explicitly also
+        # handles a limit of 0, where a `[-limit:]` slice would keep everything
+        overflow = len(self._execution_times) - self._max_history
+        if overflow > 0:
+            del self._execution_times[:overflow]
+    def get_average_execution_time(self) -> float:
+        """
+        Get average query execution time
+        Returns:
+            Average execution time in milliseconds, or 0.0 when nothing has been recorded
+        """
+        if not self._execution_times:
+            return 0.0
+        return sum(self._execution_times) / len(self._execution_times)
+    def get_execution_percentile(self, percentile: float) -> float:
+        """
+        Get execution time percentile
+        The sample at position ``int(count * percentile)`` of the sorted
+        execution times is returned. Out-of-range percentiles are clamped:
+        values below 0.0 yield the smallest sample and values above 1.0 the
+        largest.
+        Args:
+            percentile: Percentile (0.0-1.0)
+        Returns:
+            Execution time at percentile, or 0.0 when nothing has been recorded
+        """
+        if not self._execution_times:
+            return 0.0
+        sorted_times = sorted(self._execution_times)
+        last_index = len(sorted_times) - 1
+        index = int(len(sorted_times) * percentile)
+        # Clamp both ends: percentile 1.0 maps one past the end, and a negative
+        # percentile would otherwise wrap around through negative indexing
+        index = max(0, min(index, last_index))
+        return sorted_times[index]
+    def reset(self) -> None:
+        """Reset collected statistics"""
+        self._execution_times = []