aiecs-1.5.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiecs/__init__.py +72 -0
- aiecs/__main__.py +41 -0
- aiecs/aiecs_client.py +469 -0
- aiecs/application/__init__.py +10 -0
- aiecs/application/executors/__init__.py +10 -0
- aiecs/application/executors/operation_executor.py +363 -0
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +100 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
- aiecs/application/knowledge_graph/search/reranker.py +295 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +484 -0
- aiecs/config/__init__.py +16 -0
- aiecs/config/config.py +498 -0
- aiecs/config/graph_config.py +137 -0
- aiecs/config/registry.py +23 -0
- aiecs/core/__init__.py +46 -0
- aiecs/core/interface/__init__.py +34 -0
- aiecs/core/interface/execution_interface.py +152 -0
- aiecs/core/interface/storage_interface.py +171 -0
- aiecs/domain/__init__.py +289 -0
- aiecs/domain/agent/__init__.py +189 -0
- aiecs/domain/agent/base_agent.py +697 -0
- aiecs/domain/agent/exceptions.py +103 -0
- aiecs/domain/agent/graph_aware_mixin.py +559 -0
- aiecs/domain/agent/hybrid_agent.py +490 -0
- aiecs/domain/agent/integration/__init__.py +26 -0
- aiecs/domain/agent/integration/context_compressor.py +222 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
- aiecs/domain/agent/integration/retry_policy.py +219 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +646 -0
- aiecs/domain/agent/lifecycle.py +296 -0
- aiecs/domain/agent/llm_agent.py +300 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +197 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +160 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
- aiecs/domain/agent/models.py +317 -0
- aiecs/domain/agent/observability.py +407 -0
- aiecs/domain/agent/persistence.py +289 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +161 -0
- aiecs/domain/agent/prompts/formatters.py +189 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +260 -0
- aiecs/domain/agent/tool_agent.py +257 -0
- aiecs/domain/agent/tools/__init__.py +12 -0
- aiecs/domain/agent/tools/schema_generator.py +221 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +477 -0
- aiecs/domain/community/analytics.py +481 -0
- aiecs/domain/community/collaborative_workflow.py +642 -0
- aiecs/domain/community/communication_hub.py +645 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +800 -0
- aiecs/domain/community/community_manager.py +813 -0
- aiecs/domain/community/decision_engine.py +879 -0
- aiecs/domain/community/exceptions.py +225 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +268 -0
- aiecs/domain/community/resource_manager.py +457 -0
- aiecs/domain/community/shared_context_manager.py +603 -0
- aiecs/domain/context/__init__.py +58 -0
- aiecs/domain/context/context_engine.py +989 -0
- aiecs/domain/context/conversation_models.py +354 -0
- aiecs/domain/context/graph_memory.py +467 -0
- aiecs/domain/execution/__init__.py +12 -0
- aiecs/domain/execution/model.py +57 -0
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +130 -0
- aiecs/domain/knowledge_graph/models/evidence.py +194 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
- aiecs/domain/knowledge_graph/models/path.py +179 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
- aiecs/domain/knowledge_graph/models/query.py +272 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
- aiecs/domain/knowledge_graph/models/relation.py +136 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
- aiecs/domain/task/__init__.py +13 -0
- aiecs/domain/task/dsl_processor.py +613 -0
- aiecs/domain/task/model.py +62 -0
- aiecs/domain/task/task_context.py +268 -0
- aiecs/infrastructure/__init__.py +24 -0
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +601 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
- aiecs/infrastructure/graph_storage/cache.py +429 -0
- aiecs/infrastructure/graph_storage/distributed.py +226 -0
- aiecs/infrastructure/graph_storage/error_handling.py +390 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +514 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
- aiecs/infrastructure/graph_storage/metrics.py +357 -0
- aiecs/infrastructure/graph_storage/migration.py +413 -0
- aiecs/infrastructure/graph_storage/pagination.py +471 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
- aiecs/infrastructure/graph_storage/postgres.py +871 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +623 -0
- aiecs/infrastructure/graph_storage/streaming.py +495 -0
- aiecs/infrastructure/messaging/__init__.py +13 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
- aiecs/infrastructure/messaging/websocket_manager.py +298 -0
- aiecs/infrastructure/monitoring/__init__.py +34 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
- aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
- aiecs/infrastructure/monitoring/structured_logger.py +48 -0
- aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
- aiecs/infrastructure/persistence/__init__.py +24 -0
- aiecs/infrastructure/persistence/context_engine_client.py +187 -0
- aiecs/infrastructure/persistence/database_manager.py +333 -0
- aiecs/infrastructure/persistence/file_storage.py +754 -0
- aiecs/infrastructure/persistence/redis_client.py +220 -0
- aiecs/llm/__init__.py +86 -0
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/callbacks/custom_callbacks.py +264 -0
- aiecs/llm/client_factory.py +420 -0
- aiecs/llm/clients/__init__.py +33 -0
- aiecs/llm/clients/base_client.py +193 -0
- aiecs/llm/clients/googleai_client.py +181 -0
- aiecs/llm/clients/openai_client.py +131 -0
- aiecs/llm/clients/vertex_client.py +437 -0
- aiecs/llm/clients/xai_client.py +184 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +275 -0
- aiecs/llm/config/config_validator.py +236 -0
- aiecs/llm/config/model_config.py +151 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +91 -0
- aiecs/main.py +363 -0
- aiecs/scripts/__init__.py +3 -0
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/version_manager.py +215 -0
- aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
- aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
- aiecs/scripts/dependance_check/__init__.py +17 -0
- aiecs/scripts/dependance_check/dependency_checker.py +938 -0
- aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
- aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
- aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
- aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
- aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
- aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
- aiecs/scripts/tools_develop/README.md +449 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
- aiecs/scripts/tools_develop/verify_tools.py +356 -0
- aiecs/tasks/__init__.py +1 -0
- aiecs/tasks/worker.py +172 -0
- aiecs/tools/__init__.py +299 -0
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +303 -0
- aiecs/tools/apisource/providers/__init__.py +115 -0
- aiecs/tools/apisource/providers/base.py +664 -0
- aiecs/tools/apisource/providers/census.py +401 -0
- aiecs/tools/apisource/providers/fred.py +564 -0
- aiecs/tools/apisource/providers/newsapi.py +412 -0
- aiecs/tools/apisource/providers/worldbank.py +357 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +375 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
- aiecs/tools/apisource/tool.py +850 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +338 -0
- aiecs/tools/base_tool.py +201 -0
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +599 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
- aiecs/tools/docs/content_insertion_tool.py +1333 -0
- aiecs/tools/docs/document_creator_tool.py +1317 -0
- aiecs/tools/docs/document_layout_tool.py +1166 -0
- aiecs/tools/docs/document_parser_tool.py +994 -0
- aiecs/tools/docs/document_writer_tool.py +1818 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
- aiecs/tools/langchain_adapter.py +542 -0
- aiecs/tools/schema_generator.py +275 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +589 -0
- aiecs/tools/search_tool/cache.py +260 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +216 -0
- aiecs/tools/search_tool/core.py +749 -0
- aiecs/tools/search_tool/deduplicator.py +123 -0
- aiecs/tools/search_tool/error_handler.py +271 -0
- aiecs/tools/search_tool/metrics.py +371 -0
- aiecs/tools/search_tool/rate_limiter.py +178 -0
- aiecs/tools/search_tool/schemas.py +277 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
- aiecs/tools/statistics/data_loader_tool.py +564 -0
- aiecs/tools/statistics/data_profiler_tool.py +658 -0
- aiecs/tools/statistics/data_transformer_tool.py +573 -0
- aiecs/tools/statistics/data_visualizer_tool.py +495 -0
- aiecs/tools/statistics/model_trainer_tool.py +487 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
- aiecs/tools/task_tools/__init__.py +86 -0
- aiecs/tools/task_tools/chart_tool.py +732 -0
- aiecs/tools/task_tools/classfire_tool.py +922 -0
- aiecs/tools/task_tools/image_tool.py +447 -0
- aiecs/tools/task_tools/office_tool.py +684 -0
- aiecs/tools/task_tools/pandas_tool.py +635 -0
- aiecs/tools/task_tools/report_tool.py +635 -0
- aiecs/tools/task_tools/research_tool.py +392 -0
- aiecs/tools/task_tools/scraper_tool.py +715 -0
- aiecs/tools/task_tools/stats_tool.py +688 -0
- aiecs/tools/temp_file_manager.py +130 -0
- aiecs/tools/tool_executor/__init__.py +37 -0
- aiecs/tools/tool_executor/tool_executor.py +881 -0
- aiecs/utils/LLM_output_structor.py +445 -0
- aiecs/utils/__init__.py +34 -0
- aiecs/utils/base_callback.py +47 -0
- aiecs/utils/cache_provider.py +695 -0
- aiecs/utils/execution_utils.py +184 -0
- aiecs/utils/logging.py +1 -0
- aiecs/utils/prompt_loader.py +14 -0
- aiecs/utils/token_usage_repository.py +323 -0
- aiecs/ws/__init__.py +0 -0
- aiecs/ws/socket_server.py +52 -0
- aiecs-1.5.1.dist-info/METADATA +608 -0
- aiecs-1.5.1.dist-info/RECORD +302 -0
- aiecs-1.5.1.dist-info/WHEEL +5 -0
- aiecs-1.5.1.dist-info/entry_points.txt +10 -0
- aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
- aiecs-1.5.1.dist-info/top_level.txt +1 -0
aiecs/application/knowledge_graph/reasoning/query_planner.py
@@ -0,0 +1,872 @@
"""
Query Planner

Translates natural language queries to structured graph query plans.
Decomposes complex queries into executable steps and optimizes execution order.
"""

import uuid
import re
from typing import Optional, List, Dict, Any, Set, Union
from aiecs.infrastructure.graph_storage.base import GraphStore
from aiecs.domain.knowledge_graph.models.query import GraphQuery, QueryType
from aiecs.domain.knowledge_graph.models.query_plan import (
    QueryPlan,
    QueryStep,
    QueryOperation,
    OptimizationStrategy,
)
from aiecs.infrastructure.graph_storage.query_optimizer import (
    QueryOptimizer,
    QueryStatisticsCollector,
)

# Import LogicQueryParser for DSL support
try:
    from aiecs.application.knowledge_graph.reasoning.logic_parser import (
        LogicQueryParser,
        ParserError,
    )

    LOGIC_PARSER_AVAILABLE = True
except ImportError:
    LOGIC_PARSER_AVAILABLE = False
    LogicQueryParser = None
    ParserError = None


class QueryPlanner:
    """
    Query Planning Engine

    Translates natural language queries into structured, optimized execution plans.

    Features:
    - Natural language to graph query translation
    - Query decomposition (complex queries → multiple steps)
    - Query optimization (reorder operations for efficiency)
    - Cost estimation

    Example:
        ```python
        planner = QueryPlanner(graph_store)

        # Plan a complex query
        plan = planner.plan_query(
            "Who works at companies that Alice knows people at?"
        )

        # Optimize the plan
        optimized_plan = planner.optimize_plan(
            plan,
            strategy=OptimizationStrategy.MINIMIZE_COST
        )
        ```
    """

    def __init__(
        self,
        graph_store: GraphStore,
        enable_advanced_optimization: bool = True,
        schema: Optional[Any] = None,
    ):
        """
        Initialize query planner

        Args:
            graph_store: Graph storage backend for queries
            enable_advanced_optimization: Enable advanced query optimization (default: True)
            schema: Optional schema manager for logic query validation
        """
        self.graph_store = graph_store
        self.schema = schema

        # Pattern templates for query understanding
        self.query_patterns = self._initialize_query_patterns()

        # Advanced query optimizer
        self._enable_advanced_optimization = enable_advanced_optimization
        if enable_advanced_optimization:
            # Collect statistics from graph store
            collector = QueryStatisticsCollector()
            statistics = collector.collect_from_graph_store(graph_store)

            # Initialize optimizer
            self._optimizer = QueryOptimizer(statistics=statistics)
            self._statistics_collector = collector
        else:
            self._optimizer = None

        # Logic query parser (if available)
        if LOGIC_PARSER_AVAILABLE and schema is not None:
            self._logic_parser = LogicQueryParser(schema=schema)
        else:
            self._logic_parser = None
            self._statistics_collector = None

    def _initialize_query_patterns(self) -> List[Dict[str, Any]]:
        """Initialize query pattern matchers"""
        return [
            {
                "pattern": r"find (.*?) with (.*?) = (['\"]?.+?['\"]?)",
                "type": "entity_lookup_by_property",
                "operations": ["filter"],
            },
            {
                "pattern": r"who (works at|is employed by) (.*?)",
                "type": "relation_traversal",
                "operations": ["entity_lookup", "traversal"],
            },
            {
                "pattern": r"what (companies|organizations) does (.*?) know people at",
                "type": "multi_hop_query",
                "operations": ["entity_lookup", "traversal", "traversal"],
            },
            {
                "pattern": r"(similar|related) to (.*?)",
                "type": "vector_search",
                "operations": ["vector_search"],
            },
            {
                "pattern": r"path from (.*?) to (.*?)",
                "type": "path_finding",
                "operations": ["path_finding"],
            },
            {
                "pattern": r"neighbors of (.*?)",
                "type": "neighbor_query",
                "operations": ["entity_lookup", "traversal"],
            },
        ]

    def plan_query(
        self,
        natural_language_query: str,
        context: Optional[Dict[str, Any]] = None,
    ) -> QueryPlan:
        """
        Create an execution plan from natural language query

        Args:
            natural_language_query: Natural language query string
            context: Optional context (e.g., embeddings, entity IDs)

        Returns:
            Query execution plan

        Example:
            ```python
            plan = planner.plan_query(
                "Find papers similar to 'Deep Learning' and their authors"
            )
            ```
        """
        context = context or {}
        plan_id = f"plan_{uuid.uuid4().hex[:8]}"

        # Analyze query structure
        query_info = self._analyze_query(natural_language_query)

        # Decompose into steps
        steps = self._decompose_query(natural_language_query, query_info, context)

        # Create plan
        plan = QueryPlan(
            plan_id=plan_id,
            original_query=natural_language_query,
            steps=steps,
            explanation=self._generate_explanation(steps),
            metadata={"query_info": query_info},
        )

        # Calculate total cost
        plan.total_estimated_cost = plan.calculate_total_cost()

        return plan

    def _analyze_query(self, query: str) -> Dict[str, Any]:
        """
        Analyze query to determine type and complexity

        Args:
            query: Natural language query

        Returns:
            Query analysis information
        """
        query_lower = query.lower()

        # Match against known patterns
        matched_pattern = None
        for pattern_info in self.query_patterns:
            if re.search(pattern_info["pattern"], query_lower):
                matched_pattern = pattern_info
                break

        # Determine complexity
        is_multi_hop = any(
            keyword in query_lower
            for keyword in [
                "who works at",
                "people at",
                "friends of",
                "colleagues",
                "through",
                "connected to",
                "related through",
            ]
        )

        has_vector_search = any(
            keyword in query_lower for keyword in ["similar", "related", "like", "semantically"]
        )

        has_path_finding = any(
            keyword in query_lower for keyword in ["path", "route", "connection", "how to get"]
        )

        return {
            "matched_pattern": matched_pattern,
            "is_multi_hop": is_multi_hop,
            "has_vector_search": has_vector_search,
            "has_path_finding": has_path_finding,
            "complexity": self._estimate_complexity(query_lower),
        }

    def _estimate_complexity(self, query: str) -> str:
        """Estimate query complexity"""
        hop_indicators = query.count("who") + query.count("what") + query.count("which")

        if hop_indicators > 2 or "through" in query:
            return "high"
        elif hop_indicators > 0 or any(k in query for k in ["find", "get", "show"]):
            return "medium"
        else:
            return "low"

    def _decompose_query(
        self, query: str, query_info: Dict[str, Any], context: Dict[str, Any]
    ) -> List[QueryStep]:
        """
        Decompose query into executable steps

        Args:
            query: Natural language query
            query_info: Query analysis information
            context: Query context

        Returns:
            List of query steps
        """
        steps = []

        # Use matched pattern if available
        if query_info["matched_pattern"]:
            steps = self._create_steps_from_pattern(query, query_info["matched_pattern"], context)
        else:
            # Fall back to generic decomposition
            steps = self._create_generic_steps(query, query_info, context)

        return steps

    def _create_steps_from_pattern(
        self, query: str, pattern_info: Dict[str, Any], context: Dict[str, Any]
    ) -> List[QueryStep]:
        """Create steps based on matched pattern"""
        steps = []
        query_type = pattern_info["type"]

        if query_type == "entity_lookup_by_property":
            # Single step: filter entities by property
            steps.append(
                QueryStep(
                    step_id="step_1",
                    operation=QueryOperation.FILTER,
                    query=GraphQuery(
                        query_type=QueryType.CUSTOM,
                        properties=context.get("properties", {}),
                        max_results=context.get("max_results", 10),
                    ),
                    description="Filter entities by properties",
                    estimated_cost=0.3,
                )
            )

        elif query_type == "relation_traversal":
            # Two steps: lookup entity, traverse relations
            steps.append(
                QueryStep(
                    step_id="step_1",
                    operation=QueryOperation.ENTITY_LOOKUP,
                    query=GraphQuery(
                        query_type=QueryType.ENTITY_LOOKUP,
                        entity_id=context.get("entity_id"),
                        max_results=1,
                    ),
                    description="Look up starting entity",
                    estimated_cost=0.2,
                )
            )

            steps.append(
                QueryStep(
                    step_id="step_2",
                    operation=QueryOperation.TRAVERSAL,
                    query=GraphQuery(
                        query_type=QueryType.TRAVERSAL,
                        relation_type=context.get("relation_type"),
                        max_depth=context.get("max_depth", 1),
                        max_results=context.get("max_results", 10),
                    ),
                    depends_on=["step_1"],
                    description="Traverse relations from starting entity",
                    estimated_cost=0.5,
                )
            )

        elif query_type == "multi_hop_query":
            # Multiple hops
            steps = self._create_multi_hop_steps(query, context)

        elif query_type == "vector_search":
            # Single step: vector similarity search
            steps.append(
                QueryStep(
                    step_id="step_1",
                    operation=QueryOperation.VECTOR_SEARCH,
                    query=GraphQuery(
                        query_type=QueryType.VECTOR_SEARCH,
                        embedding=context.get("query_embedding"),
                        entity_type=context.get("entity_type"),
                        max_results=context.get("max_results", 10),
                        score_threshold=context.get("score_threshold", 0.7),
                    ),
                    description="Find semantically similar entities",
                    estimated_cost=0.4,
                )
            )

        elif query_type == "path_finding":
            # Single step: find path between entities
            steps.append(
                QueryStep(
                    step_id="step_1",
                    operation=QueryOperation.TRAVERSAL,
                    query=GraphQuery(
                        query_type=QueryType.PATH_FINDING,
                        source_entity_id=context.get("source_id"),
                        target_entity_id=context.get("target_id"),
                        max_depth=context.get("max_depth", 5),
                        max_results=context.get("max_results", 10),
                    ),
                    description="Find paths between entities",
                    estimated_cost=0.7,
                )
            )

        elif query_type == "neighbor_query":
            # Two steps: lookup + get neighbors
            steps.append(
                QueryStep(
                    step_id="step_1",
                    operation=QueryOperation.ENTITY_LOOKUP,
                    query=GraphQuery(
                        query_type=QueryType.ENTITY_LOOKUP,
                        entity_id=context.get("entity_id"),
                        max_results=1,
                    ),
                    description="Look up central entity",
                    estimated_cost=0.2,
                )
            )

            steps.append(
                QueryStep(
                    step_id="step_2",
                    operation=QueryOperation.TRAVERSAL,
                    query=GraphQuery(
                        query_type=QueryType.TRAVERSAL,
                        max_depth=1,
                        max_results=context.get("max_results", 20),
                    ),
                    depends_on=["step_1"],
                    description="Get neighboring entities",
                    estimated_cost=0.4,
                )
            )

        return steps

    def _create_multi_hop_steps(self, query: str, context: Dict[str, Any]) -> List[QueryStep]:
        """Create steps for multi-hop query"""
        steps = []
        num_hops = context.get("num_hops", 2)

        # Step 1: Find starting entity
        steps.append(
            QueryStep(
                step_id="step_1",
                operation=QueryOperation.ENTITY_LOOKUP,
                query=GraphQuery(
                    query_type=QueryType.ENTITY_LOOKUP,
                    entity_id=context.get("start_entity_id"),
                    max_results=1,
                ),
                description="Find starting entity",
                estimated_cost=0.2,
            )
        )

        # Create hop steps
        for i in range(num_hops):
            hop_num = i + 1
            step_id = f"step_{hop_num + 1}"
            depends_on = [f"step_{hop_num}"]

            steps.append(
                QueryStep(
                    step_id=step_id,
                    operation=QueryOperation.TRAVERSAL,
                    query=GraphQuery(
                        query_type=QueryType.TRAVERSAL,
                        relation_type=context.get(f"hop{hop_num}_relation"),
                        max_depth=1,
                        max_results=context.get("max_results", 20),
                    ),
                    depends_on=depends_on,
                    description=f"Hop {hop_num}: Traverse to next level",
                    estimated_cost=0.4 + (0.1 * i),  # Cost increases with depth
                )
            )

        return steps

    def _create_generic_steps(
        self, query: str, query_info: Dict[str, Any], context: Dict[str, Any]
    ) -> List[QueryStep]:
        """Create generic steps when no pattern matches"""
        steps = []

        # Priority 1: If start_entity_id is provided, use traversal
        if context.get("start_entity_id"):
            # Step 1: Lookup starting entity
            steps.append(
                QueryStep(
                    step_id="step_1",
                    operation=QueryOperation.ENTITY_LOOKUP,
                    query=GraphQuery(
                        query_type=QueryType.ENTITY_LOOKUP,
                        entity_id=context.get("start_entity_id"),
                        max_results=1,
                    ),
                    description="Look up starting entity",
                    estimated_cost=0.2,
                )
            )

            # Step 2: Traverse from starting entity
            target_id = context.get("target_entity_id")
            if target_id:
                # Path finding if target is specified
                steps.append(
                    QueryStep(
                        step_id="step_2",
                        operation=QueryOperation.TRAVERSAL,
                        query=GraphQuery(
                            query_type=QueryType.PATH_FINDING,
                            source_entity_id=context.get("start_entity_id"),
                            target_entity_id=target_id,
                            max_depth=context.get("max_hops", 3),
                            max_results=context.get("max_results", 10),
                        ),
                        depends_on=["step_1"],
                        description="Find paths from start to target entity",
                        estimated_cost=0.6,
                    )
                )
            else:
                # General traversal if no target
                steps.append(
                    QueryStep(
                        step_id="step_2",
                        operation=QueryOperation.TRAVERSAL,
                        query=GraphQuery(
                            query_type=QueryType.TRAVERSAL,
                            entity_id=context.get("start_entity_id"),
                            relation_type=(
                                context.get("relation_types", [None])[0]
                                if context.get("relation_types")
                                else None
                            ),
                            max_depth=context.get("max_hops", 3),
                            max_results=context.get("max_results", 10),
                        ),
                        depends_on=["step_1"],
                        description="Traverse from starting entity",
                        estimated_cost=0.5,
                    )
                )

        # Priority 2: If query_embedding is provided, use vector search
        elif context.get("query_embedding"):
            steps.append(
                QueryStep(
                    step_id="step_1",
                    operation=QueryOperation.VECTOR_SEARCH,
                    query=GraphQuery(
                        query_type=QueryType.VECTOR_SEARCH,
                        embedding=context.get("query_embedding"),
                        entity_type=context.get("entity_type"),
                        max_results=context.get("max_results", 10),
                        score_threshold=context.get("score_threshold", 0.5),
                    ),
                    description="Search for relevant entities using vector similarity",
                    estimated_cost=0.5,
                )
            )

        # Priority 3: Default fallback - entity lookup by type if entity_type
        # is provided
        elif context.get("entity_type"):
            steps.append(
                QueryStep(
                    step_id="step_1",
                    operation=QueryOperation.FILTER,
                    query=GraphQuery(
                        query_type=QueryType.ENTITY_LOOKUP,
                        entity_type=context.get("entity_type"),
                        max_results=context.get("max_results", 10),
                    ),
                    description=f"Filter entities by type: {context.get('entity_type')}",
                    estimated_cost=0.3,
                )
            )

        # Priority 4: Last resort - simple vector search (may not work without
        # embeddings)
        else:
            steps.append(
                QueryStep(
                    step_id="step_1",
                    operation=QueryOperation.VECTOR_SEARCH,
                    query=GraphQuery(
                        query_type=QueryType.VECTOR_SEARCH,
                        embedding=None,  # Will need to be generated
                        max_results=context.get("max_results", 10),
                        score_threshold=0.5,
                    ),
                    description="Search for relevant entities (fallback - may not work without embeddings)",
                    estimated_cost=0.5,
                )
            )

        return steps

    def _generate_explanation(self, steps: List[QueryStep]) -> str:
        """Generate human-readable explanation of plan"""
        if not steps:
            return "No steps in plan"

        if len(steps) == 1:
            return f"Single-step query: {steps[0].description}"

        parts = [f"Multi-step query with {len(steps)} steps:"]
        for i, step in enumerate(steps, 1):
            parts.append(f"{i}. {step.description}")

        return "\n".join(parts)

    def optimize_plan(
        self,
        plan: QueryPlan,
        strategy: OptimizationStrategy = OptimizationStrategy.BALANCED,
    ) -> QueryPlan:
        """
        Optimize query execution plan

        Args:
            plan: Original query plan
            strategy: Optimization strategy

        Returns:
            Optimized query plan

        Example:
            ```python
            optimized = planner.optimize_plan(
                plan,
                strategy=OptimizationStrategy.MINIMIZE_COST
            )
            ```
        """
        if plan.optimized:
            return plan  # Already optimized

        # Use advanced optimizer if enabled
        if self._enable_advanced_optimization and self._optimizer:
            result = self._optimizer.optimize(plan)
            return result.optimized_plan

        # Fall back to basic optimization
        optimized_steps = list(plan.steps)

        if strategy == OptimizationStrategy.MINIMIZE_COST:
            optimized_steps = self._optimize_for_cost(optimized_steps)
        elif strategy == OptimizationStrategy.MINIMIZE_LATENCY:
            optimized_steps = self._optimize_for_latency(optimized_steps)
        else:  # BALANCED
            optimized_steps = self._optimize_balanced(optimized_steps)

        # Create optimized plan
        optimized_plan = QueryPlan(
            plan_id=plan.plan_id + "_opt",
            original_query=plan.original_query,
            steps=optimized_steps,
            optimized=True,
            explanation=plan.explanation + "\n(Optimized)",
            metadata=plan.metadata,
        )

        optimized_plan.total_estimated_cost = optimized_plan.calculate_total_cost()

        return optimized_plan

    def _optimize_for_cost(self, steps: List[QueryStep]) -> List[QueryStep]:
        """
        Optimize to minimize total cost

        Strategy: Execute cheaper operations first when possible
        """
        # Group steps by dependency level
        levels = self._get_dependency_levels(steps)

        optimized = []
        for level_steps in levels:
            # Sort by cost (ascending) within each level
            sorted_level = sorted(level_steps, key=lambda s: s.estimated_cost)
            optimized.extend(sorted_level)

        return optimized

    def _optimize_for_latency(self, steps: List[QueryStep]) -> List[QueryStep]:
        """
        Optimize to minimize latency

        Strategy: Maximize parallelization
        """
        # Already maximized in get_execution_order()
        # Just return original order
        return steps

    def _optimize_balanced(self, steps: List[QueryStep]) -> List[QueryStep]:
        """
        Balanced optimization

        Strategy: Balance cost and latency
        """
        levels = self._get_dependency_levels(steps)

        optimized = []
        for level_steps in levels:
            # Sort by cost but not too aggressively
            # Keep expensive operations that can run in parallel
            sorted_level = sorted(
                level_steps,
                key=lambda s: (s.estimated_cost > 0.7, s.estimated_cost),
            )
            optimized.extend(sorted_level)

        return optimized

    def _get_dependency_levels(self, steps: List[QueryStep]) -> List[List[QueryStep]]:
        """
        Group steps by dependency level

        Returns:
            List of lists, each containing steps at the same dependency level
        """
        # step_map = {step.step_id: step for step in steps}  # Reserved for
        # future use
        levels: List[List[QueryStep]] = []
        processed: Set[str] = set()

        while len(processed) < len(steps):
            current_level = []
            for step in steps:
                if step.step_id in processed:
                    continue
                # Check if all dependencies are processed
                if all(dep in processed for dep in step.depends_on):
                    current_level.append(step)

            if not current_level:
                break  # Should not happen with valid dependencies

            levels.append(current_level)
            processed.update(step.step_id for step in current_level)

        return levels

    def translate_to_graph_query(
        self,
        natural_language_query: str,
        context: Optional[Dict[str, Any]] = None,
    ) -> GraphQuery:
        """
        Translate natural language to a single graph query

        For simple queries that don't need decomposition.

        Args:
            natural_language_query: Natural language query
            context: Query context (embeddings, entity IDs, etc.)

        Returns:
            Single graph query

        Example:
            ```python
            query = planner.translate_to_graph_query(
                "Find entities similar to X",
                context={"query_embedding": [0.1, 0.2, ...]}
            )
            ```
        """
        context = context or {}
        query_lower = natural_language_query.lower()

        # Determine query type
        if "similar" in query_lower or "related" in query_lower:
            return GraphQuery(
                query_type=QueryType.VECTOR_SEARCH,
                embedding=context.get("query_embedding"),
                entity_type=context.get("entity_type"),
                max_results=context.get("max_results", 10),
                score_threshold=context.get("score_threshold", 0.7),
            )

        elif "path" in query_lower:
            return GraphQuery(
                query_type=QueryType.PATH_FINDING,
                source_entity_id=context.get("source_id"),
                target_entity_id=context.get("target_id"),
                max_depth=context.get("max_depth", 5),
                max_results=context.get("max_results", 10),
            )

        elif "neighbor" in query_lower or "connected to" in query_lower:
            return GraphQuery(
                query_type=QueryType.TRAVERSAL,
                entity_id=context.get("entity_id"),
                relation_type=context.get("relation_type"),
                max_depth=1,
                max_results=context.get("max_results", 20),
            )

        else:
            # Default to entity lookup
            return GraphQuery(
                query_type=QueryType.ENTITY_LOOKUP,
                entity_id=context.get("entity_id"),
                entity_type=context.get("entity_type"),
                properties=context.get("properties", {}),
                max_results=context.get("max_results", 10),
            )

    # Advanced Optimization Methods

    def update_statistics(self) -> None:
        """
        Update query statistics from graph store

        Call this periodically to keep optimizer statistics up-to-date
        """
        if self._enable_advanced_optimization and self._statistics_collector and self._optimizer:
            statistics = self._statistics_collector.collect_from_graph_store(self.graph_store)
            self._optimizer.update_statistics(statistics)

    def record_execution_time(self, execution_time_ms: float) -> None:
        """
        Record query execution time for statistics

        Args:
            execution_time_ms: Execution time in milliseconds
        """
        if self._statistics_collector:
            self._statistics_collector.record_execution_time(execution_time_ms)

    def get_optimizer_stats(self) -> Dict[str, Any]:
        """
        Get optimizer statistics

        Returns:
            Dictionary with optimizer statistics
        """
        if not self._enable_advanced_optimization or not self._optimizer:
            return {"enabled": False}

        return {
            "enabled": True,
            "optimizations_performed": self._optimizer.get_optimization_count(),
            "avg_execution_time_ms": (
                self._statistics_collector.get_average_execution_time()
                if self._statistics_collector
                else 0.0
            ),
            "p95_execution_time_ms": (
                self._statistics_collector.get_execution_percentile(0.95)
                if self._statistics_collector
                else 0.0
            ),
            "entity_count": self._optimizer.statistics.entity_count,
            "relation_count": self._optimizer.statistics.relation_count,
            "avg_degree": self._optimizer.statistics.avg_degree,
        }

    # ========================================================================
    # Logic Query Support
    # ========================================================================

    def plan_logic_query(self, logic_query: str) -> Union[QueryPlan, List[Any]]:
        """
        Create execution plan from logic query DSL

        This method parses a logic query (e.g., "Find(Person) WHERE age > 30")
        and converts it directly to a QueryPlan.

        Args:
            logic_query: Logic query string in DSL format

        Returns:
            QueryPlan if successful, List[ParserError] if errors occurred

        Example:
            ```python
            plan = planner.plan_logic_query("Find(Person) WHERE age > 30")

            if isinstance(plan, list):
                # Parsing errors
                for error in plan:
                    print(f"Error at line {error.line}: {error.message}")
            else:
                # Success - execute the plan
                result = await graph_store.execute_plan(plan)
            ```
        """
        if not LOGIC_PARSER_AVAILABLE:
            raise ImportError("Logic parser not available. Install lark-parser.")

        if self._logic_parser is None:
            raise ValueError("Logic parser not initialized. Provide schema to QueryPlanner.")

        # Parse logic query to QueryPlan
        return self._logic_parser.parse_to_query_plan(logic_query)

    def supports_logic_queries(self) -> bool:
        """
        Check if logic query support is available

        Returns:
            True if logic queries are supported, False otherwise
        """
        return LOGIC_PARSER_AVAILABLE and self._logic_parser is not None