aiecs 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiecs/__init__.py +72 -0
- aiecs/__main__.py +41 -0
- aiecs/aiecs_client.py +469 -0
- aiecs/application/__init__.py +10 -0
- aiecs/application/executors/__init__.py +10 -0
- aiecs/application/executors/operation_executor.py +363 -0
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +100 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
- aiecs/application/knowledge_graph/search/reranker.py +295 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +484 -0
- aiecs/config/__init__.py +16 -0
- aiecs/config/config.py +498 -0
- aiecs/config/graph_config.py +137 -0
- aiecs/config/registry.py +23 -0
- aiecs/core/__init__.py +46 -0
- aiecs/core/interface/__init__.py +34 -0
- aiecs/core/interface/execution_interface.py +152 -0
- aiecs/core/interface/storage_interface.py +171 -0
- aiecs/domain/__init__.py +289 -0
- aiecs/domain/agent/__init__.py +189 -0
- aiecs/domain/agent/base_agent.py +697 -0
- aiecs/domain/agent/exceptions.py +103 -0
- aiecs/domain/agent/graph_aware_mixin.py +559 -0
- aiecs/domain/agent/hybrid_agent.py +490 -0
- aiecs/domain/agent/integration/__init__.py +26 -0
- aiecs/domain/agent/integration/context_compressor.py +222 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
- aiecs/domain/agent/integration/retry_policy.py +219 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +646 -0
- aiecs/domain/agent/lifecycle.py +296 -0
- aiecs/domain/agent/llm_agent.py +300 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +197 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +160 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
- aiecs/domain/agent/models.py +317 -0
- aiecs/domain/agent/observability.py +407 -0
- aiecs/domain/agent/persistence.py +289 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +161 -0
- aiecs/domain/agent/prompts/formatters.py +189 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +260 -0
- aiecs/domain/agent/tool_agent.py +257 -0
- aiecs/domain/agent/tools/__init__.py +12 -0
- aiecs/domain/agent/tools/schema_generator.py +221 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +477 -0
- aiecs/domain/community/analytics.py +481 -0
- aiecs/domain/community/collaborative_workflow.py +642 -0
- aiecs/domain/community/communication_hub.py +645 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +800 -0
- aiecs/domain/community/community_manager.py +813 -0
- aiecs/domain/community/decision_engine.py +879 -0
- aiecs/domain/community/exceptions.py +225 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +268 -0
- aiecs/domain/community/resource_manager.py +457 -0
- aiecs/domain/community/shared_context_manager.py +603 -0
- aiecs/domain/context/__init__.py +58 -0
- aiecs/domain/context/context_engine.py +989 -0
- aiecs/domain/context/conversation_models.py +354 -0
- aiecs/domain/context/graph_memory.py +467 -0
- aiecs/domain/execution/__init__.py +12 -0
- aiecs/domain/execution/model.py +57 -0
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +130 -0
- aiecs/domain/knowledge_graph/models/evidence.py +194 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
- aiecs/domain/knowledge_graph/models/path.py +179 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
- aiecs/domain/knowledge_graph/models/query.py +272 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
- aiecs/domain/knowledge_graph/models/relation.py +136 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
- aiecs/domain/task/__init__.py +13 -0
- aiecs/domain/task/dsl_processor.py +613 -0
- aiecs/domain/task/model.py +62 -0
- aiecs/domain/task/task_context.py +268 -0
- aiecs/infrastructure/__init__.py +24 -0
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +601 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
- aiecs/infrastructure/graph_storage/cache.py +429 -0
- aiecs/infrastructure/graph_storage/distributed.py +226 -0
- aiecs/infrastructure/graph_storage/error_handling.py +390 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +514 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
- aiecs/infrastructure/graph_storage/metrics.py +357 -0
- aiecs/infrastructure/graph_storage/migration.py +413 -0
- aiecs/infrastructure/graph_storage/pagination.py +471 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
- aiecs/infrastructure/graph_storage/postgres.py +871 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +623 -0
- aiecs/infrastructure/graph_storage/streaming.py +495 -0
- aiecs/infrastructure/messaging/__init__.py +13 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
- aiecs/infrastructure/messaging/websocket_manager.py +298 -0
- aiecs/infrastructure/monitoring/__init__.py +34 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
- aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
- aiecs/infrastructure/monitoring/structured_logger.py +48 -0
- aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
- aiecs/infrastructure/persistence/__init__.py +24 -0
- aiecs/infrastructure/persistence/context_engine_client.py +187 -0
- aiecs/infrastructure/persistence/database_manager.py +333 -0
- aiecs/infrastructure/persistence/file_storage.py +754 -0
- aiecs/infrastructure/persistence/redis_client.py +220 -0
- aiecs/llm/__init__.py +86 -0
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/callbacks/custom_callbacks.py +264 -0
- aiecs/llm/client_factory.py +420 -0
- aiecs/llm/clients/__init__.py +33 -0
- aiecs/llm/clients/base_client.py +193 -0
- aiecs/llm/clients/googleai_client.py +181 -0
- aiecs/llm/clients/openai_client.py +131 -0
- aiecs/llm/clients/vertex_client.py +437 -0
- aiecs/llm/clients/xai_client.py +184 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +275 -0
- aiecs/llm/config/config_validator.py +236 -0
- aiecs/llm/config/model_config.py +151 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +91 -0
- aiecs/main.py +363 -0
- aiecs/scripts/__init__.py +3 -0
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/version_manager.py +215 -0
- aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
- aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
- aiecs/scripts/dependance_check/__init__.py +17 -0
- aiecs/scripts/dependance_check/dependency_checker.py +938 -0
- aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
- aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
- aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
- aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
- aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
- aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
- aiecs/scripts/tools_develop/README.md +449 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
- aiecs/scripts/tools_develop/verify_tools.py +356 -0
- aiecs/tasks/__init__.py +1 -0
- aiecs/tasks/worker.py +172 -0
- aiecs/tools/__init__.py +299 -0
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +303 -0
- aiecs/tools/apisource/providers/__init__.py +115 -0
- aiecs/tools/apisource/providers/base.py +664 -0
- aiecs/tools/apisource/providers/census.py +401 -0
- aiecs/tools/apisource/providers/fred.py +564 -0
- aiecs/tools/apisource/providers/newsapi.py +412 -0
- aiecs/tools/apisource/providers/worldbank.py +357 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +375 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
- aiecs/tools/apisource/tool.py +850 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +338 -0
- aiecs/tools/base_tool.py +201 -0
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +599 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
- aiecs/tools/docs/content_insertion_tool.py +1333 -0
- aiecs/tools/docs/document_creator_tool.py +1317 -0
- aiecs/tools/docs/document_layout_tool.py +1166 -0
- aiecs/tools/docs/document_parser_tool.py +994 -0
- aiecs/tools/docs/document_writer_tool.py +1818 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
- aiecs/tools/langchain_adapter.py +542 -0
- aiecs/tools/schema_generator.py +275 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +589 -0
- aiecs/tools/search_tool/cache.py +260 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +216 -0
- aiecs/tools/search_tool/core.py +749 -0
- aiecs/tools/search_tool/deduplicator.py +123 -0
- aiecs/tools/search_tool/error_handler.py +271 -0
- aiecs/tools/search_tool/metrics.py +371 -0
- aiecs/tools/search_tool/rate_limiter.py +178 -0
- aiecs/tools/search_tool/schemas.py +277 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
- aiecs/tools/statistics/data_loader_tool.py +564 -0
- aiecs/tools/statistics/data_profiler_tool.py +658 -0
- aiecs/tools/statistics/data_transformer_tool.py +573 -0
- aiecs/tools/statistics/data_visualizer_tool.py +495 -0
- aiecs/tools/statistics/model_trainer_tool.py +487 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
- aiecs/tools/task_tools/__init__.py +86 -0
- aiecs/tools/task_tools/chart_tool.py +732 -0
- aiecs/tools/task_tools/classfire_tool.py +922 -0
- aiecs/tools/task_tools/image_tool.py +447 -0
- aiecs/tools/task_tools/office_tool.py +684 -0
- aiecs/tools/task_tools/pandas_tool.py +635 -0
- aiecs/tools/task_tools/report_tool.py +635 -0
- aiecs/tools/task_tools/research_tool.py +392 -0
- aiecs/tools/task_tools/scraper_tool.py +715 -0
- aiecs/tools/task_tools/stats_tool.py +688 -0
- aiecs/tools/temp_file_manager.py +130 -0
- aiecs/tools/tool_executor/__init__.py +37 -0
- aiecs/tools/tool_executor/tool_executor.py +881 -0
- aiecs/utils/LLM_output_structor.py +445 -0
- aiecs/utils/__init__.py +34 -0
- aiecs/utils/base_callback.py +47 -0
- aiecs/utils/cache_provider.py +695 -0
- aiecs/utils/execution_utils.py +184 -0
- aiecs/utils/logging.py +1 -0
- aiecs/utils/prompt_loader.py +14 -0
- aiecs/utils/token_usage_repository.py +323 -0
- aiecs/ws/__init__.py +0 -0
- aiecs/ws/socket_server.py +52 -0
- aiecs-1.5.1.dist-info/METADATA +608 -0
- aiecs-1.5.1.dist-info/RECORD +302 -0
- aiecs-1.5.1.dist-info/WHEEL +5 -0
- aiecs-1.5.1.dist-info/entry_points.txt +10 -0
- aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
- aiecs-1.5.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,923 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Knowledge Graph Search Tool
|
|
3
|
+
|
|
4
|
+
AIECS tool for searching knowledge graphs with multiple search modes.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Dict, Any, List, Optional
|
|
8
|
+
from pydantic import BaseModel, Field
|
|
9
|
+
from enum import Enum
|
|
10
|
+
|
|
11
|
+
from aiecs.tools.base_tool import BaseTool
|
|
12
|
+
from aiecs.tools import register_tool
|
|
13
|
+
from aiecs.infrastructure.graph_storage.in_memory import InMemoryGraphStore
|
|
14
|
+
from aiecs.application.knowledge_graph.search.hybrid_search import (
|
|
15
|
+
HybridSearchStrategy,
|
|
16
|
+
HybridSearchConfig,
|
|
17
|
+
SearchMode,
|
|
18
|
+
)
|
|
19
|
+
from aiecs.application.knowledge_graph.retrieval.retrieval_strategies import (
|
|
20
|
+
PersonalizedPageRank,
|
|
21
|
+
MultiHopRetrieval,
|
|
22
|
+
FilteredRetrieval,
|
|
23
|
+
RetrievalCache,
|
|
24
|
+
)
|
|
25
|
+
from aiecs.application.knowledge_graph.traversal.enhanced_traversal import (
|
|
26
|
+
EnhancedTraversal,
|
|
27
|
+
)
|
|
28
|
+
from aiecs.domain.knowledge_graph.models.path_pattern import PathPattern
|
|
29
|
+
from aiecs.application.knowledge_graph.search.reranker import (
|
|
30
|
+
ResultReranker,
|
|
31
|
+
ScoreCombinationMethod,
|
|
32
|
+
)
|
|
33
|
+
from aiecs.application.knowledge_graph.search.reranker_strategies import (
|
|
34
|
+
TextSimilarityReranker,
|
|
35
|
+
SemanticReranker,
|
|
36
|
+
StructuralReranker,
|
|
37
|
+
HybridReranker,
|
|
38
|
+
)
|
|
39
|
+
from aiecs.domain.knowledge_graph.models.entity import Entity
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class SearchModeEnum(str, Enum):
    """Search mode enumeration"""

    # Members mix in ``str``, so each one compares equal to its plain string
    # value (e.g. ``SearchModeEnum.VECTOR == "vector"``).

    # Similarity / structure / combined modes
    VECTOR = "vector"
    GRAPH = "graph"
    HYBRID = "hybrid"

    # Retrieval-strategy modes
    PAGERANK = "pagerank"
    MULTIHOP = "multihop"
    FILTERED = "filtered"

    # Pattern-based traversal mode
    TRAVERSE = "traverse"
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class GraphSearchInput(BaseModel):
    """Input schema for Graph Search Tool (legacy, for execute() method)"""

    # --- Mode selection (the only required field) ---------------------------
    mode: SearchModeEnum = Field(
        ...,
        description=(
            "Search mode: "
            "'vector' (similarity), "
            "'graph' (structure), "
            "'hybrid' (combined), "
            "'pagerank' (importance), "
            "'multihop' (neighbors), "
            "'filtered' (by properties), "
            "'traverse' (pattern-based)"
        ),
    )

    # --- Query inputs --------------------------------------------------------
    query: Optional[str] = Field(
        default=None,
        description=(
            "Natural language query "
            "(converted to embedding for vector/hybrid search)"
        ),
    )
    query_embedding: Optional[List[float]] = Field(
        default=None,
        description="Query vector embedding (for vector/hybrid search)",
    )
    seed_entity_ids: Optional[List[str]] = Field(
        default=None,
        description=(
            "Starting entity IDs "
            "(for graph/pagerank/multihop/traverse modes)"
        ),
    )

    # --- Filters -------------------------------------------------------------
    entity_type: Optional[str] = Field(
        default=None,
        description=(
            "Filter by entity type "
            "(e.g., 'Person', 'Company', 'Location')"
        ),
    )
    property_filters: Optional[Dict[str, Any]] = Field(
        default=None,
        description=(
            "Filter by properties "
            "(e.g., {'role': 'Engineer', 'level': 'Senior'})"
        ),
    )
    relation_types: Optional[List[str]] = Field(
        default=None,
        description=(
            "Filter by relation types "
            "(e.g., ['WORKS_FOR', 'LOCATED_IN'])"
        ),
    )

    # --- Result-size and depth limits (range-validated) ----------------------
    max_results: int = Field(
        default=10,
        ge=1,
        le=100,
        description="Maximum number of results to return (1-100)",
    )
    max_depth: int = Field(
        default=2,
        ge=1,
        le=5,
        description=(
            "Maximum traversal depth "
            "for graph/multihop/traverse modes (1-5)"
        ),
    )

    # --- Vector / hybrid tuning ----------------------------------------------
    vector_threshold: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description="Minimum similarity threshold for vector search (0.0-1.0)",
    )
    # The two weights default to 0.6 + 0.4; each is validated to [0.0, 1.0]
    # independently, and nothing in this model requires them to sum to 1.
    vector_weight: float = Field(
        default=0.6,
        ge=0.0,
        le=1.0,
        description="Weight for vector similarity in hybrid mode (0.0-1.0)",
    )
    graph_weight: float = Field(
        default=0.4,
        ge=0.0,
        le=1.0,
        description="Weight for graph structure in hybrid mode (0.0-1.0)",
    )
    expand_results: bool = Field(
        default=True,
        description=(
            "Whether to expand results with graph neighbors (hybrid mode)"
        ),
    )

    # NOTE(review): a comment in _execute states that use_cache is accepted
    # but not currently used by the implementation.
    use_cache: bool = Field(
        default=True,
        description="Whether to use result caching for performance",
    )

    # --- Reranking parameters ------------------------------------------------
    enable_reranking: bool = Field(
        default=False,
        description=(
            "Whether to enable result reranking for improved relevance"
        ),
    )
    rerank_strategy: Optional[str] = Field(
        default="text",
        description=(
            "Reranking strategy: 'text' (text similarity), "
            "'semantic' (embeddings), "
            "'structural' (graph importance), "
            "'hybrid' (all signals)"
        ),
    )
    rerank_top_k: Optional[int] = Field(
        default=None,
        ge=1,
        le=500,
        description=(
            "Top-K results to fetch before reranking (for performance). "
            "If None, uses max_results. "
            "Should be >= max_results."
        ),
    )
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
# Schemas for individual operations (used with run_async)
|
|
167
|
+
class VectorSearchSchema(BaseModel):
    """Schema for vector_search operation"""

    # Query inputs: both are optional at the schema level.
    query: Optional[str] = Field(
        default=None,
        description="Natural language query",
    )
    query_embedding: Optional[List[float]] = Field(
        default=None,
        description="Query vector embedding",
    )

    # Filtering and limits
    entity_type: Optional[str] = Field(
        default=None,
        description="Filter by entity type",
    )
    max_results: int = Field(
        default=10,
        ge=1,
        le=100,
        description="Maximum results",
    )
    vector_threshold: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description="Minimum similarity",
    )

    # Reranking controls. Unlike max_results, rerank_top_k carries no range
    # constraint in this schema.
    enable_reranking: bool = Field(
        default=False,
        description="Enable result reranking",
    )
    rerank_strategy: Optional[str] = Field(
        default="text",
        description="Reranking strategy",
    )
    rerank_top_k: Optional[int] = Field(
        default=None,
        description="Top-K for reranking",
    )
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
class GraphSearchSchema(BaseModel):
    """Schema for graph_search operation"""

    # Required: the search has no default starting point.
    seed_entity_ids: List[str] = Field(
        ...,
        description="Starting entity IDs",
    )

    # Range-validated limits
    max_depth: int = Field(
        default=2,
        ge=1,
        le=5,
        description="Maximum traversal depth",
    )
    max_results: int = Field(
        default=10,
        ge=1,
        le=100,
        description="Maximum results",
    )
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
class HybridSearchSchema(BaseModel):
    """Schema for hybrid_search operation"""

    # Query inputs: all optional at the schema level.
    query: Optional[str] = Field(
        default=None,
        description="Natural language query",
    )
    query_embedding: Optional[List[float]] = Field(
        default=None,
        description="Query vector embedding",
    )
    seed_entity_ids: Optional[List[str]] = Field(
        default=None,
        description="Starting entity IDs",
    )
    entity_type: Optional[str] = Field(
        default=None,
        description="Filter by entity type",
    )

    # Range-validated limits
    max_results: int = Field(
        default=10,
        ge=1,
        le=100,
        description="Maximum results",
    )
    max_depth: int = Field(
        default=2,
        ge=1,
        le=5,
        description="Maximum graph depth",
    )

    # Vector/graph blending: each weight is validated to [0.0, 1.0] on its own.
    vector_weight: float = Field(
        default=0.6,
        ge=0.0,
        le=1.0,
        description="Vector weight",
    )
    graph_weight: float = Field(
        default=0.4,
        ge=0.0,
        le=1.0,
        description="Graph weight",
    )
    expand_results: bool = Field(
        default=True,
        description="Expand with neighbors",
    )
    vector_threshold: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description="Minimum similarity",
    )

    # Reranking controls. The default strategy here is "hybrid".
    enable_reranking: bool = Field(
        default=False,
        description="Enable result reranking",
    )
    rerank_strategy: Optional[str] = Field(
        default="hybrid",
        description="Reranking strategy",
    )
    rerank_top_k: Optional[int] = Field(
        default=None,
        description="Top-K for reranking",
    )
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
class PagerankSearchSchema(BaseModel):
    """Schema for pagerank_search operation"""

    # Required seed entities
    seed_entity_ids: List[str] = Field(
        ...,
        description="Starting entity IDs",
    )

    # Range-validated result cap
    max_results: int = Field(
        default=10,
        ge=1,
        le=100,
        description="Maximum results",
    )
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
class MultihopSearchSchema(BaseModel):
    """Schema for multihop_search operation"""

    # Required seed entities
    seed_entity_ids: List[str] = Field(
        ...,
        description="Starting entity IDs",
    )

    # Range-validated limits
    max_depth: int = Field(
        default=2,
        ge=1,
        le=5,
        description="Maximum hops",
    )
    max_results: int = Field(
        default=10,
        ge=1,
        le=100,
        description="Maximum results",
    )
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
class FilteredSearchSchema(BaseModel):
    """Schema for filtered_search operation"""

    # Both filters are optional at the schema level.
    entity_type: Optional[str] = Field(
        default=None,
        description="Filter by entity type",
    )
    property_filters: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Filter by properties",
    )

    # Range-validated result cap
    max_results: int = Field(
        default=10,
        ge=1,
        le=100,
        description="Maximum results",
    )
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
class TraverseSearchSchema(BaseModel):
    """Schema for traverse_search operation"""

    # Required seed entities
    seed_entity_ids: List[str] = Field(
        ...,
        description="Starting entity IDs",
    )

    # Optional relation-type filter
    relation_types: Optional[List[str]] = Field(
        default=None,
        description="Filter by relation types",
    )

    # Range-validated limits
    max_depth: int = Field(
        default=2,
        ge=1,
        le=5,
        description="Maximum depth",
    )
    max_results: int = Field(
        default=10,
        ge=1,
        le=100,
        description="Maximum results",
    )
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
@register_tool("graph_search")
|
|
239
|
+
class GraphSearchTool(BaseTool):
|
|
240
|
+
"""
|
|
241
|
+
Knowledge Graph Search Tool
|
|
242
|
+
|
|
243
|
+
Powerful search tool for querying knowledge graphs with multiple search modes:
|
|
244
|
+
|
|
245
|
+
1. **Vector Search** - Find semantically similar entities
|
|
246
|
+
2. **Graph Search** - Explore graph structure from seed entities
|
|
247
|
+
3. **Hybrid Search** - Combine vector similarity with graph structure
|
|
248
|
+
4. **PageRank** - Find important/influential entities
|
|
249
|
+
5. **Multi-Hop** - Find entities within N hops
|
|
250
|
+
6. **Filtered** - Precise filtering by properties
|
|
251
|
+
7. **Traverse** - Pattern-based graph traversal
|
|
252
|
+
|
|
253
|
+
Example Usage:
|
|
254
|
+
```python
|
|
255
|
+
# Vector search
|
|
256
|
+
results = tool.execute({
|
|
257
|
+
"mode": "vector",
|
|
258
|
+
"query": "machine learning researchers",
|
|
259
|
+
"max_results": 10
|
|
260
|
+
})
|
|
261
|
+
|
|
262
|
+
# Hybrid search
|
|
263
|
+
results = tool.execute({
|
|
264
|
+
"mode": "hybrid",
|
|
265
|
+
"query": "AI research",
|
|
266
|
+
"seed_entity_ids": ["person_1"],
|
|
267
|
+
"vector_weight": 0.6,
|
|
268
|
+
"graph_weight": 0.4
|
|
269
|
+
})
|
|
270
|
+
|
|
271
|
+
# PageRank
|
|
272
|
+
results = tool.execute({
|
|
273
|
+
"mode": "pagerank",
|
|
274
|
+
"seed_entity_ids": ["important_node"],
|
|
275
|
+
"max_results": 20
|
|
276
|
+
})
|
|
277
|
+
|
|
278
|
+
# Filtered search
|
|
279
|
+
results = tool.execute({
|
|
280
|
+
"mode": "filtered",
|
|
281
|
+
"entity_type": "Person",
|
|
282
|
+
"property_filters": {"role": "Engineer", "experience": "Senior"}
|
|
283
|
+
})
|
|
284
|
+
```
|
|
285
|
+
"""
|
|
286
|
+
|
|
287
|
+
name: str = "graph_search"
|
|
288
|
+
description: str = """Search knowledge graphs with multiple powerful search modes.
|
|
289
|
+
|
|
290
|
+
This tool enables sophisticated graph querying including:
|
|
291
|
+
- Semantic similarity search (vector embeddings)
|
|
292
|
+
- Graph structure exploration
|
|
293
|
+
- Hybrid search combining both approaches
|
|
294
|
+
- Importance ranking (PageRank)
|
|
295
|
+
- Multi-hop neighbor discovery
|
|
296
|
+
- Property-based filtering
|
|
297
|
+
- Pattern-based traversal
|
|
298
|
+
|
|
299
|
+
Use this tool when you need to:
|
|
300
|
+
- Find entities similar to a query
|
|
301
|
+
- Explore relationships in a knowledge graph
|
|
302
|
+
- Find influential entities
|
|
303
|
+
- Discover connections between entities
|
|
304
|
+
- Filter entities by specific criteria
|
|
305
|
+
"""
|
|
306
|
+
|
|
307
|
+
input_schema: type[BaseModel] = GraphSearchInput
|
|
308
|
+
|
|
309
|
+
def __init__(self):
    """Create the tool with every search component left unset.

    Nothing is constructed here: the store, strategies and cache are all
    placeholders (``None``) and ``_initialized`` starts out ``False``.
    """
    super().__init__()

    # Graph store (shared with KG builder)
    self.graph_store = None

    # Search strategies. The attribute names carry a _strategy suffix (or
    # otherwise differ) so they do not shadow the tool's public methods.
    (
        self.hybrid_search_strategy,
        self.pagerank_strategy,
        self.multihop_strategy,
        self.filtered_strategy,
        self.traversal_strategy,
    ) = (None,) * 5

    # Retrieval cache placeholder
    self.cache = None

    # Flag consulted by _initialize() to make setup run only once
    self._initialized = False
|
|
325
|
+
|
|
326
|
+
async def _initialize(self):
    """Lazy initialization of components

    On the first call this creates and initializes an in-memory graph
    store, builds the search/retrieval/traversal strategies on top of it,
    creates the retrieval cache and the reranker table, and then sets
    ``_initialized``. Once that flag is set, later calls return at once.
    """
    if self._initialized:
        return

    # Initialize graph store (use in-memory for now)
    # In production, this would be configurable
    self.graph_store = InMemoryGraphStore()
    await self.graph_store.initialize()

    # Read the store once; there are no further awaits below, so every
    # component is built against the same store object.
    store = self.graph_store

    # Search strategies, all bound to the same store
    self.hybrid_search_strategy = HybridSearchStrategy(store)
    self.pagerank_strategy = PersonalizedPageRank(store)
    self.multihop_strategy = MultiHopRetrieval(store)
    self.filtered_strategy = FilteredRetrieval(store)
    self.traversal_strategy = EnhancedTraversal(store)

    # Retrieval cache (max_size=100, ttl=300)
    self.cache = RetrievalCache(max_size=100, ttl=300)

    # Rerankers keyed by the strategy names accepted in rerank_strategy
    self._rerankers = dict(
        text=TextSimilarityReranker(),
        semantic=SemanticReranker(),
        structural=StructuralReranker(store),
        hybrid=HybridReranker(store),
    )

    self._initialized = True
|
|
355
|
+
|
|
356
|
+
async def _execute(self, **kwargs) -> Dict[str, Any]:
    """
    Run one graph search in the mode given by ``kwargs["mode"]``.

    Args:
        **kwargs: Tool input parameters

    Returns:
        Dictionary with search results. On success it carries ``success``,
        ``mode``, ``num_results``, ``results`` and ``reranked``; an unknown
        mode or any exception raised while searching yields ``success``
        False plus an ``error`` message.
    """
    # Make sure the store, strategies and rerankers exist
    await self._initialize()

    # Read every input, applying defaults where one is defined
    arg = kwargs.get
    mode = arg("mode")
    text_query = arg("query")
    embedding = arg("query_embedding")
    seeds = arg("seed_entity_ids")
    type_filter = arg("entity_type")
    prop_filters = arg("property_filters")
    rel_types = arg("relation_types")
    limit = arg("max_results", 10)
    depth = arg("max_depth", 2)
    threshold = arg("vector_threshold", 0.0)
    w_vector = arg("vector_weight", 0.6)
    w_graph = arg("graph_weight", 0.4)
    expand = arg("expand_results", True)
    # use_cache may be present in kwargs, but nothing here reads it

    # Reranking options
    rerank_on = arg("enable_reranking", False)
    rerank_name = arg("rerank_strategy", "text")
    rerank_k = arg("rerank_top_k")

    # No embedding model is wired in here: a text query that arrives without
    # an embedding gets a fixed placeholder vector
    if text_query and not embedding:
        embedding = [0.1] * 128

    try:
        # When reranking with a top-k, fetch at least that many candidates;
        # the reranker trims back down to `limit` afterwards
        fetch_limit = max(rerank_k, limit) if (rerank_on and rerank_k) else limit

        if mode == SearchModeEnum.VECTOR:
            found = await self._vector_search(embedding, type_filter, fetch_limit, threshold)
        elif mode == SearchModeEnum.GRAPH:
            found = await self._graph_search(seeds, depth, fetch_limit)
        elif mode == SearchModeEnum.HYBRID:
            found = await self._hybrid_search(
                embedding,
                seeds,
                type_filter,
                fetch_limit,
                depth,
                w_vector,
                w_graph,
                expand,
                threshold,
            )
        elif mode == SearchModeEnum.PAGERANK:
            found = await self._pagerank_search(seeds, fetch_limit)
        elif mode == SearchModeEnum.MULTIHOP:
            found = await self._multihop_search(seeds, depth, fetch_limit)
        elif mode == SearchModeEnum.FILTERED:
            found = await self._filtered_search(type_filter, prop_filters, fetch_limit)
        elif mode == SearchModeEnum.TRAVERSE:
            found = await self._traverse_search(seeds, rel_types, depth, fetch_limit)
        else:
            return {
                "success": False,
                "error": f"Unknown search mode: {mode}",
            }

        # Rerank only when asked to and there is something to rerank
        if rerank_on and found:
            found = await self._apply_reranking(
                results=found,
                query=text_query,
                query_embedding=embedding,
                strategy=rerank_name,
                max_results=limit,
            )

        response = {
            "success": True,
            "mode": mode,
            "num_results": len(found),
            "results": found,
            "reranked": rerank_on,
        }
        return response

    except Exception as exc:
        return {"success": False, "error": str(exc)}
|
|
475
|
+
|
|
476
|
+
async def _vector_search(
    self,
    query_embedding: List[float],
    entity_type: Optional[str],
    max_results: int,
    vector_threshold: float,
) -> List[Dict[str, Any]]:
    """Run a vector similarity lookup against the graph store.

    Args:
        query_embedding: Query vector; a falsy value short-circuits to ``[]``.
        entity_type: Forwarded as the store's ``entity_type`` argument.
        max_results: Forwarded as the store's ``max_results`` argument.
        vector_threshold: Forwarded as the store's ``score_threshold``.

    Returns:
        One dict per ``(entity, score)`` pair from the store, with
        ``entity_id``, ``entity_type``, ``properties`` and ``score`` keys.
    """
    if not query_embedding:
        return []

    scored_entities = await self.graph_store.vector_search(
        query_embedding=query_embedding,
        entity_type=entity_type,
        max_results=max_results,
        score_threshold=vector_threshold,
    )

    hits: List[Dict[str, Any]] = []
    for entity, score in scored_entities:
        hits.append(
            {
                "entity_id": entity.id,
                "entity_type": entity.entity_type,
                "properties": entity.properties,
                "score": score,
            }
        )
    return hits
|
|
503
|
+
|
|
504
|
+
async def _graph_search(
    self,
    seed_entity_ids: Optional[List[str]],
    max_depth: int,
    max_results: int,
) -> List[Dict[str, Any]]:
    """Search by graph structure, starting from the given seed entities.

    Args:
        seed_entity_ids: Starting entities; empty or ``None`` yields ``[]``.
        max_depth: Used as the config's ``max_graph_depth``.
        max_results: Used as the config's ``max_results``.

    Returns:
        One dict per ``(entity, score)`` pair from the hybrid strategy, with
        ``entity_id``, ``entity_type``, ``properties`` and ``score`` keys.
    """
    if not seed_entity_ids:
        return []

    graph_only = HybridSearchConfig(
        mode=SearchMode.GRAPH_ONLY,
        max_graph_depth=max_depth,
        max_results=max_results,
    )

    # A placeholder embedding is passed alongside the GRAPH_ONLY config
    placeholder_embedding = [0.0]
    scored_entities = await self.hybrid_search_strategy.search(
        query_embedding=placeholder_embedding,
        config=graph_only,
        seed_entity_ids=seed_entity_ids,
    )

    hits: List[Dict[str, Any]] = []
    for entity, score in scored_entities:
        hits.append(
            {
                "entity_id": entity.id,
                "entity_type": entity.entity_type,
                "properties": entity.properties,
                "score": score,
            }
        )
    return hits
|
|
535
|
+
|
|
536
|
+
async def _hybrid_search(
    self,
    query_embedding: Optional[List[float]],
    seed_entity_ids: Optional[List[str]],
    entity_type: Optional[str],
    max_results: int,
    max_depth: int,
    vector_weight: float,
    graph_weight: float,
    expand_results: bool,
    vector_threshold: float,
) -> List[Dict[str, Any]]:
    """Run the hybrid strategy in HYBRID mode.

    Every argument except ``query_embedding`` and ``seed_entity_ids`` is
    packed into a ``HybridSearchConfig``; ``entity_type`` becomes its
    ``entity_type_filter`` and ``max_depth`` its ``max_graph_depth``.

    Returns:
        ``[]`` when ``query_embedding`` is falsy; otherwise one dict per
        ``(entity, score)`` pair from the strategy, with ``entity_id``,
        ``entity_type``, ``properties`` and ``score`` keys.
    """
    if not query_embedding:
        return []

    hybrid_config = HybridSearchConfig(
        mode=SearchMode.HYBRID,
        vector_weight=vector_weight,
        graph_weight=graph_weight,
        max_results=max_results,
        max_graph_depth=max_depth,
        expand_results=expand_results,
        vector_threshold=vector_threshold,
        entity_type_filter=entity_type,
    )

    scored_entities = await self.hybrid_search_strategy.search(
        query_embedding=query_embedding,
        config=hybrid_config,
        seed_entity_ids=seed_entity_ids,
    )

    hits: List[Dict[str, Any]] = []
    for entity, score in scored_entities:
        hits.append(
            {
                "entity_id": entity.id,
                "entity_type": entity.entity_type,
                "properties": entity.properties,
                "score": score,
            }
        )
    return hits
|
|
578
|
+
|
|
579
|
+
async def _pagerank_search(
    self, seed_entity_ids: Optional[List[str]], max_results: int
) -> List[Dict[str, Any]]:
    """Retrieve entities through the PageRank strategy.

    Args:
        seed_entity_ids: Seed entities; empty or ``None`` yields ``[]``.
        max_results: Forwarded to the strategy's ``retrieve`` call.

    Returns:
        One dict per ``(entity, score)`` pair, tagged with
        ``score_type`` ``"pagerank"``.
    """
    if not seed_entity_ids:
        return []

    scored_entities = await self.pagerank_strategy.retrieve(
        seed_entity_ids=seed_entity_ids,
        max_results=max_results,
    )

    hits: List[Dict[str, Any]] = []
    for entity, score in scored_entities:
        hits.append(
            {
                "entity_id": entity.id,
                "entity_type": entity.entity_type,
                "properties": entity.properties,
                "score": score,
                "score_type": "pagerank",
            }
        )
    return hits
|
|
600
|
+
|
|
601
|
+
async def _multihop_search(
    self,
    seed_entity_ids: Optional[List[str]],
    max_depth: int,
    max_results: int,
) -> List[Dict[str, Any]]:
    """Retrieve entities through the multi-hop strategy.

    Args:
        seed_entity_ids: Seed entities; empty or ``None`` yields ``[]``.
        max_depth: Forwarded to the strategy as ``max_hops``.
        max_results: Forwarded to the strategy's ``retrieve`` call.

    Returns:
        One dict per ``(entity, score)`` pair, tagged with
        ``score_type`` ``"hop_distance"``.
    """
    if not seed_entity_ids:
        return []

    scored_entities = await self.multihop_strategy.retrieve(
        seed_entity_ids=seed_entity_ids,
        max_hops=max_depth,
        max_results=max_results,
    )

    hits: List[Dict[str, Any]] = []
    for entity, score in scored_entities:
        hits.append(
            {
                "entity_id": entity.id,
                "entity_type": entity.entity_type,
                "properties": entity.properties,
                "score": score,
                "score_type": "hop_distance",
            }
        )
    return hits
|
|
627
|
+
|
|
628
|
+
async def _filtered_search(
    self,
    entity_type: Optional[str],
    property_filters: Optional[Dict[str, Any]],
    max_results: int,
) -> List[Dict[str, Any]]:
    """Retrieve entities through the filtered-retrieval strategy.

    All three arguments are forwarded unchanged to the strategy's
    ``retrieve`` call; there is no early exit for missing filters.

    Returns:
        One dict per ``(entity, score)`` pair, with ``entity_id``,
        ``entity_type``, ``properties`` and ``score`` keys.
    """
    scored_entities = await self.filtered_strategy.retrieve(
        entity_type=entity_type,
        property_filters=property_filters,
        max_results=max_results,
    )

    hits: List[Dict[str, Any]] = []
    for entity, score in scored_entities:
        hits.append(
            {
                "entity_id": entity.id,
                "entity_type": entity.entity_type,
                "properties": entity.properties,
                "score": score,
            }
        )
    return hits
|
|
650
|
+
|
|
651
|
+
async def _traverse_search(
    self,
    seed_entity_ids: Optional[List[str]],
    relation_types: Optional[List[str]],
    max_depth: int,
    max_results: int,
) -> List[Dict[str, Any]]:
    """Perform pattern-based traversal from each seed and rank the entities reached.

    Every entity on a returned path is scored ``1 / (path.length + 1)``, so
    shorter paths score higher. When an entity shows up on several paths
    (from the same or different seeds) its best score is kept.

    Args:
        seed_entity_ids: Start entities; empty or ``None`` yields ``[]``.
        relation_types: Forwarded to ``PathPattern`` as ``relation_types``.
        max_depth: Forwarded to ``PathPattern`` as ``max_depth``.
        max_results: Size of the returned list; each per-seed traversal is
            asked for twice this many paths.

    Returns:
        Up to ``max_results`` dicts sorted by descending score, each tagged
        with ``score_type`` ``"path_length"``.
    """
    if not seed_entity_ids:
        return []

    pattern = PathPattern(
        relation_types=relation_types,
        max_depth=max_depth,
        allow_cycles=False,
    )

    # entity id -> {"entity", "score"}; insertion order is first-seen order,
    # which the stable sort below uses to break score ties
    best_by_id: Dict[Any, Dict[str, Any]] = {}
    for seed_id in seed_entity_ids:
        paths = await self.traversal_strategy.traverse_with_pattern(
            start_entity_id=seed_id,
            pattern=pattern,
            max_results=max_results * 2,
        )

        for path in paths:
            for entity in path.nodes:
                # Score by path length (shorter is better)
                path_score = 1.0 / (path.length + 1)
                known = best_by_id.get(entity.id)
                if known is None:
                    best_by_id[entity.id] = {"entity": entity, "score": path_score}
                elif path_score > known["score"]:
                    # A later path reaches this entity more directly
                    known["score"] = path_score

    # Sort by score and take top results
    ranked = sorted(best_by_id.values(), key=lambda item: item["score"], reverse=True)
    top_items = ranked[:max_results]

    return [
        {
            "entity_id": item["entity"].id,
            "entity_type": item["entity"].entity_type,
            "properties": item["entity"].properties,
            "score": item["score"],
            "score_type": "path_length",
        }
        for item in top_items
    ]
|
|
702
|
+
|
|
703
|
+
async def _apply_reranking(
    self,
    results: List[Dict[str, Any]],
    query: Optional[str],
    query_embedding: Optional[List[float]],
    strategy: str,
    max_results: int,
) -> List[Dict[str, Any]]:
    """
    Apply reranking to search results

    Args:
        results: Initial search results (list of dicts)
        query: Query text; ``None`` is passed to the reranker as ``""``
        query_embedding: Query embedding vector
        strategy: Reranking strategy name; an unknown name falls back to
            the ``"text"`` reranker
        max_results: Final number of results to return (reranker ``top_k``)

    Returns:
        Reranked and limited results. Each is a copy of the original result
        dict with ``score`` replaced by the rerank score, plus
        ``original_score`` and ``rerank_score``. The input dicts are not
        modified. An empty ``results`` is returned unchanged.
    """
    if not results:
        return results

    # Convert result dicts to Entity objects; "embedding" is optional in a
    # result dict, so it may be None
    entities = [
        Entity(
            id=result["entity_id"],
            entity_type=result["entity_type"],
            properties=result["properties"],
            embedding=result.get("embedding"),
        )
        for result in results
    ]

    # Pick the requested reranker, falling back to text similarity
    reranker_strategy = self._rerankers.get(strategy) or self._rerankers["text"]

    # Create result reranker with single strategy
    reranker = ResultReranker(
        strategies=[reranker_strategy],
        combination_method=ScoreCombinationMethod.WEIGHTED_AVERAGE,
        weights={reranker_strategy.name: 1.0},
    )

    reranked = await reranker.rerank(
        query=query or "",
        entities=entities,
        top_k=max_results,
        query_embedding=query_embedding,
    )

    # Index the originals by entity id once, instead of rescanning the list
    # for every reranked entity. setdefault keeps the first occurrence, which
    # is what a front-to-back scan would find.
    first_by_id: Dict[Any, Dict[str, Any]] = {}
    for result in results:
        first_by_id.setdefault(result["entity_id"], result)

    # Convert back to result dicts, preserving any additional fields
    reranked_results = []
    for entity, rerank_score in reranked:
        original_result = first_by_id.get(entity.id)
        if original_result is None:
            continue

        result_dict = original_result.copy()
        result_dict["original_score"] = result_dict.get("score", 0.0)
        result_dict["score"] = rerank_score
        result_dict["rerank_score"] = rerank_score
        reranked_results.append(result_dict)

    return reranked_results
|
|
774
|
+
|
|
775
|
+
# Public methods for ToolExecutor integration
|
|
776
|
+
async def vector_search(
    self,
    query: Optional[str] = None,
    query_embedding: Optional[List[float]] = None,
    entity_type: Optional[str] = None,
    max_results: int = 10,
    vector_threshold: float = 0.0,
) -> Dict[str, Any]:
    """Vector similarity search (public method for ToolExecutor).

    Initializes the tool if needed, then delegates to ``_vector_search``.
    A text ``query`` given without an embedding is replaced by a fixed
    128-value placeholder vector.

    Returns:
        Dict with ``success``, ``mode`` (``"vector"``), ``num_results`` and
        ``results``.
    """
    await self._initialize()

    embedding = query_embedding
    if query and not embedding:
        embedding = [0.1] * 128  # Placeholder

    hits = await self._vector_search(embedding, entity_type, max_results, vector_threshold)

    response = {
        "success": True,
        "mode": "vector",
        "num_results": len(hits),
        "results": hits,
    }
    return response
|
|
797
|
+
|
|
798
|
+
async def graph_search(
    self,
    seed_entity_ids: List[str],
    max_depth: int = 2,
    max_results: int = 10,
) -> Dict[str, Any]:
    """Graph structure search (public method for ToolExecutor).

    Initializes the tool if needed, then delegates to ``_graph_search``.

    Returns:
        Dict with ``success``, ``mode`` (``"graph"``), ``num_results`` and
        ``results``.
    """
    await self._initialize()

    hits = await self._graph_search(seed_entity_ids, max_depth, max_results)

    response = {
        "success": True,
        "mode": "graph",
        "num_results": len(hits),
        "results": hits,
    }
    return response
|
|
813
|
+
|
|
814
|
+
async def hybrid_search(
    self,
    query: Optional[str] = None,
    query_embedding: Optional[List[float]] = None,
    seed_entity_ids: Optional[List[str]] = None,
    entity_type: Optional[str] = None,
    max_results: int = 10,
    max_depth: int = 2,
    vector_weight: float = 0.6,
    graph_weight: float = 0.4,
    expand_results: bool = True,
    vector_threshold: float = 0.0,
) -> Dict[str, Any]:
    """Hybrid search (public method for ToolExecutor).

    Initializes the tool if needed, then delegates to ``_hybrid_search``.
    A text ``query`` given without an embedding is replaced by a fixed
    128-value placeholder vector.

    Returns:
        Dict with ``success``, ``mode`` (``"hybrid"``), ``num_results`` and
        ``results``.
    """
    await self._initialize()

    embedding = query_embedding
    if query and not embedding:
        embedding = [0.1] * 128  # Placeholder

    hits = await self._hybrid_search(
        embedding,
        seed_entity_ids,
        entity_type,
        max_results,
        max_depth,
        vector_weight,
        graph_weight,
        expand_results,
        vector_threshold,
    )

    response = {
        "success": True,
        "mode": "hybrid",
        "num_results": len(hits),
        "results": hits,
    }
    return response
|
|
848
|
+
|
|
849
|
+
async def pagerank_search(
    self, seed_entity_ids: List[str], max_results: int = 10
) -> Dict[str, Any]:
    """PageRank search (public method for ToolExecutor).

    Initializes the tool if needed, then delegates to ``_pagerank_search``.

    Returns:
        Dict with ``success``, ``mode`` (``"pagerank"``), ``num_results``
        and ``results``.
    """
    await self._initialize()

    hits = await self._pagerank_search(seed_entity_ids, max_results)

    response = {
        "success": True,
        "mode": "pagerank",
        "num_results": len(hits),
        "results": hits,
    }
    return response
|
|
861
|
+
|
|
862
|
+
async def multihop_search(
    self,
    seed_entity_ids: List[str],
    max_depth: int = 2,
    max_results: int = 10,
) -> Dict[str, Any]:
    """Multi-hop search (public method for ToolExecutor).

    Initializes the tool if needed, then delegates to ``_multihop_search``.

    Returns:
        Dict with ``success``, ``mode`` (``"multihop"``), ``num_results``
        and ``results``.
    """
    await self._initialize()

    hits = await self._multihop_search(seed_entity_ids, max_depth, max_results)

    response = {
        "success": True,
        "mode": "multihop",
        "num_results": len(hits),
        "results": hits,
    }
    return response
|
|
877
|
+
|
|
878
|
+
async def filtered_search(
    self,
    entity_type: Optional[str] = None,
    property_filters: Optional[Dict[str, Any]] = None,
    max_results: int = 10,
) -> Dict[str, Any]:
    """Filtered search (public method for ToolExecutor).

    Initializes the tool if needed, then delegates to ``_filtered_search``.

    Returns:
        Dict with ``success``, ``mode`` (``"filtered"``), ``num_results``
        and ``results``.
    """
    await self._initialize()

    hits = await self._filtered_search(entity_type, property_filters, max_results)

    response = {
        "success": True,
        "mode": "filtered",
        "num_results": len(hits),
        "results": hits,
    }
    return response
|
|
893
|
+
|
|
894
|
+
async def traverse_search(
    self,
    seed_entity_ids: List[str],
    relation_types: Optional[List[str]] = None,
    max_depth: int = 2,
    max_results: int = 10,
) -> Dict[str, Any]:
    """Pattern-based traversal (public method for ToolExecutor).

    Initializes the tool if needed, then delegates to ``_traverse_search``.

    Returns:
        Dict with ``success``, ``mode`` (``"traverse"``), ``num_results``
        and ``results``.
    """
    await self._initialize()

    hits = await self._traverse_search(seed_entity_ids, relation_types, max_depth, max_results)

    response = {
        "success": True,
        "mode": "traverse",
        "num_results": len(hits),
        "results": hits,
    }
    return response
|
|
912
|
+
|
|
913
|
+
async def execute(self, **kwargs) -> Dict[str, Any]:
    """
    Public entry point: forward all keyword arguments to ``_execute``.

    Args:
        **kwargs: Tool input parameters

    Returns:
        Dictionary with search results, exactly as ``_execute`` returns it
    """
    outcome = await self._execute(**kwargs)
    return outcome
|