aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiecs might be problematic. Click here for more details.
- aiecs/__init__.py +13 -16
- aiecs/__main__.py +7 -7
- aiecs/aiecs_client.py +269 -75
- aiecs/application/executors/operation_executor.py +79 -54
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
- aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
- aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
- aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +98 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
- aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
- aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
- aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
- aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
- aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
- aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
- aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
- aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
- aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
- aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
- aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
- aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
- aiecs/application/knowledge_graph/search/reranker.py +293 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +471 -0
- aiecs/config/__init__.py +20 -5
- aiecs/config/config.py +762 -31
- aiecs/config/graph_config.py +131 -0
- aiecs/config/tool_config.py +399 -0
- aiecs/core/__init__.py +29 -13
- aiecs/core/interface/__init__.py +2 -2
- aiecs/core/interface/execution_interface.py +22 -22
- aiecs/core/interface/storage_interface.py +37 -88
- aiecs/core/registry/__init__.py +31 -0
- aiecs/core/registry/service_registry.py +92 -0
- aiecs/domain/__init__.py +270 -1
- aiecs/domain/agent/__init__.py +191 -0
- aiecs/domain/agent/base_agent.py +3870 -0
- aiecs/domain/agent/exceptions.py +99 -0
- aiecs/domain/agent/graph_aware_mixin.py +569 -0
- aiecs/domain/agent/hybrid_agent.py +1435 -0
- aiecs/domain/agent/integration/__init__.py +29 -0
- aiecs/domain/agent/integration/context_compressor.py +216 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
- aiecs/domain/agent/integration/protocols.py +281 -0
- aiecs/domain/agent/integration/retry_policy.py +218 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
- aiecs/domain/agent/lifecycle.py +291 -0
- aiecs/domain/agent/llm_agent.py +692 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +1124 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +163 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
- aiecs/domain/agent/models.py +884 -0
- aiecs/domain/agent/observability.py +479 -0
- aiecs/domain/agent/persistence.py +449 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +159 -0
- aiecs/domain/agent/prompts/formatters.py +187 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +253 -0
- aiecs/domain/agent/tool_agent.py +444 -0
- aiecs/domain/agent/tools/__init__.py +15 -0
- aiecs/domain/agent/tools/schema_generator.py +364 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +469 -0
- aiecs/domain/community/analytics.py +432 -0
- aiecs/domain/community/collaborative_workflow.py +648 -0
- aiecs/domain/community/communication_hub.py +634 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +796 -0
- aiecs/domain/community/community_manager.py +803 -0
- aiecs/domain/community/decision_engine.py +849 -0
- aiecs/domain/community/exceptions.py +231 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +234 -0
- aiecs/domain/community/resource_manager.py +461 -0
- aiecs/domain/community/shared_context_manager.py +589 -0
- aiecs/domain/context/__init__.py +40 -10
- aiecs/domain/context/context_engine.py +1910 -0
- aiecs/domain/context/conversation_models.py +87 -53
- aiecs/domain/context/graph_memory.py +582 -0
- aiecs/domain/execution/model.py +12 -4
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +148 -0
- aiecs/domain/knowledge_graph/models/evidence.py +178 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
- aiecs/domain/knowledge_graph/models/path.py +171 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
- aiecs/domain/knowledge_graph/models/query.py +261 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
- aiecs/domain/knowledge_graph/models/relation.py +202 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
- aiecs/domain/task/dsl_processor.py +172 -56
- aiecs/domain/task/model.py +20 -8
- aiecs/domain/task/task_context.py +27 -24
- aiecs/infrastructure/__init__.py +0 -2
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +837 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
- aiecs/infrastructure/graph_storage/cache.py +424 -0
- aiecs/infrastructure/graph_storage/distributed.py +223 -0
- aiecs/infrastructure/graph_storage/error_handling.py +380 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
- aiecs/infrastructure/graph_storage/metrics.py +344 -0
- aiecs/infrastructure/graph_storage/migration.py +400 -0
- aiecs/infrastructure/graph_storage/pagination.py +483 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
- aiecs/infrastructure/graph_storage/postgres.py +1563 -0
- aiecs/infrastructure/graph_storage/property_storage.py +353 -0
- aiecs/infrastructure/graph_storage/protocols.py +76 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
- aiecs/infrastructure/graph_storage/streaming.py +487 -0
- aiecs/infrastructure/graph_storage/tenant.py +412 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
- aiecs/infrastructure/messaging/websocket_manager.py +51 -35
- aiecs/infrastructure/monitoring/__init__.py +22 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
- aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
- aiecs/infrastructure/monitoring/structured_logger.py +3 -7
- aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
- aiecs/infrastructure/persistence/__init__.py +14 -1
- aiecs/infrastructure/persistence/context_engine_client.py +184 -0
- aiecs/infrastructure/persistence/database_manager.py +67 -43
- aiecs/infrastructure/persistence/file_storage.py +180 -103
- aiecs/infrastructure/persistence/redis_client.py +74 -21
- aiecs/llm/__init__.py +73 -25
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
- aiecs/llm/client_factory.py +224 -36
- aiecs/llm/client_resolver.py +155 -0
- aiecs/llm/clients/__init__.py +38 -0
- aiecs/llm/clients/base_client.py +324 -0
- aiecs/llm/clients/google_function_calling_mixin.py +457 -0
- aiecs/llm/clients/googleai_client.py +241 -0
- aiecs/llm/clients/openai_client.py +158 -0
- aiecs/llm/clients/openai_compatible_mixin.py +367 -0
- aiecs/llm/clients/vertex_client.py +897 -0
- aiecs/llm/clients/xai_client.py +201 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +272 -0
- aiecs/llm/config/config_validator.py +206 -0
- aiecs/llm/config/model_config.py +143 -0
- aiecs/llm/protocols.py +149 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +89 -0
- aiecs/main.py +140 -121
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/module_checker.py +499 -0
- aiecs/scripts/aid/version_manager.py +235 -0
- aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
- aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
- aiecs/scripts/dependance_check/__init__.py +15 -0
- aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
- aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
- aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
- aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
- aiecs/scripts/knowledge_graph/__init__.py +3 -0
- aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
- aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
- aiecs/scripts/tools_develop/README.md +671 -0
- aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
- aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
- aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
- aiecs/scripts/tools_develop/schema_coverage.py +511 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
- aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
- aiecs/scripts/tools_develop/verify_tools.py +352 -0
- aiecs/tasks/__init__.py +0 -1
- aiecs/tasks/worker.py +115 -47
- aiecs/tools/__init__.py +194 -72
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +330 -0
- aiecs/tools/apisource/providers/__init__.py +112 -0
- aiecs/tools/apisource/providers/base.py +671 -0
- aiecs/tools/apisource/providers/census.py +397 -0
- aiecs/tools/apisource/providers/fred.py +535 -0
- aiecs/tools/apisource/providers/newsapi.py +409 -0
- aiecs/tools/apisource/providers/worldbank.py +352 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +363 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
- aiecs/tools/apisource/tool.py +832 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +334 -0
- aiecs/tools/base_tool.py +415 -21
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +607 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
- aiecs/tools/docs/content_insertion_tool.py +1320 -0
- aiecs/tools/docs/document_creator_tool.py +1323 -0
- aiecs/tools/docs/document_layout_tool.py +1160 -0
- aiecs/tools/docs/document_parser_tool.py +1011 -0
- aiecs/tools/docs/document_writer_tool.py +1829 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
- aiecs/tools/langchain_adapter.py +300 -138
- aiecs/tools/schema_generator.py +455 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +581 -0
- aiecs/tools/search_tool/cache.py +264 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +224 -0
- aiecs/tools/search_tool/core.py +778 -0
- aiecs/tools/search_tool/deduplicator.py +119 -0
- aiecs/tools/search_tool/error_handler.py +242 -0
- aiecs/tools/search_tool/metrics.py +343 -0
- aiecs/tools/search_tool/rate_limiter.py +172 -0
- aiecs/tools/search_tool/schemas.py +275 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
- aiecs/tools/statistics/data_loader_tool.py +555 -0
- aiecs/tools/statistics/data_profiler_tool.py +638 -0
- aiecs/tools/statistics/data_transformer_tool.py +580 -0
- aiecs/tools/statistics/data_visualizer_tool.py +498 -0
- aiecs/tools/statistics/model_trainer_tool.py +507 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
- aiecs/tools/task_tools/__init__.py +49 -36
- aiecs/tools/task_tools/chart_tool.py +200 -184
- aiecs/tools/task_tools/classfire_tool.py +268 -267
- aiecs/tools/task_tools/image_tool.py +175 -131
- aiecs/tools/task_tools/office_tool.py +226 -146
- aiecs/tools/task_tools/pandas_tool.py +477 -121
- aiecs/tools/task_tools/report_tool.py +390 -142
- aiecs/tools/task_tools/research_tool.py +149 -79
- aiecs/tools/task_tools/scraper_tool.py +339 -145
- aiecs/tools/task_tools/stats_tool.py +448 -209
- aiecs/tools/temp_file_manager.py +26 -24
- aiecs/tools/tool_executor/__init__.py +18 -16
- aiecs/tools/tool_executor/tool_executor.py +364 -52
- aiecs/utils/LLM_output_structor.py +74 -48
- aiecs/utils/__init__.py +14 -3
- aiecs/utils/base_callback.py +0 -3
- aiecs/utils/cache_provider.py +696 -0
- aiecs/utils/execution_utils.py +50 -31
- aiecs/utils/prompt_loader.py +1 -0
- aiecs/utils/token_usage_repository.py +37 -11
- aiecs/ws/socket_server.py +14 -4
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
- aiecs-1.7.6.dist-info/RECORD +337 -0
- aiecs-1.7.6.dist-info/entry_points.txt +13 -0
- aiecs/config/registry.py +0 -19
- aiecs/domain/context/content_engine.py +0 -982
- aiecs/llm/base_client.py +0 -99
- aiecs/llm/openai_client.py +0 -125
- aiecs/llm/vertex_client.py +0 -186
- aiecs/llm/xai_client.py +0 -184
- aiecs/scripts/dependency_checker.py +0 -857
- aiecs/scripts/quick_dependency_check.py +0 -269
- aiecs/tools/task_tools/search_api.py +0 -7
- aiecs-1.0.1.dist-info/RECORD +0 -90
- aiecs-1.0.1.dist-info/entry_points.txt +0 -7
- /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
- /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
- /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
- /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,944 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Knowledge Graph Search Tool
|
|
3
|
+
|
|
4
|
+
AIECS tool for searching knowledge graphs with multiple search modes.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Dict, Any, List, Optional
|
|
8
|
+
from pydantic import BaseModel, Field
|
|
9
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
10
|
+
from enum import Enum
|
|
11
|
+
|
|
12
|
+
from aiecs.tools.base_tool import BaseTool
|
|
13
|
+
from aiecs.tools import register_tool
|
|
14
|
+
from aiecs.infrastructure.graph_storage.in_memory import InMemoryGraphStore
|
|
15
|
+
from aiecs.application.knowledge_graph.search.hybrid_search import (
|
|
16
|
+
HybridSearchStrategy,
|
|
17
|
+
HybridSearchConfig,
|
|
18
|
+
SearchMode,
|
|
19
|
+
)
|
|
20
|
+
from aiecs.application.knowledge_graph.retrieval.retrieval_strategies import (
|
|
21
|
+
PersonalizedPageRank,
|
|
22
|
+
MultiHopRetrieval,
|
|
23
|
+
FilteredRetrieval,
|
|
24
|
+
RetrievalCache,
|
|
25
|
+
)
|
|
26
|
+
from aiecs.application.knowledge_graph.traversal.enhanced_traversal import (
|
|
27
|
+
EnhancedTraversal,
|
|
28
|
+
)
|
|
29
|
+
from aiecs.domain.knowledge_graph.models.path_pattern import PathPattern
|
|
30
|
+
from aiecs.application.knowledge_graph.search.reranker import (
|
|
31
|
+
ResultReranker,
|
|
32
|
+
ScoreCombinationMethod,
|
|
33
|
+
)
|
|
34
|
+
from aiecs.application.knowledge_graph.search.reranker_strategies import (
|
|
35
|
+
TextSimilarityReranker,
|
|
36
|
+
SemanticReranker,
|
|
37
|
+
StructuralReranker,
|
|
38
|
+
HybridReranker,
|
|
39
|
+
)
|
|
40
|
+
from aiecs.domain.knowledge_graph.models.entity import Entity
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class SearchModeEnum(str, Enum):
    """Search modes accepted by the graph search tool.

    The enum mixes in ``str``, so every member compares equal to its
    lowercase string value (``SearchModeEnum.VECTOR == "vector"``) and can
    be looked up from that value (``SearchModeEnum("vector")``).
    """

    # Similarity search.
    VECTOR = "vector"
    # Graph-structure search.
    GRAPH = "graph"
    # Combined vector + graph search.
    HYBRID = "hybrid"
    # Importance ranking.
    PAGERANK = "pagerank"
    # Neighbor discovery across several hops.
    MULTIHOP = "multihop"
    # Filtering by properties.
    FILTERED = "filtered"
    # Pattern-based traversal.
    TRAVERSE = "traverse"
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class GraphSearchInput(BaseModel):
    """Input schema for Graph Search Tool (legacy, for execute() method).

    ``mode`` is the only required field. Every other field is optional or
    has a default, and each field's description states which search modes
    it applies to. Numeric fields carry inclusive ``ge``/``le`` bounds.
    """

    # --- Mode selection (required) ---
    mode: SearchModeEnum = Field(
        ...,
        description=(
            "Search mode: 'vector' (similarity), 'graph' (structure), "
            "'hybrid' (combined), 'pagerank' (importance), "
            "'multihop' (neighbors), 'filtered' (by properties), "
            "'traverse' (pattern-based)"
        ),
    )

    # --- Query inputs ---
    query: Optional[str] = Field(
        None,
        description="Natural language query (converted to embedding for vector/hybrid search)",
    )

    query_embedding: Optional[List[float]] = Field(None, description="Query vector embedding (for vector/hybrid search)")

    seed_entity_ids: Optional[List[str]] = Field(
        None,
        description="Starting entity IDs (for graph/pagerank/multihop/traverse modes)",
    )

    # --- Filters ---
    entity_type: Optional[str] = Field(
        None,
        description="Filter by entity type (e.g., 'Person', 'Company', 'Location')",
    )

    property_filters: Optional[Dict[str, Any]] = Field(
        None,
        description="Filter by properties (e.g., {'role': 'Engineer', 'level': 'Senior'})",
    )

    relation_types: Optional[List[str]] = Field(
        None,
        description="Filter by relation types (e.g., ['WORKS_FOR', 'LOCATED_IN'])",
    )

    # --- Result size and traversal limits ---
    max_results: int = Field(
        default=10,
        ge=1,
        le=100,
        description="Maximum number of results to return (1-100)",
    )

    max_depth: int = Field(
        default=2,
        ge=1,
        le=5,
        description="Maximum traversal depth for graph/multihop/traverse modes (1-5)",
    )

    # --- Vector / hybrid tuning ---
    vector_threshold: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description="Minimum similarity threshold for vector search (0.0-1.0)",
    )

    # NOTE(review): the two weights default to 0.6 + 0.4 but nothing in this
    # schema enforces that they sum to 1.0 — confirm how the consumer
    # normalizes them.
    vector_weight: float = Field(
        default=0.6,
        ge=0.0,
        le=1.0,
        description="Weight for vector similarity in hybrid mode (0.0-1.0)",
    )

    graph_weight: float = Field(
        default=0.4,
        ge=0.0,
        le=1.0,
        description="Weight for graph structure in hybrid mode (0.0-1.0)",
    )

    expand_results: bool = Field(
        default=True,
        description="Whether to expand results with graph neighbors (hybrid mode)",
    )

    use_cache: bool = Field(
        default=True,
        description="Whether to use result caching for performance",
    )

    # --- Reranking parameters ---
    enable_reranking: bool = Field(
        default=False,
        description="Whether to enable result reranking for improved relevance",
    )

    # Free-form string: the allowed values are only listed in the
    # description, not enforced by the field type.
    rerank_strategy: Optional[str] = Field(
        default="text",
        description=(
            "Reranking strategy: 'text' (text similarity), 'semantic' (embeddings), "
            "'structural' (graph importance), 'hybrid' (all signals)"
        ),
    )

    # The description asks for rerank_top_k >= max_results, but this schema
    # does not validate that relationship.
    rerank_top_k: Optional[int] = Field(
        default=None,
        ge=1,
        le=500,
        description=(
            "Top-K results to fetch before reranking (for performance). "
            "If None, uses max_results. Should be >= max_results."
        ),
    )
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
# Schemas for individual operations - moved to GraphSearchTool class as inner classes
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
@register_tool("graph_search")
|
|
163
|
+
class GraphSearchTool(BaseTool):
|
|
164
|
+
"""
|
|
165
|
+
Knowledge Graph Search Tool
|
|
166
|
+
|
|
167
|
+
Powerful search tool for querying knowledge graphs with multiple search modes:
|
|
168
|
+
|
|
169
|
+
1. **Vector Search** - Find semantically similar entities
|
|
170
|
+
2. **Graph Search** - Explore graph structure from seed entities
|
|
171
|
+
3. **Hybrid Search** - Combine vector similarity with graph structure
|
|
172
|
+
4. **PageRank** - Find important/influential entities
|
|
173
|
+
5. **Multi-Hop** - Find entities within N hops
|
|
174
|
+
6. **Filtered** - Precise filtering by properties
|
|
175
|
+
7. **Traverse** - Pattern-based graph traversal
|
|
176
|
+
|
|
177
|
+
Example Usage:
|
|
178
|
+
```python
|
|
179
|
+
# Vector search
|
|
180
|
+
results = tool.execute({
|
|
181
|
+
"mode": "vector",
|
|
182
|
+
"query": "machine learning researchers",
|
|
183
|
+
"max_results": 10
|
|
184
|
+
})
|
|
185
|
+
|
|
186
|
+
# Hybrid search
|
|
187
|
+
results = tool.execute({
|
|
188
|
+
"mode": "hybrid",
|
|
189
|
+
"query": "AI research",
|
|
190
|
+
"seed_entity_ids": ["person_1"],
|
|
191
|
+
"vector_weight": 0.6,
|
|
192
|
+
"graph_weight": 0.4
|
|
193
|
+
})
|
|
194
|
+
|
|
195
|
+
# PageRank
|
|
196
|
+
results = tool.execute({
|
|
197
|
+
"mode": "pagerank",
|
|
198
|
+
"seed_entity_ids": ["important_node"],
|
|
199
|
+
"max_results": 20
|
|
200
|
+
})
|
|
201
|
+
|
|
202
|
+
# Filtered search
|
|
203
|
+
results = tool.execute({
|
|
204
|
+
"mode": "filtered",
|
|
205
|
+
"entity_type": "Person",
|
|
206
|
+
"property_filters": {"role": "Engineer", "experience": "Senior"}
|
|
207
|
+
})
|
|
208
|
+
```
|
|
209
|
+
"""
|
|
210
|
+
|
|
211
|
+
name: str = "graph_search"
|
|
212
|
+
description: str = """Search knowledge graphs with multiple powerful search modes.
|
|
213
|
+
|
|
214
|
+
This tool enables sophisticated graph querying including:
|
|
215
|
+
- Semantic similarity search (vector embeddings)
|
|
216
|
+
- Graph structure exploration
|
|
217
|
+
- Hybrid search combining both approaches
|
|
218
|
+
- Importance ranking (PageRank)
|
|
219
|
+
- Multi-hop neighbor discovery
|
|
220
|
+
- Property-based filtering
|
|
221
|
+
- Pattern-based traversal
|
|
222
|
+
|
|
223
|
+
Use this tool when you need to:
|
|
224
|
+
- Find entities similar to a query
|
|
225
|
+
- Explore relationships in a knowledge graph
|
|
226
|
+
- Find influential entities
|
|
227
|
+
- Discover connections between entities
|
|
228
|
+
- Filter entities by specific criteria
|
|
229
|
+
"""
|
|
230
|
+
|
|
231
|
+
# Configuration schema
|
|
232
|
+
class Config(BaseSettings):
    """Configuration for the Graph Search Tool

    Automatically reads from environment variables with GRAPH_SEARCH_ prefix.
    Example: GRAPH_SEARCH_CACHE_MAX_SIZE -> cache_max_size

    NOTE(review): in the original module this class appears after the
    ``GraphSearchTool`` header and is presumably nested inside it — restore
    that nesting level when splicing back into the module.
    """

    # The env prefix is what maps GRAPH_SEARCH_<FIELD> onto the fields below.
    model_config = SettingsConfigDict(env_prefix="GRAPH_SEARCH_")

    # Retrieval-result cache sizing and expiry.
    cache_max_size: int = Field(
        default=100,
        description="Maximum cache size for retrieval results",
    )
    cache_ttl: int = Field(
        default=300,
        description="Cache time-to-live in seconds",
    )

    # Fallbacks for result count and traversal depth.
    default_max_results: int = Field(
        default=10,
        description="Default maximum number of search results",
    )
    default_max_depth: int = Field(
        default=2,
        description="Default maximum traversal depth",
    )
|
|
257
|
+
|
|
258
|
+
# Schema definitions
|
|
259
|
+
class Vector_searchSchema(BaseModel):
    """Schema for vector_search operation

    All fields are optional or defaulted. The field descriptions state that
    either ``query`` or ``query_embedding`` must be provided, but this schema
    itself does not enforce that.

    NOTE(review): the non-PascalCase class name is kept exactly as in the
    original; it looks like an ``<operation>Schema`` naming convention, so
    renaming it may break a lookup elsewhere — confirm before changing.
    """

    # Query inputs.
    query: Optional[str] = Field(default=None, description="Optional natural language query. Either query or query_embedding must be provided")
    query_embedding: Optional[List[float]] = Field(default=None, description="Optional pre-computed query vector embedding. Either query or query_embedding must be provided")

    # Filtering and result limits (bounds are inclusive).
    entity_type: Optional[str] = Field(default=None, description="Optional filter by entity type (e.g., 'Person', 'Company', 'Location')")
    max_results: int = Field(default=10, ge=1, le=100, description="Maximum number of results to return (1-100)")
    vector_threshold: float = Field(default=0.0, ge=0.0, le=1.0, description="Minimum similarity threshold for results (0.0-1.0)")

    # Reranking options; the strategy is a free-form string, defaulting to "text".
    enable_reranking: bool = Field(default=False, description="Whether to enable result reranking for improved relevance")
    rerank_strategy: Optional[str] = Field(default="text", description="Reranking strategy: 'text' (text similarity), 'semantic' (embeddings), 'structural' (graph importance), or 'hybrid' (all signals)")
    rerank_top_k: Optional[int] = Field(default=None, ge=1, le=500, description="Top-K results to fetch before reranking. If None, uses max_results")
|
|
270
|
+
|
|
271
|
+
class Graph_searchSchema(BaseModel):
    """Schema for graph_search operation

    ``seed_entity_ids`` has no default and is therefore required; the depth
    and result limits are optional with inclusive bounds.
    """

    # Required: where the traversal starts.
    seed_entity_ids: List[str] = Field(description="List of starting entity IDs to begin graph traversal from")

    # Traversal and result limits.
    max_depth: int = Field(default=2, ge=1, le=5, description="Maximum traversal depth from seed entities (1-5)")
    max_results: int = Field(default=10, ge=1, le=100, description="Maximum number of results to return (1-100)")
|
|
277
|
+
|
|
278
|
+
class Hybrid_searchSchema(BaseModel):
    """Schema for hybrid_search operation"""

    # --- Query source -----------------------------------------------------
    # Both fields default to None; the descriptions ask for at least one,
    # but nothing in this schema enforces that.
    query: Optional[str] = Field(
        default=None,
        description=(
            "Optional natural language query. "
            "Either query or query_embedding must be provided"
        ),
    )
    query_embedding: Optional[List[float]] = Field(
        default=None,
        description=(
            "Optional pre-computed query vector embedding. "
            "Either query or query_embedding must be provided"
        ),
    )

    # --- Graph entry points and filtering ----------------------------------
    seed_entity_ids: Optional[List[str]] = Field(
        default=None,
        description="Optional list of starting entity IDs for graph-based search",
    )
    entity_type: Optional[str] = Field(
        default=None,
        description="Optional filter by entity type",
    )

    # --- Limits (bounds are enforced through ge/le) -------------------------
    max_results: int = Field(
        default=10,
        ge=1,
        le=100,
        description="Maximum number of results to return (1-100)",
    )
    max_depth: int = Field(
        default=2,
        ge=1,
        le=5,
        description="Maximum graph traversal depth (1-5)",
    )

    # --- Score blending -----------------------------------------------------
    # The two weights are validated independently; this schema does not
    # require them to sum to 1.0.
    vector_weight: float = Field(
        default=0.6,
        ge=0.0,
        le=1.0,
        description="Weight for vector similarity component (0.0-1.0)",
    )
    graph_weight: float = Field(
        default=0.4,
        ge=0.0,
        le=1.0,
        description="Weight for graph structure component (0.0-1.0)",
    )
    expand_results: bool = Field(
        default=True,
        description="Whether to expand results with graph neighbors",
    )
    vector_threshold: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description="Minimum similarity threshold for vector search (0.0-1.0)",
    )

    # --- Reranking ----------------------------------------------------------
    enable_reranking: bool = Field(
        default=False,
        description="Whether to enable result reranking",
    )
    rerank_strategy: Optional[str] = Field(
        default="hybrid",
        description="Reranking strategy: 'text', 'semantic', 'structural', or 'hybrid'",
    )
    rerank_top_k: Optional[int] = Field(
        default=None,
        ge=1,
        le=500,
        description="Top-K results for reranking",
    )
|
|
294
|
+
|
|
295
|
+
class Pagerank_searchSchema(BaseModel):
    """Schema for pagerank_search operation"""

    # Required: no default is supplied, so validation fails when it is absent.
    seed_entity_ids: List[str] = Field(
        description="List of starting entity IDs for Personalized PageRank calculation",
    )

    # Bounded through ge/le.
    max_results: int = Field(
        default=10,
        ge=1,
        le=100,
        description="Maximum number of results to return (1-100)",
    )
|
|
300
|
+
|
|
301
|
+
class Multihop_searchSchema(BaseModel):
    """Schema for multihop_search operation"""

    # Required: no default is supplied, so validation fails when it is absent.
    seed_entity_ids: List[str] = Field(
        description="List of starting entity IDs",
    )

    # Bounded integers; the limits are enforced through ge/le.
    max_depth: int = Field(
        default=2,
        ge=1,
        le=5,
        description="Maximum number of hops from seed entities (1-5)",
    )
    max_results: int = Field(
        default=10,
        ge=1,
        le=100,
        description="Maximum number of results to return (1-100)",
    )
|
|
307
|
+
|
|
308
|
+
class Filtered_searchSchema(BaseModel):
    """Schema for filtered_search operation"""

    # Both filters are optional and default to None.
    entity_type: Optional[str] = Field(
        default=None,
        description="Optional filter by entity type (e.g., 'Person', 'Company')",
    )
    property_filters: Optional[Dict[str, Any]] = Field(
        default=None,
        description=(
            "Optional dictionary of property filters "
            "(e.g., {'role': 'Engineer', 'level': 'Senior'})"
        ),
    )

    # Bounded through ge/le.
    max_results: int = Field(
        default=10,
        ge=1,
        le=100,
        description="Maximum number of results to return (1-100)",
    )
|
|
314
|
+
|
|
315
|
+
class Traverse_searchSchema(BaseModel):
    """Schema for traverse_search operation"""

    # Required: no default is supplied, so validation fails when it is absent.
    seed_entity_ids: List[str] = Field(
        description="List of starting entity IDs for pattern-based traversal",
    )

    # Optional restriction on which relation types may be followed.
    relation_types: Optional[List[str]] = Field(
        default=None,
        description="Optional filter by relation types (e.g., ['WORKS_FOR', 'LOCATED_IN'])",
    )

    # Bounded integers; the limits are enforced through ge/le.
    max_depth: int = Field(
        default=2,
        ge=1,
        le=5,
        description="Maximum traversal depth (1-5)",
    )
    max_results: int = Field(
        default=10,
        ge=1,
        le=100,
        description="Maximum number of results to return (1-100)",
    )
|
|
322
|
+
|
|
323
|
+
# Declared input model for this tool. NOTE(review): presumably read by the
# tool framework to validate the keyword arguments of `_execute` - confirm.
input_schema: type[BaseModel] = GraphSearchInput
|
|
324
|
+
|
|
325
|
+
def __init__(self, config: Optional[Dict[str, Any]] = None, **kwargs):
    """
    Initialize Graph Search Tool.

    Only cheap attribute setup happens here; the graph store, search
    strategies, cache and rerankers are created later by ``_initialize()``.

    Args:
        config (Dict, optional): Configuration overrides for Graph Search Tool.
        **kwargs: Additional arguments passed to BaseTool (e.g., tool_name)

    Configuration is automatically loaded by BaseTool from:
        1. Explicit config dict (highest priority)
        2. YAML config files (config/tools/graph_search.yaml)
        3. Environment variables (via dotenv from .env files)
        4. Tool defaults (lowest priority)
    """
    super().__init__(config, **kwargs)

    # Configuration is automatically loaded by BaseTool into self._config_obj
    # Access config via self._config_obj (BaseSettings instance); fall back
    # to a default Config when BaseTool left it empty.
    self.config = self._config_obj if self._config_obj else self.Config()

    # Graph store (shared with KG builder)
    self.graph_store = None

    # Search strategies (using _strategy suffix to avoid shadowing public
    # methods)
    self.hybrid_search_strategy = None
    self.pagerank_strategy = None
    self.multihop_strategy = None
    self.filtered_strategy = None
    self.traversal_strategy = None
    self.cache = None

    # Reranker registry, filled in by _initialize(). Declared here so the
    # attribute exists on every instance instead of appearing only after
    # the first lazy initialization.
    self._rerankers: Dict[str, Any] = {}

    self._initialized = False
|
|
358
|
+
|
|
359
|
+
async def _initialize(self):
    """
    Lazily create the graph store, search strategies, cache and rerankers.

    The call is a no-op once ``self._initialized`` is True. A graph store
    that was assigned to ``self.graph_store`` before the first call is kept
    and used for all strategies; only when none is set is a fresh
    ``InMemoryGraphStore`` created and initialized.
    """
    if self._initialized:
        return

    # Initialize graph store (use in-memory for now)
    # In production, this would be configurable
    if self.graph_store is None:
        # Assign only after initialize() succeeded, so a failed attempt does
        # not leave a half-initialized store behind for the next call.
        store = InMemoryGraphStore()
        await store.initialize()
        self.graph_store = store
    # NOTE(review): a pre-assigned store is assumed to have been initialized
    # by whoever injected it - confirm against the callers.

    # Initialize search strategies (all operate on the same store)
    self.hybrid_search_strategy = HybridSearchStrategy(self.graph_store)
    self.pagerank_strategy = PersonalizedPageRank(self.graph_store)
    self.multihop_strategy = MultiHopRetrieval(self.graph_store)
    self.filtered_strategy = FilteredRetrieval(self.graph_store)
    self.traversal_strategy = EnhancedTraversal(self.graph_store)

    # Initialize cache from the configured size and TTL
    self.cache = RetrievalCache(max_size=self.config.cache_max_size, ttl=self.config.cache_ttl)

    # Initialize reranking strategies, keyed by the public strategy name
    self._rerankers = {
        "text": TextSimilarityReranker(),
        "semantic": SemanticReranker(),
        "structural": StructuralReranker(self.graph_store),
        "hybrid": HybridReranker(self.graph_store),
    }

    self._initialized = True
|
|
388
|
+
|
|
389
|
+
async def _execute(self, **kwargs) -> Dict[str, Any]:
    """
    Run the search selected by ``mode`` and wrap the outcome in a dict.

    Args:
        **kwargs: Tool input parameters. Keys read here are ``mode``,
            ``query``, ``query_embedding``, ``seed_entity_ids``,
            ``entity_type``, ``property_filters``, ``relation_types``,
            ``max_results``, ``max_depth``, ``vector_threshold``,
            ``vector_weight``, ``graph_weight``, ``expand_results``,
            ``enable_reranking``, ``rerank_strategy`` and ``rerank_top_k``.

    Returns:
        On success a dict with ``success``, ``mode``, ``num_results``,
        ``results`` and ``reranked``. For an unrecognized mode, or when the
        search raises, a dict with ``success`` set to False and an
        ``error`` message.
    """
    # Components are created on first use.
    await self._initialize()

    # Pull every supported option out of kwargs, applying the defaults.
    read = kwargs.get
    mode = read("mode")
    query_text = read("query")
    embedding = read("query_embedding")
    seeds = read("seed_entity_ids")
    type_filter = read("entity_type")
    prop_filters = read("property_filters")
    rel_types = read("relation_types")
    limit = read("max_results", 10)
    depth = read("max_depth", 2)
    min_similarity = read("vector_threshold", 0.0)
    w_vector = read("vector_weight", 0.6)
    w_graph = read("graph_weight", 0.4)
    expand = read("expand_results", True)
    # use_cache may be present in kwargs but is not used by this
    # implementation.

    # Reranking options
    rerank_on = read("enable_reranking", False)
    rerank_name = read("rerank_strategy", "text")
    rerank_pool = read("rerank_top_k")

    # A text query without an embedding gets a fixed placeholder vector;
    # no embedding model is called here.
    if query_text and not embedding:
        embedding = [0.1] * 128

    try:
        # With reranking on and a pool size given, fetch the larger of the
        # two counts; reranking trims back to ``limit`` afterwards.
        if rerank_on and rerank_pool:
            fetch_limit = max(rerank_pool, limit)
        else:
            fetch_limit = limit

        # Each branch only builds the coroutine; it is awaited once below.
        if mode == SearchModeEnum.VECTOR:
            if embedding is None:
                raise ValueError("query_embedding is required for vector search mode")
            pending = self._vector_search(embedding, type_filter, fetch_limit, min_similarity)
        elif mode == SearchModeEnum.GRAPH:
            pending = self._graph_search(seeds, depth, fetch_limit)
        elif mode == SearchModeEnum.HYBRID:
            pending = self._hybrid_search(
                embedding,
                seeds,
                type_filter,
                fetch_limit,
                depth,
                w_vector,
                w_graph,
                expand,
                min_similarity,
            )
        elif mode == SearchModeEnum.PAGERANK:
            pending = self._pagerank_search(seeds, fetch_limit)
        elif mode == SearchModeEnum.MULTIHOP:
            pending = self._multihop_search(seeds, depth, fetch_limit)
        elif mode == SearchModeEnum.FILTERED:
            pending = self._filtered_search(type_filter, prop_filters, fetch_limit)
        elif mode == SearchModeEnum.TRAVERSE:
            pending = self._traverse_search(seeds, rel_types, depth, fetch_limit)
        else:
            return {
                "success": False,
                "error": f"Unknown search mode: {mode}",
            }

        hits = await pending

        # Rerank only when requested and there is something to rerank.
        if rerank_on and hits:
            hits = await self._apply_reranking(
                results=hits,
                query=query_text,
                query_embedding=embedding,
                strategy=rerank_name,
                max_results=limit,
            )

        return {
            "success": True,
            "mode": mode,
            "num_results": len(hits),
            "results": hits,
            "reranked": rerank_on,
        }

    except Exception as exc:
        # Any failure is reported to the caller as an error payload.
        return {"success": False, "error": str(exc)}
|
|
506
|
+
|
|
507
|
+
async def _vector_search(
    self,
    query_embedding: List[float],
    entity_type: Optional[str],
    max_results: int,
    vector_threshold: float,
) -> List[Dict[str, Any]]:
    """
    Query the graph store by embedding similarity.

    Args:
        query_embedding: Query vector; an empty or missing vector yields no results.
        entity_type: Optional entity type forwarded to the store as a filter.
        max_results: Maximum number of matches requested from the store.
        vector_threshold: Forwarded to the store as ``score_threshold``.

    Returns:
        One dict per match with ``entity_id``, ``entity_type``,
        ``properties`` and ``score``.
    """
    if not query_embedding:
        return []

    matches = await self.graph_store.vector_search(
        query_embedding=query_embedding,
        entity_type=entity_type,
        max_results=max_results,
        score_threshold=vector_threshold,
    )

    # The store yields (entity, score) pairs; flatten each into a plain dict.
    hits: List[Dict[str, Any]] = []
    for node, similarity in matches:
        hits.append(
            {
                "entity_id": node.id,
                "entity_type": node.entity_type,
                "properties": node.properties,
                "score": similarity,
            }
        )
    return hits
|
|
534
|
+
|
|
535
|
+
async def _graph_search(
    self,
    seed_entity_ids: Optional[List[str]],
    max_depth: int,
    max_results: int,
) -> List[Dict[str, Any]]:
    """
    Search by graph structure only, starting from the given seed entities.

    Args:
        seed_entity_ids: Entity IDs to start from; without seeds the result is empty.
        max_depth: Passed to the search config as ``max_graph_depth``.
        max_results: Maximum number of results requested.

    Returns:
        One dict per match with ``entity_id``, ``entity_type``,
        ``properties`` and ``score``.
    """
    if not seed_entity_ids:
        return []

    graph_only = HybridSearchConfig(
        mode=SearchMode.GRAPH_ONLY,
        max_graph_depth=max_depth,
        max_results=max_results,
    )

    # The hybrid strategy is reused in graph-only mode; the one-element
    # vector is only a placeholder for the embedding argument.
    scored = await self.hybrid_search_strategy.search(
        query_embedding=[0.0],
        config=graph_only,
        seed_entity_ids=seed_entity_ids,
    )

    hits: List[Dict[str, Any]] = []
    for node, relevance in scored:
        hits.append(
            {
                "entity_id": node.id,
                "entity_type": node.entity_type,
                "properties": node.properties,
                "score": relevance,
            }
        )
    return hits
|
|
566
|
+
|
|
567
|
+
async def _hybrid_search(
    self,
    query_embedding: Optional[List[float]],
    seed_entity_ids: Optional[List[str]],
    entity_type: Optional[str],
    max_results: int,
    max_depth: int,
    vector_weight: float,
    graph_weight: float,
    expand_results: bool,
    vector_threshold: float,
) -> List[Dict[str, Any]]:
    """
    Run the hybrid strategy in its combined vector + graph mode.

    Args:
        query_embedding: Query vector; an empty or missing vector yields no results.
        seed_entity_ids: Optional seed entity IDs forwarded to the strategy.
        entity_type: Passed to the config as ``entity_type_filter``.
        max_results: Maximum number of results requested.
        max_depth: Passed to the config as ``max_graph_depth``.
        vector_weight: Weight of the vector component in the config.
        graph_weight: Weight of the graph component in the config.
        expand_results: Forwarded unchanged to the config.
        vector_threshold: Forwarded unchanged to the config.

    Returns:
        One dict per match with ``entity_id``, ``entity_type``,
        ``properties`` and ``score``.
    """
    if not query_embedding:
        return []

    blended = HybridSearchConfig(
        mode=SearchMode.HYBRID,
        vector_weight=vector_weight,
        graph_weight=graph_weight,
        max_results=max_results,
        max_graph_depth=max_depth,
        expand_results=expand_results,
        vector_threshold=vector_threshold,
        entity_type_filter=entity_type,
    )

    scored = await self.hybrid_search_strategy.search(
        query_embedding=query_embedding,
        config=blended,
        seed_entity_ids=seed_entity_ids,
    )

    # Flatten the (entity, score) pairs into plain dicts.
    hits: List[Dict[str, Any]] = []
    for node, relevance in scored:
        hits.append(
            {
                "entity_id": node.id,
                "entity_type": node.entity_type,
                "properties": node.properties,
                "score": relevance,
            }
        )
    return hits
|
|
609
|
+
|
|
610
|
+
async def _pagerank_search(self, seed_entity_ids: Optional[List[str]], max_results: int) -> List[Dict[str, Any]]:
    """
    Rank entities with the PageRank strategy, starting from the seeds.

    Args:
        seed_entity_ids: Entity IDs to start from; without seeds the result is empty.
        max_results: Maximum number of results requested from the strategy.

    Returns:
        One dict per match with ``entity_id``, ``entity_type``,
        ``properties``, ``score`` and ``score_type`` set to ``"pagerank"``.
    """
    if not seed_entity_ids:
        return []

    ranked = await self.pagerank_strategy.retrieve(seed_entity_ids=seed_entity_ids, max_results=max_results)

    hits: List[Dict[str, Any]] = []
    for node, rank in ranked:
        hits.append(
            {
                "entity_id": node.id,
                "entity_type": node.entity_type,
                "properties": node.properties,
                "score": rank,
                "score_type": "pagerank",
            }
        )
    return hits
|
|
627
|
+
|
|
628
|
+
async def _multihop_search(
    self,
    seed_entity_ids: Optional[List[str]],
    max_depth: int,
    max_results: int,
) -> List[Dict[str, Any]]:
    """
    Retrieve entities within a number of hops of the seed entities.

    Args:
        seed_entity_ids: Entity IDs to start from; without seeds the result is empty.
        max_depth: Passed to the strategy as ``max_hops``.
        max_results: Maximum number of results requested from the strategy.

    Returns:
        One dict per match with ``entity_id``, ``entity_type``,
        ``properties``, ``score`` and ``score_type`` set to ``"hop_distance"``.
    """
    if not seed_entity_ids:
        return []

    reached = await self.multihop_strategy.retrieve(
        seed_entity_ids=seed_entity_ids,
        max_hops=max_depth,
        max_results=max_results,
    )

    hits: List[Dict[str, Any]] = []
    for node, hop_score in reached:
        hits.append(
            {
                "entity_id": node.id,
                "entity_type": node.entity_type,
                "properties": node.properties,
                "score": hop_score,
                "score_type": "hop_distance",
            }
        )
    return hits
|
|
654
|
+
|
|
655
|
+
async def _filtered_search(
    self,
    entity_type: Optional[str],
    property_filters: Optional[Dict[str, Any]],
    max_results: int,
) -> List[Dict[str, Any]]:
    """
    Retrieve entities through the filtered strategy.

    Unlike the seed-based searches there is no early exit here: the
    strategy is always called, even when both filters are None.

    Args:
        entity_type: Optional entity type filter.
        property_filters: Optional property filters forwarded unchanged.
        max_results: Maximum number of results requested from the strategy.

    Returns:
        One dict per match with ``entity_id``, ``entity_type``,
        ``properties`` and ``score``.
    """
    selected = await self.filtered_strategy.retrieve(
        entity_type=entity_type,
        property_filters=property_filters,
        max_results=max_results,
    )

    hits: List[Dict[str, Any]] = []
    for node, relevance in selected:
        hits.append(
            {
                "entity_id": node.id,
                "entity_type": node.entity_type,
                "properties": node.properties,
                "score": relevance,
            }
        )
    return hits
|
|
677
|
+
|
|
678
|
+
async def _traverse_search(
    self,
    seed_entity_ids: Optional[List[str]],
    relation_types: Optional[List[str]],
    max_depth: int,
    max_results: int,
) -> List[Dict[str, Any]]:
    """
    Perform pattern-based traversal from each seed and rank reached entities.

    Every entity on a returned path is scored ``1 / (path.length + 1)``, so
    shorter paths score higher. When an entity shows up on several paths it
    keeps the best (highest) of those scores, independent of the order in
    which the paths were returned.

    Args:
        seed_entity_ids: Entity IDs to start from; without seeds the result is empty.
        relation_types: Optional relation types the path pattern is restricted to.
        max_depth: Maximum depth of the path pattern.
        max_results: Maximum number of entities returned.

    Returns:
        Up to ``max_results`` dicts with ``entity_id``, ``entity_type``,
        ``properties``, ``score`` and ``score_type`` set to
        ``"path_length"``, ordered by descending score.
    """
    if not seed_entity_ids:
        return []

    pattern = PathPattern(
        relation_types=relation_types,
        max_depth=max_depth,
        allow_cycles=False,
    )

    # entity id -> {"entity": ..., "score": ...}; insertion order is the
    # order of first appearance and serves as the tie-break when sorting.
    best_by_id: Dict[Any, Dict[str, Any]] = {}
    for seed_id in seed_entity_ids:
        # Twice the requested count is fetched per seed.
        # NOTE(review): presumably headroom for de-duplication - confirm.
        paths = await self.traversal_strategy.traverse_with_pattern(
            start_entity_id=seed_id,
            pattern=pattern,
            max_results=max_results * 2,
        )

        for path in paths:
            # Score by path length (shorter is better)
            path_score = 1.0 / (path.length + 1)
            for entity in path.nodes:
                entry = best_by_id.get(entity.id)
                if entry is None:
                    best_by_id[entity.id] = {"entity": entity, "score": path_score}
                elif path_score > entry["score"]:
                    # A shorter path to an already-seen entity improves its score.
                    entry["score"] = path_score

    # Sort by score and take top results
    top_entries = sorted(best_by_id.values(), key=lambda item: item["score"], reverse=True)[:max_results]

    return [
        {
            "entity_id": item["entity"].id,
            "entity_type": item["entity"].entity_type,
            "properties": item["entity"].properties,
            "score": item["score"],
            "score_type": "path_length",
        }
        for item in top_entries
    ]
|
|
727
|
+
|
|
728
|
+
async def _apply_reranking(
    self,
    results: List[Dict[str, Any]],
    query: Optional[str],
    query_embedding: Optional[List[float]],
    strategy: str,
    max_results: int,
) -> List[Dict[str, Any]]:
    """
    Apply reranking to search results

    Args:
        results: Initial search results (list of dicts)
        query: Query text; an empty string is passed to the reranker when None
        query_embedding: Query embedding vector
        strategy: Reranking strategy name; unknown names fall back to "text"
        max_results: Final number of results to return

    Returns:
        Reranked and limited results. Each dict is a copy of the original
        result with ``score`` replaced by the rerank score, the previous
        score kept under ``original_score`` and the new one repeated under
        ``rerank_score``. An empty input is returned unchanged.
    """
    if not results:
        return results

    # Convert result dicts to Entity objects
    entities = [
        Entity(
            id=result["entity_id"],
            entity_type=result["entity_type"],
            properties=result["properties"],
            embedding=result.get("embedding"),  # May be None
        )
        for result in results
    ]

    # Index results by entity id once, instead of scanning the whole list
    # for every reranked entity. setdefault keeps the FIRST result for a
    # repeated id, matching a front-to-back search.
    first_by_id: Dict[Any, Dict[str, Any]] = {}
    for result in results:
        first_by_id.setdefault(result["entity_id"], result)

    # Get reranker strategy
    reranker_strategy = self._rerankers.get(strategy)
    if not reranker_strategy:
        # Fall back to text similarity if strategy not found
        reranker_strategy = self._rerankers["text"]

    # Create result reranker with single strategy
    reranker = ResultReranker(
        strategies=[reranker_strategy],
        combination_method=ScoreCombinationMethod.WEIGHTED_AVERAGE,
        weights={reranker_strategy.name: 1.0},
    )

    # Rerank entities
    reranked = await reranker.rerank(
        query=query or "",
        entities=entities,
        top_k=max_results,
        query_embedding=query_embedding,
    )

    # Convert back to result dicts
    reranked_results = []
    for entity, rerank_score in reranked:
        # Find original result to preserve additional fields
        original_result = first_by_id.get(entity.id)

        if original_result:
            result_dict = original_result.copy()
            # Update score with reranked score
            result_dict["original_score"] = result_dict.get("score", 0.0)
            result_dict["score"] = rerank_score
            result_dict["rerank_score"] = rerank_score
            reranked_results.append(result_dict)

    return reranked_results
|
|
799
|
+
|
|
800
|
+
# Public methods for ToolExecutor integration
|
|
801
|
+
async def vector_search(
    self,
    query: Optional[str] = None,
    query_embedding: Optional[List[float]] = None,
    entity_type: Optional[str] = None,
    max_results: int = 10,
    vector_threshold: float = 0.0,
    enable_reranking: bool = False,
    rerank_strategy: Optional[str] = "text",
    rerank_top_k: Optional[int] = None,
) -> Dict[str, Any]:
    """Vector similarity search (public method for ToolExecutor)

    Accepts the same fields that ``Vector_searchSchema`` declares,
    including the reranking options, which are handled the same way as in
    ``_execute``.

    Args:
        query: Optional query text. When given without an embedding, a
            fixed placeholder embedding is used.
        query_embedding: Optional pre-computed query vector.
        entity_type: Optional entity type filter.
        max_results: Number of results to return.
        vector_threshold: Minimum similarity forwarded to the store.
        enable_reranking: Rerank the results before returning them.
        rerank_strategy: Name of the reranking strategy.
        rerank_top_k: When reranking, fetch at least this many candidates
            before trimming back to ``max_results``.

    Returns:
        Dict with ``success``, ``mode``, ``num_results``, ``results`` and
        ``reranked``.

    Raises:
        ValueError: If neither ``query`` nor ``query_embedding`` is given.
    """
    await self._initialize()
    if query and not query_embedding:
        query_embedding = [0.1] * 128  # Placeholder
    if query_embedding is None:
        raise ValueError("query_embedding is required for vector search")

    # With reranking on, fetch the larger candidate pool first.
    fetch_limit = max_results
    if enable_reranking and rerank_top_k:
        fetch_limit = max(rerank_top_k, max_results)

    results = await self._vector_search(query_embedding, entity_type, fetch_limit, vector_threshold)

    if enable_reranking and results:
        results = await self._apply_reranking(
            results=results,
            query=query,
            query_embedding=query_embedding,
            strategy=rerank_strategy,
            max_results=max_results,
        )

    return {
        "success": True,
        "mode": "vector",
        "num_results": len(results),
        "results": results,
        "reranked": enable_reranking,
    }
|
|
822
|
+
|
|
823
|
+
async def graph_search(
    self,
    seed_entity_ids: List[str],
    max_depth: int = 2,
    max_results: int = 10,
) -> Dict[str, Any]:
    """Graph structure search (public method for ToolExecutor)

    Initializes the tool if needed, delegates to ``_graph_search`` and
    wraps the result list in the standard response dict.

    Args:
        seed_entity_ids: Entity IDs to start the traversal from.
        max_depth: Maximum traversal depth.
        max_results: Maximum number of results to return.

    Returns:
        Dict with ``success``, ``mode`` ("graph"), ``num_results`` and ``results``.
    """
    await self._initialize()
    found = await self._graph_search(seed_entity_ids, max_depth, max_results)

    response: Dict[str, Any] = {"success": True, "mode": "graph"}
    response["num_results"] = len(found)
    response["results"] = found
    return response
|
|
838
|
+
|
|
839
|
+
async def hybrid_search(
    self,
    query: Optional[str] = None,
    query_embedding: Optional[List[float]] = None,
    seed_entity_ids: Optional[List[str]] = None,
    entity_type: Optional[str] = None,
    max_results: int = 10,
    max_depth: int = 2,
    vector_weight: float = 0.6,
    graph_weight: float = 0.4,
    expand_results: bool = True,
    vector_threshold: float = 0.0,
    enable_reranking: bool = False,
    rerank_strategy: Optional[str] = "hybrid",
    rerank_top_k: Optional[int] = None,
) -> Dict[str, Any]:
    """Hybrid search (public method for ToolExecutor)

    Accepts the same fields that ``Hybrid_searchSchema`` declares,
    including the reranking options, which are handled the same way as in
    ``_execute``.

    Args:
        query: Optional query text. When given without an embedding, a
            fixed placeholder embedding is used.
        query_embedding: Optional pre-computed query vector. With neither
            ``query`` nor an embedding the result list is empty.
        seed_entity_ids: Optional seed entity IDs for the graph component.
        entity_type: Optional entity type filter.
        max_results: Number of results to return.
        max_depth: Maximum graph traversal depth.
        vector_weight: Weight of the vector similarity component.
        graph_weight: Weight of the graph structure component.
        expand_results: Whether to expand results with graph neighbors.
        vector_threshold: Minimum similarity for the vector component.
        enable_reranking: Rerank the results before returning them.
        rerank_strategy: Name of the reranking strategy.
        rerank_top_k: When reranking, fetch at least this many candidates
            before trimming back to ``max_results``.

    Returns:
        Dict with ``success``, ``mode``, ``num_results``, ``results`` and
        ``reranked``.
    """
    await self._initialize()
    if query and not query_embedding:
        query_embedding = [0.1] * 128  # Placeholder

    # With reranking on, fetch the larger candidate pool first.
    fetch_limit = max_results
    if enable_reranking and rerank_top_k:
        fetch_limit = max(rerank_top_k, max_results)

    results = await self._hybrid_search(
        query_embedding,
        seed_entity_ids,
        entity_type,
        fetch_limit,
        max_depth,
        vector_weight,
        graph_weight,
        expand_results,
        vector_threshold,
    )

    if enable_reranking and results:
        results = await self._apply_reranking(
            results=results,
            query=query,
            query_embedding=query_embedding,
            strategy=rerank_strategy,
            max_results=max_results,
        )

    return {
        "success": True,
        "mode": "hybrid",
        "num_results": len(results),
        "results": results,
        "reranked": enable_reranking,
    }
|
|
873
|
+
|
|
874
|
+
async def pagerank_search(self, seed_entity_ids: List[str], max_results: int = 10) -> Dict[str, Any]:
    """PageRank search (public method for ToolExecutor)

    Initializes the tool if needed, delegates to ``_pagerank_search`` and
    wraps the result list in the standard response dict.

    Args:
        seed_entity_ids: Entity IDs the ranking starts from.
        max_results: Maximum number of results to return.

    Returns:
        Dict with ``success``, ``mode`` ("pagerank"), ``num_results`` and ``results``.
    """
    await self._initialize()
    found = await self._pagerank_search(seed_entity_ids, max_results)

    response: Dict[str, Any] = {"success": True, "mode": "pagerank"}
    response["num_results"] = len(found)
    response["results"] = found
    return response
|
|
884
|
+
|
|
885
|
+
async def multihop_search(
    self,
    seed_entity_ids: List[str],
    max_depth: int = 2,
    max_results: int = 10,
) -> Dict[str, Any]:
    """Multi-hop search (public method for ToolExecutor)

    Initializes the tool if needed, delegates to ``_multihop_search`` and
    wraps the result list in the standard response dict.

    Args:
        seed_entity_ids: Entity IDs to start from.
        max_depth: Maximum number of hops from the seeds.
        max_results: Maximum number of results to return.

    Returns:
        Dict with ``success``, ``mode`` ("multihop"), ``num_results`` and ``results``.
    """
    await self._initialize()
    found = await self._multihop_search(seed_entity_ids, max_depth, max_results)

    response: Dict[str, Any] = {"success": True, "mode": "multihop"}
    response["num_results"] = len(found)
    response["results"] = found
    return response
|
|
900
|
+
|
|
901
|
+
async def filtered_search(
    self,
    entity_type: Optional[str] = None,
    property_filters: Optional[Dict[str, Any]] = None,
    max_results: int = 10,
) -> Dict[str, Any]:
    """Filtered search (public method for ToolExecutor)

    Initializes the tool if needed, delegates to ``_filtered_search`` and
    wraps the result list in the standard response dict.

    Args:
        entity_type: Optional entity type filter.
        property_filters: Optional property filters.
        max_results: Maximum number of results to return.

    Returns:
        Dict with ``success``, ``mode`` ("filtered"), ``num_results`` and ``results``.
    """
    await self._initialize()
    found = await self._filtered_search(entity_type, property_filters, max_results)

    response: Dict[str, Any] = {"success": True, "mode": "filtered"}
    response["num_results"] = len(found)
    response["results"] = found
    return response
|
|
916
|
+
|
|
917
|
+
async def traverse_search(
    self,
    seed_entity_ids: List[str],
    relation_types: Optional[List[str]] = None,
    max_depth: int = 2,
    max_results: int = 10,
) -> Dict[str, Any]:
    """Pattern-based traversal (public method for ToolExecutor)

    Initializes the tool if needed, delegates to ``_traverse_search`` and
    wraps the result list in the standard response dict.

    Args:
        seed_entity_ids: Entity IDs to start the traversal from.
        relation_types: Optional relation types to restrict the traversal to.
        max_depth: Maximum traversal depth.
        max_results: Maximum number of results to return.

    Returns:
        Dict with ``success``, ``mode`` ("traverse"), ``num_results`` and ``results``.
    """
    await self._initialize()
    found = await self._traverse_search(seed_entity_ids, relation_types, max_depth, max_results)

    response: Dict[str, Any] = {"success": True, "mode": "traverse"}
    response["num_results"] = len(found)
    response["results"] = found
    return response
|
|
933
|
+
|
|
934
|
+
async def execute(self, **kwargs) -> Dict[str, Any]:
    """
    Execute the tool (public interface)

    Thin public entry point: every keyword argument is forwarded unchanged
    to ``_execute`` and its result is returned as is.

    Args:
        **kwargs: Tool input parameters

    Returns:
        Dictionary with search results
    """
    outcome = await self._execute(**kwargs)
    return outcome
|