aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiecs might be problematic. Click here for more details.
- aiecs/__init__.py +13 -16
- aiecs/__main__.py +7 -7
- aiecs/aiecs_client.py +269 -75
- aiecs/application/executors/operation_executor.py +79 -54
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
- aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
- aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
- aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +98 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
- aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
- aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
- aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
- aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
- aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
- aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
- aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
- aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
- aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
- aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
- aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
- aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
- aiecs/application/knowledge_graph/search/reranker.py +293 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +471 -0
- aiecs/config/__init__.py +20 -5
- aiecs/config/config.py +762 -31
- aiecs/config/graph_config.py +131 -0
- aiecs/config/tool_config.py +399 -0
- aiecs/core/__init__.py +29 -13
- aiecs/core/interface/__init__.py +2 -2
- aiecs/core/interface/execution_interface.py +22 -22
- aiecs/core/interface/storage_interface.py +37 -88
- aiecs/core/registry/__init__.py +31 -0
- aiecs/core/registry/service_registry.py +92 -0
- aiecs/domain/__init__.py +270 -1
- aiecs/domain/agent/__init__.py +191 -0
- aiecs/domain/agent/base_agent.py +3870 -0
- aiecs/domain/agent/exceptions.py +99 -0
- aiecs/domain/agent/graph_aware_mixin.py +569 -0
- aiecs/domain/agent/hybrid_agent.py +1435 -0
- aiecs/domain/agent/integration/__init__.py +29 -0
- aiecs/domain/agent/integration/context_compressor.py +216 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
- aiecs/domain/agent/integration/protocols.py +281 -0
- aiecs/domain/agent/integration/retry_policy.py +218 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
- aiecs/domain/agent/lifecycle.py +291 -0
- aiecs/domain/agent/llm_agent.py +692 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +1124 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +163 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
- aiecs/domain/agent/models.py +884 -0
- aiecs/domain/agent/observability.py +479 -0
- aiecs/domain/agent/persistence.py +449 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +159 -0
- aiecs/domain/agent/prompts/formatters.py +187 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +253 -0
- aiecs/domain/agent/tool_agent.py +444 -0
- aiecs/domain/agent/tools/__init__.py +15 -0
- aiecs/domain/agent/tools/schema_generator.py +364 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +469 -0
- aiecs/domain/community/analytics.py +432 -0
- aiecs/domain/community/collaborative_workflow.py +648 -0
- aiecs/domain/community/communication_hub.py +634 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +796 -0
- aiecs/domain/community/community_manager.py +803 -0
- aiecs/domain/community/decision_engine.py +849 -0
- aiecs/domain/community/exceptions.py +231 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +234 -0
- aiecs/domain/community/resource_manager.py +461 -0
- aiecs/domain/community/shared_context_manager.py +589 -0
- aiecs/domain/context/__init__.py +40 -10
- aiecs/domain/context/context_engine.py +1910 -0
- aiecs/domain/context/conversation_models.py +87 -53
- aiecs/domain/context/graph_memory.py +582 -0
- aiecs/domain/execution/model.py +12 -4
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +148 -0
- aiecs/domain/knowledge_graph/models/evidence.py +178 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
- aiecs/domain/knowledge_graph/models/path.py +171 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
- aiecs/domain/knowledge_graph/models/query.py +261 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
- aiecs/domain/knowledge_graph/models/relation.py +202 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
- aiecs/domain/task/dsl_processor.py +172 -56
- aiecs/domain/task/model.py +20 -8
- aiecs/domain/task/task_context.py +27 -24
- aiecs/infrastructure/__init__.py +0 -2
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +837 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
- aiecs/infrastructure/graph_storage/cache.py +424 -0
- aiecs/infrastructure/graph_storage/distributed.py +223 -0
- aiecs/infrastructure/graph_storage/error_handling.py +380 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
- aiecs/infrastructure/graph_storage/metrics.py +344 -0
- aiecs/infrastructure/graph_storage/migration.py +400 -0
- aiecs/infrastructure/graph_storage/pagination.py +483 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
- aiecs/infrastructure/graph_storage/postgres.py +1563 -0
- aiecs/infrastructure/graph_storage/property_storage.py +353 -0
- aiecs/infrastructure/graph_storage/protocols.py +76 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
- aiecs/infrastructure/graph_storage/streaming.py +487 -0
- aiecs/infrastructure/graph_storage/tenant.py +412 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
- aiecs/infrastructure/messaging/websocket_manager.py +51 -35
- aiecs/infrastructure/monitoring/__init__.py +22 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
- aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
- aiecs/infrastructure/monitoring/structured_logger.py +3 -7
- aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
- aiecs/infrastructure/persistence/__init__.py +14 -1
- aiecs/infrastructure/persistence/context_engine_client.py +184 -0
- aiecs/infrastructure/persistence/database_manager.py +67 -43
- aiecs/infrastructure/persistence/file_storage.py +180 -103
- aiecs/infrastructure/persistence/redis_client.py +74 -21
- aiecs/llm/__init__.py +73 -25
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
- aiecs/llm/client_factory.py +224 -36
- aiecs/llm/client_resolver.py +155 -0
- aiecs/llm/clients/__init__.py +38 -0
- aiecs/llm/clients/base_client.py +324 -0
- aiecs/llm/clients/google_function_calling_mixin.py +457 -0
- aiecs/llm/clients/googleai_client.py +241 -0
- aiecs/llm/clients/openai_client.py +158 -0
- aiecs/llm/clients/openai_compatible_mixin.py +367 -0
- aiecs/llm/clients/vertex_client.py +897 -0
- aiecs/llm/clients/xai_client.py +201 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +272 -0
- aiecs/llm/config/config_validator.py +206 -0
- aiecs/llm/config/model_config.py +143 -0
- aiecs/llm/protocols.py +149 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +89 -0
- aiecs/main.py +140 -121
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/module_checker.py +499 -0
- aiecs/scripts/aid/version_manager.py +235 -0
- aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
- aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
- aiecs/scripts/dependance_check/__init__.py +15 -0
- aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
- aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
- aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
- aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
- aiecs/scripts/knowledge_graph/__init__.py +3 -0
- aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
- aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
- aiecs/scripts/tools_develop/README.md +671 -0
- aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
- aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
- aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
- aiecs/scripts/tools_develop/schema_coverage.py +511 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
- aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
- aiecs/scripts/tools_develop/verify_tools.py +352 -0
- aiecs/tasks/__init__.py +0 -1
- aiecs/tasks/worker.py +115 -47
- aiecs/tools/__init__.py +194 -72
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +330 -0
- aiecs/tools/apisource/providers/__init__.py +112 -0
- aiecs/tools/apisource/providers/base.py +671 -0
- aiecs/tools/apisource/providers/census.py +397 -0
- aiecs/tools/apisource/providers/fred.py +535 -0
- aiecs/tools/apisource/providers/newsapi.py +409 -0
- aiecs/tools/apisource/providers/worldbank.py +352 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +363 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
- aiecs/tools/apisource/tool.py +832 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +334 -0
- aiecs/tools/base_tool.py +415 -21
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +607 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
- aiecs/tools/docs/content_insertion_tool.py +1320 -0
- aiecs/tools/docs/document_creator_tool.py +1323 -0
- aiecs/tools/docs/document_layout_tool.py +1160 -0
- aiecs/tools/docs/document_parser_tool.py +1011 -0
- aiecs/tools/docs/document_writer_tool.py +1829 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
- aiecs/tools/langchain_adapter.py +300 -138
- aiecs/tools/schema_generator.py +455 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +581 -0
- aiecs/tools/search_tool/cache.py +264 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +224 -0
- aiecs/tools/search_tool/core.py +778 -0
- aiecs/tools/search_tool/deduplicator.py +119 -0
- aiecs/tools/search_tool/error_handler.py +242 -0
- aiecs/tools/search_tool/metrics.py +343 -0
- aiecs/tools/search_tool/rate_limiter.py +172 -0
- aiecs/tools/search_tool/schemas.py +275 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
- aiecs/tools/statistics/data_loader_tool.py +555 -0
- aiecs/tools/statistics/data_profiler_tool.py +638 -0
- aiecs/tools/statistics/data_transformer_tool.py +580 -0
- aiecs/tools/statistics/data_visualizer_tool.py +498 -0
- aiecs/tools/statistics/model_trainer_tool.py +507 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
- aiecs/tools/task_tools/__init__.py +49 -36
- aiecs/tools/task_tools/chart_tool.py +200 -184
- aiecs/tools/task_tools/classfire_tool.py +268 -267
- aiecs/tools/task_tools/image_tool.py +175 -131
- aiecs/tools/task_tools/office_tool.py +226 -146
- aiecs/tools/task_tools/pandas_tool.py +477 -121
- aiecs/tools/task_tools/report_tool.py +390 -142
- aiecs/tools/task_tools/research_tool.py +149 -79
- aiecs/tools/task_tools/scraper_tool.py +339 -145
- aiecs/tools/task_tools/stats_tool.py +448 -209
- aiecs/tools/temp_file_manager.py +26 -24
- aiecs/tools/tool_executor/__init__.py +18 -16
- aiecs/tools/tool_executor/tool_executor.py +364 -52
- aiecs/utils/LLM_output_structor.py +74 -48
- aiecs/utils/__init__.py +14 -3
- aiecs/utils/base_callback.py +0 -3
- aiecs/utils/cache_provider.py +696 -0
- aiecs/utils/execution_utils.py +50 -31
- aiecs/utils/prompt_loader.py +1 -0
- aiecs/utils/token_usage_repository.py +37 -11
- aiecs/ws/socket_server.py +14 -4
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
- aiecs-1.7.6.dist-info/RECORD +337 -0
- aiecs-1.7.6.dist-info/entry_points.txt +13 -0
- aiecs/config/registry.py +0 -19
- aiecs/domain/context/content_engine.py +0 -982
- aiecs/llm/base_client.py +0 -99
- aiecs/llm/openai_client.py +0 -125
- aiecs/llm/vertex_client.py +0 -186
- aiecs/llm/xai_client.py +0 -184
- aiecs/scripts/dependency_checker.py +0 -857
- aiecs/scripts/quick_dependency_check.py +0 -269
- aiecs/tools/task_tools/search_api.py +0 -7
- aiecs-1.0.1.dist-info/RECORD +0 -90
- aiecs-1.0.1.dist-info/entry_points.txt +0 -7
- /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
- /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
- /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
- /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,457 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Hybrid Search Strategy
|
|
3
|
+
|
|
4
|
+
Combines vector similarity search with graph structure traversal
|
|
5
|
+
to provide enhanced search results.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
from typing import List, Optional, Dict, Tuple
|
|
10
|
+
from enum import Enum
|
|
11
|
+
from pydantic import BaseModel, Field
|
|
12
|
+
from aiecs.domain.knowledge_graph.models.entity import Entity
|
|
13
|
+
from aiecs.domain.knowledge_graph.models.path import Path
|
|
14
|
+
from aiecs.infrastructure.graph_storage.base import GraphStore
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class SearchMode(str, Enum):
    """Selects which retrieval path a hybrid search runs."""

    # Vector similarity search only
    VECTOR_ONLY = "vector_only"
    # Graph traversal from seed entities only
    GRAPH_ONLY = "graph_only"
    # Vector and graph results merged with weighted scoring
    HYBRID = "hybrid"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class HybridSearchConfig(BaseModel):
    """
    Settings that control how a hybrid search is executed

    Attributes:
        mode: Search mode (vector_only, graph_only, hybrid)
        vector_weight: Weight applied to vector similarity scores (0.0-1.0)
        graph_weight: Weight applied to graph structure scores (0.0-1.0)
        max_results: Upper bound on the number of returned results (>= 1)
        vector_threshold: Minimum similarity a vector hit must reach (0.0-1.0)
        max_graph_depth: How many levels the graph traversal may cover (1-5)
        expand_results: Whether vector results are expanded with graph neighbors
        min_combined_score: Minimum combined score a result must reach (0.0-1.0)
        entity_type_filter: Optional entity type to restrict results to
    """

    mode: SearchMode = Field(
        default=SearchMode.HYBRID,
        description="Search mode",
    )

    # Scoring weights; each one is constrained to the closed range [0, 1].
    vector_weight: float = Field(default=0.6, ge=0.0, le=1.0, description="Weight for vector similarity scores")
    graph_weight: float = Field(default=0.4, ge=0.0, le=1.0, description="Weight for graph structure scores")

    max_results: int = Field(
        default=10,
        ge=1,
        description="Maximum number of results",
    )

    vector_threshold: float = Field(default=0.0, ge=0.0, le=1.0, description="Minimum similarity threshold for vector search")

    max_graph_depth: int = Field(
        default=2,
        ge=1,
        le=5,
        description="Maximum depth for graph traversal",
    )

    expand_results: bool = Field(default=True, description="Whether to expand vector results with graph neighbors")

    min_combined_score: float = Field(default=0.0, ge=0.0, le=1.0, description="Minimum combined score threshold")

    entity_type_filter: Optional[str] = Field(
        default=None,
        description="Optional entity type filter",
    )

    class Config:
        # pydantic model option: populate enum fields with their underlying values
        use_enum_values = True
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class HybridSearchStrategy:
|
|
88
|
+
"""
|
|
89
|
+
Hybrid Search Strategy
|
|
90
|
+
|
|
91
|
+
Combines vector similarity search with graph structure traversal
|
|
92
|
+
to provide enhanced search results that leverage both semantic
|
|
93
|
+
similarity and structural relationships.
|
|
94
|
+
|
|
95
|
+
Search Modes:
|
|
96
|
+
- VECTOR_ONLY: Pure vector similarity search
|
|
97
|
+
- GRAPH_ONLY: Pure graph traversal from seed entities
|
|
98
|
+
- HYBRID: Combines both approaches with weighted scoring
|
|
99
|
+
|
|
100
|
+
Example:
|
|
101
|
+
```python
|
|
102
|
+
strategy = HybridSearchStrategy(graph_store)
|
|
103
|
+
|
|
104
|
+
config = HybridSearchConfig(
|
|
105
|
+
mode=SearchMode.HYBRID,
|
|
106
|
+
vector_weight=0.6,
|
|
107
|
+
graph_weight=0.4,
|
|
108
|
+
max_results=10,
|
|
109
|
+
expand_results=True
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
results = await strategy.search(
|
|
113
|
+
query_embedding=[0.1, 0.2, ...],
|
|
114
|
+
config=config
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
for entity, score in results:
|
|
118
|
+
print(f"{entity.id}: {score:.3f}")
|
|
119
|
+
```
|
|
120
|
+
"""
|
|
121
|
+
|
|
122
|
+
def __init__(self, graph_store: GraphStore):
    """
    Create a strategy bound to a graph storage backend

    Args:
        graph_store: Backend used for every vector search, neighbor
            lookup and entity retrieval performed by this strategy
    """
    # The store is the only state this object keeps.
    self.graph_store = graph_store
|
|
130
|
+
|
|
131
|
+
async def search(
    self,
    query_embedding: List[float],
    config: Optional[HybridSearchConfig] = None,
    seed_entity_ids: Optional[List[str]] = None,
) -> List[Tuple[Entity, float]]:
    """
    Run a search in the mode selected by the configuration

    Args:
        query_embedding: Query vector embedding
        config: Search configuration (a default HybridSearchConfig is used if None)
        seed_entity_ids: Optional seed entities for graph traversal

    Returns:
        List of (entity, score) tuples sorted by score descending
    """
    cfg = HybridSearchConfig() if config is None else config

    if cfg.mode == SearchMode.VECTOR_ONLY:
        return await self._vector_search(query_embedding, cfg)

    if cfg.mode != SearchMode.GRAPH_ONLY:
        # Any remaining mode is treated as HYBRID
        return await self._hybrid_search(query_embedding, cfg, seed_entity_ids)

    # GRAPH_ONLY from here on
    if not seed_entity_ids:
        if not query_embedding:
            logger.warning("No seed entities provided and no query embedding available for graph search")
            seed_entity_ids = []
        else:
            # No seeds supplied: use the top vector hits as traversal seeds
            try:
                seed_hits = await self._vector_search(query_embedding, cfg, max_results=5)
                seed_entity_ids = [hit.id for hit, _ in seed_hits]
            except Exception as e:
                logger.warning(f"Vector search failed while trying to find seed entities for graph search: {e}")
                seed_entity_ids = []

        if not seed_entity_ids:
            logger.warning("No seed entities available for graph-only search, returning empty results")
            return []

    return await self._graph_search(seed_entity_ids, cfg)
|
|
174
|
+
|
|
175
|
+
async def _vector_search(
    self,
    query_embedding: List[float],
    config: HybridSearchConfig,
    max_results: Optional[int] = None,
) -> List[Tuple[Entity, float]]:
    """
    Delegate a vector similarity search to the graph store

    Args:
        query_embedding: Query vector
        config: Search configuration (supplies the entity type filter,
            the score threshold and the default result limit)
        max_results: Optional override for the result limit; a falsy
            value (None or 0) falls back to config.max_results

    Returns:
        List of (entity, score) tuples as returned by the graph store
    """
    limit = max_results or config.max_results
    return await self.graph_store.vector_search(
        query_embedding=query_embedding,
        entity_type=config.entity_type_filter,
        max_results=limit,
        score_threshold=config.vector_threshold,
    )
|
|
200
|
+
|
|
201
|
+
async def _graph_search(self, seed_entity_ids: List[str], config: HybridSearchConfig) -> List[Tuple[Entity, float]]:
    """
    Perform graph structure search from seed entities

    Each seed is expanded level by level along outgoing edges. An entity
    found at level ``d`` (the seed itself is level 0) is scored
    ``1 / (d + 1)``; when an entity is reached several times the highest
    score is kept.

    Note that ``config.max_graph_depth`` counts levels *including* the
    seed level, so a value of 1 scores only the seeds and a value of 2
    scores the seeds plus their direct neighbors.

    Args:
        seed_entity_ids: Starting entities for traversal
        config: Search configuration (max_graph_depth, entity_type_filter
            and max_results are read)

    Returns:
        List of (entity, score) tuples sorted by score descending and
        truncated to config.max_results
    """
    entity_scores: Dict[str, float] = {}
    final_level = config.max_graph_depth - 1

    for seed_id in seed_entity_ids:
        frontier = {seed_id}
        # Visited set is per seed, so a later seed may re-walk shared nodes
        visited = set()

        for depth in range(config.max_graph_depth):
            # Score decreases with depth
            depth_score = 1.0 / (depth + 1)
            next_frontier = set()

            for entity_id in frontier:
                if entity_id in visited:
                    continue
                visited.add(entity_id)

                # Keep the best score if the entity was seen from multiple paths
                entity_scores[entity_id] = max(entity_scores.get(entity_id, depth_score), depth_score)

                # Entities on the final level are never expanded further, so
                # fetching their neighbors would be wasted storage I/O.
                if depth == final_level:
                    continue

                neighbors = await self.graph_store.get_neighbors(entity_id, direction="outgoing")
                next_frontier.update(neighbor.id for neighbor in neighbors if neighbor.id not in visited)

            frontier = next_frontier
            if not frontier:
                break

    # Resolve ids to entities, dropping missing ones and type mismatches
    wanted_type = config.entity_type_filter
    results = []
    for entity_id, score in entity_scores.items():
        entity = await self.graph_store.get_entity(entity_id)
        if not entity:
            continue
        if wanted_type and entity.entity_type != wanted_type:
            continue
        results.append((entity, score))

    # Sort by score descending (stable: ties keep discovery order)
    results.sort(key=lambda item: item[1], reverse=True)

    return results[: config.max_results]
|
|
268
|
+
|
|
269
|
+
async def _hybrid_search(
    self,
    query_embedding: List[float],
    config: HybridSearchConfig,
    seed_entity_ids: Optional[List[str]] = None,
) -> List[Tuple[Entity, float]]:
    """
    Perform hybrid search combining vector and graph

    Steps:
        1. Vector search for ``2 * max_results`` candidates. If it raises,
           fall back to a graph-only search from ``seed_entity_ids`` (or
           return an empty list when no seeds were given).
        2. If ``config.expand_results`` is set, run a graph search seeded
           by ``seed_entity_ids`` or, when none are given, by the top five
           vector hits. A failure here is logged and ignored.
        3. Merge both score maps via ``_combine_scores``.
        4. Rank candidates by combined score and resolve them to entities.

    Args:
        query_embedding: Query vector
        config: Search configuration
        seed_entity_ids: Optional seed entities

    Returns:
        List of (entity, score) tuples with combined scores, sorted by
        score descending and limited to config.max_results
    """
    # Step 1: Vector search with fallback to graph-only
    vector_results = []
    vector_scores: Dict[str, float] = {}

    try:
        vector_results = await self._vector_search(
            query_embedding,
            config,
            max_results=config.max_results * 2,  # Get more for expansion
        )
        vector_scores = {entity.id: score for entity, score in vector_results}
    except Exception as e:
        logger.warning(
            f"Vector search failed, falling back to graph-only search: {e}",
            exc_info=True
        )
        if seed_entity_ids:
            logger.info("Using graph-only search with provided seed entities")
            return await self._graph_search(seed_entity_ids, config)
        logger.warning("No seed entities available for graph-only fallback, returning empty results")
        return []

    # Step 2: Graph expansion (if enabled)
    graph_scores: Dict[str, float] = {}

    if config.expand_results:
        try:
            # Explicit seeds win; otherwise use the top vector results
            seeds = seed_entity_ids or [entity.id for entity, _ in vector_results[:5]]
            graph_results = await self._graph_search(seeds, config)
            graph_scores = {entity.id: score for entity, score in graph_results}
        except Exception as e:
            # Best effort: continue with vector results only
            logger.warning(
                f"Graph expansion failed, continuing with vector results only: {e}",
                exc_info=True
            )

    # Step 3: Combine scores
    combined_scores = await self._combine_scores(vector_scores, graph_scores, config)

    # Step 4: Rank first, then resolve entities in rank order. The sort is
    # stable, so this yields the same list as fetching everything and sorting
    # afterwards, but stops hitting the store once max_results are collected.
    ranked = [
        (entity_id, combined_score)
        for entity_id, combined_score in combined_scores.items()
        if combined_score >= config.min_combined_score
    ]
    ranked.sort(key=lambda item: item[1], reverse=True)

    results = []
    for entity_id, combined_score in ranked:
        if len(results) >= config.max_results:
            break
        try:
            entity = await self.graph_store.get_entity(entity_id)
            if entity:
                results.append((entity, combined_score))
        except Exception as e:
            # Skip this entity and continue with the next candidate
            logger.warning(f"Failed to retrieve entity {entity_id}: {e}")

    return results
|
|
353
|
+
|
|
354
|
+
async def _combine_scores(
|
|
355
|
+
self,
|
|
356
|
+
vector_scores: Dict[str, float],
|
|
357
|
+
graph_scores: Dict[str, float],
|
|
358
|
+
config: HybridSearchConfig,
|
|
359
|
+
) -> Dict[str, float]:
|
|
360
|
+
"""
|
|
361
|
+
Combine vector and graph scores with weighted averaging
|
|
362
|
+
|
|
363
|
+
Args:
|
|
364
|
+
vector_scores: Entity ID to vector similarity score
|
|
365
|
+
graph_scores: Entity ID to graph structure score
|
|
366
|
+
config: Search configuration
|
|
367
|
+
|
|
368
|
+
Returns:
|
|
369
|
+
Combined scores dictionary
|
|
370
|
+
"""
|
|
371
|
+
# Normalize weights
|
|
372
|
+
total_weight = config.vector_weight + config.graph_weight
|
|
373
|
+
if total_weight == 0:
|
|
374
|
+
total_weight = 1.0
|
|
375
|
+
|
|
376
|
+
norm_vector_weight = config.vector_weight / total_weight
|
|
377
|
+
norm_graph_weight = config.graph_weight / total_weight
|
|
378
|
+
|
|
379
|
+
# Get all entity IDs
|
|
380
|
+
all_entity_ids = set(vector_scores.keys()) | set(graph_scores.keys())
|
|
381
|
+
|
|
382
|
+
# Combine scores
|
|
383
|
+
combined: Dict[str, float] = {}
|
|
384
|
+
|
|
385
|
+
for entity_id in all_entity_ids:
|
|
386
|
+
v_score = vector_scores.get(entity_id, 0.0)
|
|
387
|
+
g_score = graph_scores.get(entity_id, 0.0)
|
|
388
|
+
|
|
389
|
+
# Weighted combination
|
|
390
|
+
combined[entity_id] = v_score * norm_vector_weight + g_score * norm_graph_weight
|
|
391
|
+
|
|
392
|
+
return combined
|
|
393
|
+
|
|
394
|
+
async def search_with_expansion(
    self,
    query_embedding: List[float],
    config: Optional[HybridSearchConfig] = None,
    include_paths: bool = False,
) -> Tuple[List[Tuple[Entity, float]], Optional[List[Path]]]:
    """
    Run a search and, on request, also report paths linking the results

    Args:
        query_embedding: Query vector
        config: Search configuration; a default HybridSearchConfig is
            created when omitted
        include_paths: Whether to include paths to results

    Returns:
        Tuple of (results, paths). paths is None unless include_paths is
        set and the configuration has expand_results enabled.
    """
    active_config = HybridSearchConfig() if config is None else config

    # Perform search
    hits = await self.search(query_embedding, active_config)

    # Path lookup only applies when the caller asked for it and graph
    # expansion is switched on in the configuration.
    if not (include_paths and active_config.expand_results):
        return hits, None

    linking_paths = await self._find_result_paths(hits, active_config)
    return hits, linking_paths
|
|
423
|
+
|
|
424
|
+
async def _find_result_paths(self, results: List[Tuple[Entity, float]], config: HybridSearchConfig) -> List[Path]:
|
|
425
|
+
"""
|
|
426
|
+
Find paths between top results
|
|
427
|
+
|
|
428
|
+
Args:
|
|
429
|
+
results: Search results
|
|
430
|
+
config: Search configuration
|
|
431
|
+
|
|
432
|
+
Returns:
|
|
433
|
+
List of paths connecting results
|
|
434
|
+
"""
|
|
435
|
+
if len(results) < 2:
|
|
436
|
+
return []
|
|
437
|
+
|
|
438
|
+
paths = []
|
|
439
|
+
|
|
440
|
+
# Find paths between top results
|
|
441
|
+
for i in range(min(3, len(results))):
|
|
442
|
+
source_id = results[i][0].id
|
|
443
|
+
|
|
444
|
+
for j in range(i + 1, min(i + 4, len(results))):
|
|
445
|
+
target_id = results[j][0].id
|
|
446
|
+
|
|
447
|
+
# Find paths between these entities
|
|
448
|
+
found_paths = await self.graph_store.find_paths(
|
|
449
|
+
source_entity_id=source_id,
|
|
450
|
+
target_entity_id=target_id,
|
|
451
|
+
max_depth=config.max_graph_depth,
|
|
452
|
+
max_paths=2,
|
|
453
|
+
)
|
|
454
|
+
|
|
455
|
+
paths.extend(found_paths)
|
|
456
|
+
|
|
457
|
+
return paths
|