aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiecs might be problematic. Click here for more details.
- aiecs/__init__.py +13 -16
- aiecs/__main__.py +7 -7
- aiecs/aiecs_client.py +269 -75
- aiecs/application/executors/operation_executor.py +79 -54
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
- aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
- aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
- aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +98 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
- aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
- aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
- aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
- aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
- aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
- aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
- aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
- aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
- aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
- aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
- aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
- aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
- aiecs/application/knowledge_graph/search/reranker.py +293 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +471 -0
- aiecs/config/__init__.py +20 -5
- aiecs/config/config.py +762 -31
- aiecs/config/graph_config.py +131 -0
- aiecs/config/tool_config.py +399 -0
- aiecs/core/__init__.py +29 -13
- aiecs/core/interface/__init__.py +2 -2
- aiecs/core/interface/execution_interface.py +22 -22
- aiecs/core/interface/storage_interface.py +37 -88
- aiecs/core/registry/__init__.py +31 -0
- aiecs/core/registry/service_registry.py +92 -0
- aiecs/domain/__init__.py +270 -1
- aiecs/domain/agent/__init__.py +191 -0
- aiecs/domain/agent/base_agent.py +3870 -0
- aiecs/domain/agent/exceptions.py +99 -0
- aiecs/domain/agent/graph_aware_mixin.py +569 -0
- aiecs/domain/agent/hybrid_agent.py +1435 -0
- aiecs/domain/agent/integration/__init__.py +29 -0
- aiecs/domain/agent/integration/context_compressor.py +216 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
- aiecs/domain/agent/integration/protocols.py +281 -0
- aiecs/domain/agent/integration/retry_policy.py +218 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
- aiecs/domain/agent/lifecycle.py +291 -0
- aiecs/domain/agent/llm_agent.py +692 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +1124 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +163 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
- aiecs/domain/agent/models.py +884 -0
- aiecs/domain/agent/observability.py +479 -0
- aiecs/domain/agent/persistence.py +449 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +159 -0
- aiecs/domain/agent/prompts/formatters.py +187 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +253 -0
- aiecs/domain/agent/tool_agent.py +444 -0
- aiecs/domain/agent/tools/__init__.py +15 -0
- aiecs/domain/agent/tools/schema_generator.py +364 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +469 -0
- aiecs/domain/community/analytics.py +432 -0
- aiecs/domain/community/collaborative_workflow.py +648 -0
- aiecs/domain/community/communication_hub.py +634 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +796 -0
- aiecs/domain/community/community_manager.py +803 -0
- aiecs/domain/community/decision_engine.py +849 -0
- aiecs/domain/community/exceptions.py +231 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +234 -0
- aiecs/domain/community/resource_manager.py +461 -0
- aiecs/domain/community/shared_context_manager.py +589 -0
- aiecs/domain/context/__init__.py +40 -10
- aiecs/domain/context/context_engine.py +1910 -0
- aiecs/domain/context/conversation_models.py +87 -53
- aiecs/domain/context/graph_memory.py +582 -0
- aiecs/domain/execution/model.py +12 -4
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +148 -0
- aiecs/domain/knowledge_graph/models/evidence.py +178 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
- aiecs/domain/knowledge_graph/models/path.py +171 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
- aiecs/domain/knowledge_graph/models/query.py +261 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
- aiecs/domain/knowledge_graph/models/relation.py +202 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
- aiecs/domain/task/dsl_processor.py +172 -56
- aiecs/domain/task/model.py +20 -8
- aiecs/domain/task/task_context.py +27 -24
- aiecs/infrastructure/__init__.py +0 -2
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +837 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
- aiecs/infrastructure/graph_storage/cache.py +424 -0
- aiecs/infrastructure/graph_storage/distributed.py +223 -0
- aiecs/infrastructure/graph_storage/error_handling.py +380 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
- aiecs/infrastructure/graph_storage/metrics.py +344 -0
- aiecs/infrastructure/graph_storage/migration.py +400 -0
- aiecs/infrastructure/graph_storage/pagination.py +483 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
- aiecs/infrastructure/graph_storage/postgres.py +1563 -0
- aiecs/infrastructure/graph_storage/property_storage.py +353 -0
- aiecs/infrastructure/graph_storage/protocols.py +76 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
- aiecs/infrastructure/graph_storage/streaming.py +487 -0
- aiecs/infrastructure/graph_storage/tenant.py +412 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
- aiecs/infrastructure/messaging/websocket_manager.py +51 -35
- aiecs/infrastructure/monitoring/__init__.py +22 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
- aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
- aiecs/infrastructure/monitoring/structured_logger.py +3 -7
- aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
- aiecs/infrastructure/persistence/__init__.py +14 -1
- aiecs/infrastructure/persistence/context_engine_client.py +184 -0
- aiecs/infrastructure/persistence/database_manager.py +67 -43
- aiecs/infrastructure/persistence/file_storage.py +180 -103
- aiecs/infrastructure/persistence/redis_client.py +74 -21
- aiecs/llm/__init__.py +73 -25
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
- aiecs/llm/client_factory.py +224 -36
- aiecs/llm/client_resolver.py +155 -0
- aiecs/llm/clients/__init__.py +38 -0
- aiecs/llm/clients/base_client.py +324 -0
- aiecs/llm/clients/google_function_calling_mixin.py +457 -0
- aiecs/llm/clients/googleai_client.py +241 -0
- aiecs/llm/clients/openai_client.py +158 -0
- aiecs/llm/clients/openai_compatible_mixin.py +367 -0
- aiecs/llm/clients/vertex_client.py +897 -0
- aiecs/llm/clients/xai_client.py +201 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +272 -0
- aiecs/llm/config/config_validator.py +206 -0
- aiecs/llm/config/model_config.py +143 -0
- aiecs/llm/protocols.py +149 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +89 -0
- aiecs/main.py +140 -121
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/module_checker.py +499 -0
- aiecs/scripts/aid/version_manager.py +235 -0
- aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
- aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
- aiecs/scripts/dependance_check/__init__.py +15 -0
- aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
- aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
- aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
- aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
- aiecs/scripts/knowledge_graph/__init__.py +3 -0
- aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
- aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
- aiecs/scripts/tools_develop/README.md +671 -0
- aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
- aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
- aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
- aiecs/scripts/tools_develop/schema_coverage.py +511 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
- aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
- aiecs/scripts/tools_develop/verify_tools.py +352 -0
- aiecs/tasks/__init__.py +0 -1
- aiecs/tasks/worker.py +115 -47
- aiecs/tools/__init__.py +194 -72
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +330 -0
- aiecs/tools/apisource/providers/__init__.py +112 -0
- aiecs/tools/apisource/providers/base.py +671 -0
- aiecs/tools/apisource/providers/census.py +397 -0
- aiecs/tools/apisource/providers/fred.py +535 -0
- aiecs/tools/apisource/providers/newsapi.py +409 -0
- aiecs/tools/apisource/providers/worldbank.py +352 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +363 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
- aiecs/tools/apisource/tool.py +832 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +334 -0
- aiecs/tools/base_tool.py +415 -21
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +607 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
- aiecs/tools/docs/content_insertion_tool.py +1320 -0
- aiecs/tools/docs/document_creator_tool.py +1323 -0
- aiecs/tools/docs/document_layout_tool.py +1160 -0
- aiecs/tools/docs/document_parser_tool.py +1011 -0
- aiecs/tools/docs/document_writer_tool.py +1829 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
- aiecs/tools/langchain_adapter.py +300 -138
- aiecs/tools/schema_generator.py +455 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +581 -0
- aiecs/tools/search_tool/cache.py +264 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +224 -0
- aiecs/tools/search_tool/core.py +778 -0
- aiecs/tools/search_tool/deduplicator.py +119 -0
- aiecs/tools/search_tool/error_handler.py +242 -0
- aiecs/tools/search_tool/metrics.py +343 -0
- aiecs/tools/search_tool/rate_limiter.py +172 -0
- aiecs/tools/search_tool/schemas.py +275 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
- aiecs/tools/statistics/data_loader_tool.py +555 -0
- aiecs/tools/statistics/data_profiler_tool.py +638 -0
- aiecs/tools/statistics/data_transformer_tool.py +580 -0
- aiecs/tools/statistics/data_visualizer_tool.py +498 -0
- aiecs/tools/statistics/model_trainer_tool.py +507 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
- aiecs/tools/task_tools/__init__.py +49 -36
- aiecs/tools/task_tools/chart_tool.py +200 -184
- aiecs/tools/task_tools/classfire_tool.py +268 -267
- aiecs/tools/task_tools/image_tool.py +175 -131
- aiecs/tools/task_tools/office_tool.py +226 -146
- aiecs/tools/task_tools/pandas_tool.py +477 -121
- aiecs/tools/task_tools/report_tool.py +390 -142
- aiecs/tools/task_tools/research_tool.py +149 -79
- aiecs/tools/task_tools/scraper_tool.py +339 -145
- aiecs/tools/task_tools/stats_tool.py +448 -209
- aiecs/tools/temp_file_manager.py +26 -24
- aiecs/tools/tool_executor/__init__.py +18 -16
- aiecs/tools/tool_executor/tool_executor.py +364 -52
- aiecs/utils/LLM_output_structor.py +74 -48
- aiecs/utils/__init__.py +14 -3
- aiecs/utils/base_callback.py +0 -3
- aiecs/utils/cache_provider.py +696 -0
- aiecs/utils/execution_utils.py +50 -31
- aiecs/utils/prompt_loader.py +1 -0
- aiecs/utils/token_usage_repository.py +37 -11
- aiecs/ws/socket_server.py +14 -4
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
- aiecs-1.7.6.dist-info/RECORD +337 -0
- aiecs-1.7.6.dist-info/entry_points.txt +13 -0
- aiecs/config/registry.py +0 -19
- aiecs/domain/context/content_engine.py +0 -982
- aiecs/llm/base_client.py +0 -99
- aiecs/llm/openai_client.py +0 -125
- aiecs/llm/vertex_client.py +0 -186
- aiecs/llm/xai_client.py +0 -184
- aiecs/scripts/dependency_checker.py +0 -857
- aiecs/scripts/quick_dependency_check.py +0 -269
- aiecs/tools/task_tools/search_api.py +0 -7
- aiecs-1.0.1.dist-info/RECORD +0 -90
- aiecs-1.0.1.dist-info/entry_points.txt +0 -7
- /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
- /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
- /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
- /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,837 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Graph Store Base Interface
|
|
3
|
+
|
|
4
|
+
Two-tier abstract interface for graph storage backends:
|
|
5
|
+
- Tier 1 (Basic): Must implement - core CRUD operations
|
|
6
|
+
- Tier 2 (Advanced): Has defaults, can optimize - complex queries
|
|
7
|
+
|
|
8
|
+
This design allows minimal adapters (Tier 1 only) to work immediately,
|
|
9
|
+
while backends can optimize Tier 2 methods for better performance.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from abc import ABC, abstractmethod
|
|
13
|
+
from typing import List, Optional, Set
|
|
14
|
+
from collections import deque
|
|
15
|
+
|
|
16
|
+
from aiecs.domain.knowledge_graph.models.entity import Entity
|
|
17
|
+
from aiecs.domain.knowledge_graph.models.relation import Relation
|
|
18
|
+
from aiecs.domain.knowledge_graph.models.path import Path
|
|
19
|
+
from aiecs.domain.knowledge_graph.models.query import (
|
|
20
|
+
GraphQuery,
|
|
21
|
+
GraphResult,
|
|
22
|
+
QueryType,
|
|
23
|
+
)
|
|
24
|
+
from aiecs.infrastructure.graph_storage.tenant import TenantContext
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class GraphStore(ABC):
|
|
28
|
+
"""
|
|
29
|
+
Abstract Graph Storage Interface
|
|
30
|
+
|
|
31
|
+
Two-tier design:
|
|
32
|
+
|
|
33
|
+
**Tier 1 - Basic Interface (MUST IMPLEMENT)**:
|
|
34
|
+
- add_entity() - Add entity to graph
|
|
35
|
+
- get_entity() - Get entity by ID
|
|
36
|
+
- add_relation() - Add relation to graph
|
|
37
|
+
- get_relation() - Get relation by ID
|
|
38
|
+
- get_neighbors() - Get neighboring entities
|
|
39
|
+
- initialize() - Initialize storage
|
|
40
|
+
- close() - Close storage connection
|
|
41
|
+
|
|
42
|
+
**Tier 2 - Advanced Interface (HAS DEFAULTS, CAN OPTIMIZE)**:
|
|
43
|
+
- traverse() - Multi-hop graph traversal
|
|
44
|
+
- find_paths() - Find paths between entities
|
|
45
|
+
- subgraph_query() - Extract subgraph
|
|
46
|
+
- vector_search() - Semantic vector search
|
|
47
|
+
- execute_query() - Execute GraphQuery
|
|
48
|
+
- clear() - Clear all data (tenant-scoped if context provided)
|
|
49
|
+
|
|
50
|
+
**Multi-Tenancy Support**:
|
|
51
|
+
All methods accept an optional `context: Optional[TenantContext]` parameter
|
|
52
|
+
for multi-tenant data isolation. When provided, operations are scoped to
|
|
53
|
+
the specified tenant. When None, operations work on the global namespace
|
|
54
|
+
(backward compatible with single-tenant deployments).
|
|
55
|
+
|
|
56
|
+
Implementations only need to provide Tier 1 methods. Tier 2 methods
|
|
57
|
+
have default implementations using Tier 1, but can be overridden for
|
|
58
|
+
performance optimization (e.g., using SQL recursive CTEs, Cypher queries).
|
|
59
|
+
|
|
60
|
+
Example:
|
|
61
|
+
```python
|
|
62
|
+
# Minimal implementation (Tier 1 only)
|
|
63
|
+
class CustomGraphStore(GraphStore):
|
|
64
|
+
async def add_entity(self, entity: Entity, context: Optional[TenantContext] = None) -> None:
|
|
65
|
+
# Your implementation
|
|
66
|
+
pass
|
|
67
|
+
|
|
68
|
+
# ... implement other Tier 1 methods
|
|
69
|
+
# Tier 2 methods work automatically!
|
|
70
|
+
|
|
71
|
+
# Optimized implementation (override Tier 2)
|
|
72
|
+
class OptimizedGraphStore(CustomGraphStore):
|
|
73
|
+
async def traverse(self, ..., context: Optional[TenantContext] = None):
|
|
74
|
+
# Use database-specific optimization
|
|
75
|
+
pass
|
|
76
|
+
|
|
77
|
+
# Multi-tenant usage
|
|
78
|
+
context = TenantContext(tenant_id="acme-corp")
|
|
79
|
+
await store.add_entity(entity, context=context)
|
|
80
|
+
entities = await store.vector_search(embedding, context=context)
|
|
81
|
+
```
|
|
82
|
+
"""
|
|
83
|
+
|
|
84
|
+
# =========================================================================
|
|
85
|
+
# TIER 1: BASIC INTERFACE - MUST IMPLEMENT
|
|
86
|
+
# =========================================================================
|
|
87
|
+
|
|
88
|
+
@abstractmethod
async def initialize(self) -> None:
    """
    Prepare the graph storage backend for use.

    Expected to be invoked a single time before any other store
    operation. Implementations typically use this hook to:
    - Open database connections
    - Set up internal data structures
    - Create tables and indexes
    """
|
|
98
|
+
|
|
99
|
+
@abstractmethod
async def close(self) -> None:
    """
    Shut down the graph storage backend and release its resources.

    Expected to be invoked during shutdown. Implementations typically
    use this hook to:
    - Close database connections
    - Flush any pending writes
    - Clean up remaining resources
    """
|
|
109
|
+
|
|
110
|
+
@abstractmethod
async def add_entity(self, entity: Entity, context: Optional[TenantContext] = None) -> None:
    """
    Store a single entity in the graph.

    Args:
        entity: The entity to store
        context: Tenant context used for multi-tenant isolation; optional

    Raises:
        ValueError: When an entity with the same ID is already present
    """
|
|
122
|
+
|
|
123
|
+
@abstractmethod
async def get_entity(self, entity_id: str, context: Optional[TenantContext] = None) -> Optional[Entity]:
    """
    Look up a single entity by its ID.

    Args:
        entity_id: ID of the entity to fetch
        context: Tenant context used for multi-tenant isolation; optional

    Returns:
        The matching Entity, or None when no entity has that ID
    """
|
|
135
|
+
|
|
136
|
+
@abstractmethod
async def add_relation(self, relation: Relation, context: Optional[TenantContext] = None) -> None:
    """
    Store a single relation in the graph.

    Args:
        relation: The relation to store
        context: Tenant context used for multi-tenant isolation; optional

    Raises:
        ValueError: When a relation with the same ID is already present
        ValueError: When the source or target entity is missing
    """
|
|
149
|
+
|
|
150
|
+
@abstractmethod
async def get_relation(self, relation_id: str, context: Optional[TenantContext] = None) -> Optional[Relation]:
    """
    Look up a single relation by its ID.

    Args:
        relation_id: ID of the relation to fetch
        context: Tenant context used for multi-tenant isolation; optional

    Returns:
        The matching Relation, or None when no relation has that ID
    """
|
|
162
|
+
|
|
163
|
+
@abstractmethod
async def get_neighbors(
    self,
    entity_id: str,
    relation_type: Optional[str] = None,
    direction: str = "outgoing",
    context: Optional[TenantContext] = None,
) -> List[Entity]:
    """
    Return the entities linked to a given entity through relations.

    Args:
        entity_id: ID of the entity whose neighbors are requested
        relation_type: Restrict results to this relation type; optional
        direction: One of "outgoing", "incoming", or "both"
        context: Tenant context used for multi-tenant isolation; optional

    Returns:
        The neighboring entities as a list
    """
|
|
183
|
+
|
|
184
|
+
# =========================================================================
|
|
185
|
+
# BULK OPERATIONS - Default implementations (can be optimized)
|
|
186
|
+
# =========================================================================
|
|
187
|
+
|
|
188
|
+
async def add_entities_bulk(
    self,
    entities: List[Entity],
    context: Optional[TenantContext] = None,
) -> int:
    """
    Add multiple entities in bulk.

    Default implementation calls add_entity() once per entity, forwarding
    the tenant context so bulk inserts are scoped the same way as single
    inserts. Override for better performance with database-specific bulk
    inserts.

    Args:
        entities: List of entities to add
        context: Optional tenant context for multi-tenant isolation.
            When None, entities are added without a tenant context,
            which matches the behaviour before this parameter existed.

    Returns:
        Number of entities successfully added
    """
    added = 0
    for entity in entities:
        try:
            await self.add_entity(entity, context=context)
        except ValueError:
            # Best-effort bulk insert: add_entity() signals a duplicate ID
            # with ValueError, so skip that entity and keep going.
            continue
        added += 1
    return added
|
|
210
|
+
|
|
211
|
+
async def add_relations_bulk(
    self,
    relations: List[Relation],
    context: Optional[TenantContext] = None,
) -> int:
    """
    Add multiple relations in bulk.

    Default implementation calls add_relation() once per relation,
    forwarding the tenant context so bulk inserts are scoped the same way
    as single inserts. Override for better performance with
    database-specific bulk inserts.

    Args:
        relations: List of relations to add
        context: Optional tenant context for multi-tenant isolation.
            When None, relations are added without a tenant context,
            which matches the behaviour before this parameter existed.

    Returns:
        Number of relations successfully added
    """
    added = 0
    for relation in relations:
        try:
            await self.add_relation(relation, context=context)
        except ValueError:
            # Best-effort bulk insert: add_relation() raises ValueError for
            # a duplicate ID or a missing endpoint entity, so skip it.
            continue
        added += 1
    return added
|
|
233
|
+
|
|
234
|
+
# =========================================================================
|
|
235
|
+
# TIER 2: ADVANCED INTERFACE - HAS DEFAULTS (Template Method Pattern)
|
|
236
|
+
# =========================================================================
|
|
237
|
+
|
|
238
|
+
async def traverse(
    self,
    start_entity_id: str,
    relation_type: Optional[str] = None,
    max_depth: int = 3,
    max_results: int = 100,
    context: Optional[TenantContext] = None,
) -> List[Path]:
    """
    Walk the graph outward from a starting entity.

    **DEFAULT IMPLEMENTATION**: Delegates to _default_traverse_bfs().
    Override for better performance (e.g., recursive CTEs in SQL).

    Args:
        start_entity_id: ID of the entity the traversal starts from
        relation_type: Restrict traversal to this relation type; optional
        max_depth: Upper bound on traversal depth
        max_results: Upper bound on the number of paths returned
        context: Tenant context used for multi-tenant isolation; optional

    Returns:
        The paths discovered during traversal
    """
    # Arguments are passed positionally, in the order the helper expects.
    discovered = await self._default_traverse_bfs(
        start_entity_id,
        relation_type,
        max_depth,
        max_results,
        context,
    )
    return discovered
|
|
263
|
+
|
|
264
|
+
async def find_paths(
    self,
    source_entity_id: str,
    target_entity_id: str,
    max_depth: int = 5,
    max_paths: int = 10,
    context: Optional[TenantContext] = None,
) -> List[Path]:
    """
    Locate paths connecting a source entity to a target entity.

    **DEFAULT IMPLEMENTATION**: Delegates to _default_find_paths().
    Override for better performance (e.g., bidirectional search).

    Args:
        source_entity_id: ID of the entity paths start from
        target_entity_id: ID of the entity paths end at
        max_depth: Upper bound on path length
        max_paths: Upper bound on the number of paths returned
        context: Tenant context used for multi-tenant isolation; optional

    Returns:
        The paths found between source and target
    """
    # Arguments are passed positionally, in the order the helper expects.
    found = await self._default_find_paths(
        source_entity_id,
        target_entity_id,
        max_depth,
        max_paths,
        context,
    )
    return found
|
|
289
|
+
|
|
290
|
+
async def subgraph_query(
    self,
    entity_ids: List[str],
    include_relations: bool = True,
    context: Optional[TenantContext] = None,
) -> tuple[List[Entity], List[Relation]]:
    """
    Pull out the subgraph made up of the given entities.

    **DEFAULT IMPLEMENTATION**: Delegates to _default_subgraph_query().
    Override for better performance (e.g., single JOIN query).

    Args:
        entity_ids: IDs of the entities the subgraph should contain
        include_relations: Whether relations between the entities are
            included in the result
        context: Tenant context used for multi-tenant isolation; optional

    Returns:
        Tuple of (entities, relations)
    """
    # Arguments are passed positionally, in the order the helper expects.
    subgraph = await self._default_subgraph_query(
        entity_ids,
        include_relations,
        context,
    )
    return subgraph
|
|
311
|
+
|
|
312
|
+
async def get_all_entities(
    self,
    entity_type: Optional[str] = None,
    limit: Optional[int] = None,
    offset: int = 0,
    context: Optional[TenantContext] = None,
) -> List[Entity]:
    """
    Enumerate the entities held by the graph store

    **DEFAULT IMPLEMENTATION**: Delegates to _default_get_all_entities().
    The base version of that hook raises NotImplementedError, so backends
    are expected to override one of the two (e.g., with database cursors
    or streaming).

    Args:
        entity_type: Optional filter by entity type
        limit: Optional maximum number of entities to return
        offset: Number of entities to skip (for pagination)
        context: Optional tenant context for multi-tenant isolation

    Returns:
        List of entities matching the criteria

    Example:
        # Get all entities
        all_entities = await store.get_all_entities()

        # Get first 100 Person entities
        people = await store.get_all_entities(entity_type="Person", limit=100)

        # Get next page (pagination)
        next_page = await store.get_all_entities(entity_type="Person", limit=100, offset=100)
    """
    page = await self._default_get_all_entities(entity_type, limit, offset, context)
    return page
|
|
345
|
+
|
|
346
|
+
async def vector_search(
    self,
    query_embedding: List[float],
    entity_type: Optional[str] = None,
    max_results: int = 10,
    score_threshold: float = 0.0,
    context: Optional[TenantContext] = None,
) -> List[tuple[Entity, float]]:
    """
    Rank entities by embedding similarity to a query vector

    **DEFAULT IMPLEMENTATION**: Delegates to _default_vector_search(), a
    brute-force cosine-similarity scan.
    Override for better performance (e.g., pgvector, FAISS, ANN indexes).

    Args:
        query_embedding: Query vector embedding
        entity_type: Optional filter by entity type
        max_results: Maximum number of results
        score_threshold: Minimum similarity score (0.0-1.0)
        context: Optional tenant context for multi-tenant isolation

    Returns:
        List of (entity, score) tuples, sorted by score descending
    """
    hits = await self._default_vector_search(
        query_embedding,
        entity_type,
        max_results,
        score_threshold,
        context,
    )
    return hits
|
|
371
|
+
|
|
372
|
+
async def text_search(
    self,
    query_text: str,
    entity_type: Optional[str] = None,
    max_results: int = 10,
    score_threshold: float = 0.0,
    method: str = "bm25",
    context: Optional[TenantContext] = None,
) -> List[tuple[Entity, float]]:
    """
    Rank entities by textual similarity to a query string

    **DEFAULT IMPLEMENTATION**: Delegates to _default_text_search(), which
    scores entities with the text similarity utilities (BM25, Jaccard, etc.).
    Override for better performance (e.g., full-text search indexes).

    Args:
        query_text: Query text string
        entity_type: Optional filter by entity type
        max_results: Maximum number of results
        score_threshold: Minimum similarity score (0.0-1.0)
        method: Similarity method ("bm25", "jaccard", "cosine", "levenshtein")
        context: Optional tenant context for multi-tenant isolation

    Returns:
        List of (entity, score) tuples, sorted by score descending
    """
    hits = await self._default_text_search(
        query_text,
        entity_type,
        max_results,
        score_threshold,
        method,
        context,
    )
    return hits
|
|
399
|
+
|
|
400
|
+
async def execute_query(self, query: GraphQuery, context: Optional[TenantContext] = None) -> GraphResult:
    """
    Run a graph query and return its result

    **DEFAULT IMPLEMENTATION**: Delegates to _default_execute_query(), which
    dispatches to the matching store method based on the query type.
    Override for custom query execution logic.

    Args:
        query: Graph query to execute
        context: Optional tenant context for multi-tenant isolation

    Returns:
        Query results
    """
    outcome = await self._default_execute_query(query, context)
    return outcome
|
|
415
|
+
|
|
416
|
+
async def clear(self, context: Optional[TenantContext] = None) -> None:
    """
    Remove all data from the graph store

    **DEFAULT IMPLEMENTATION**: None - the base class always raises.
    Implementations should override this method to provide tenant-scoped clearing.

    Args:
        context: Optional tenant context for multi-tenant isolation.
            If provided, only the data of that tenant should be cleared.
            If None, all data should be cleared (use with caution).

    Raises:
        NotImplementedError: Always, until a subclass overrides the method
    """
    message = "clear() must be implemented by subclasses"
    raise NotImplementedError(message)
|
|
429
|
+
|
|
430
|
+
# =========================================================================
|
|
431
|
+
# DEFAULT IMPLEMENTATIONS (Template Methods)
|
|
432
|
+
# =========================================================================
|
|
433
|
+
|
|
434
|
+
async def _default_traverse_bfs(
|
|
435
|
+
self,
|
|
436
|
+
start_entity_id: str,
|
|
437
|
+
relation_type: Optional[str],
|
|
438
|
+
max_depth: int,
|
|
439
|
+
max_results: int,
|
|
440
|
+
context: Optional[TenantContext],
|
|
441
|
+
) -> List[Path]:
|
|
442
|
+
"""
|
|
443
|
+
Default BFS traversal implementation using get_neighbors()
|
|
444
|
+
|
|
445
|
+
This provides a working traversal that any Tier 1 implementation gets for free.
|
|
446
|
+
Backends can override traverse() with optimized versions.
|
|
447
|
+
"""
|
|
448
|
+
start_entity = await self.get_entity(start_entity_id, context=context)
|
|
449
|
+
if start_entity is None:
|
|
450
|
+
return []
|
|
451
|
+
|
|
452
|
+
paths: List[Path] = []
|
|
453
|
+
visited: Set[str] = set()
|
|
454
|
+
queue: deque = deque([(start_entity, [])]) # (entity, edges_path)
|
|
455
|
+
|
|
456
|
+
while queue and len(paths) < max_results:
|
|
457
|
+
current_entity, edges_path = queue.popleft()
|
|
458
|
+
current_depth = len(edges_path)
|
|
459
|
+
|
|
460
|
+
if current_entity.id in visited:
|
|
461
|
+
continue
|
|
462
|
+
visited.add(current_entity.id)
|
|
463
|
+
|
|
464
|
+
# Create path for this node
|
|
465
|
+
if current_depth > 0: # Don't add single-node paths
|
|
466
|
+
nodes_path = [start_entity]
|
|
467
|
+
for edge in edges_path:
|
|
468
|
+
target_entity = await self.get_entity(edge.target_id, context=context)
|
|
469
|
+
if target_entity:
|
|
470
|
+
nodes_path.append(target_entity)
|
|
471
|
+
|
|
472
|
+
if len(nodes_path) == len(edges_path) + 1:
|
|
473
|
+
paths.append(Path(nodes=nodes_path, edges=edges_path))
|
|
474
|
+
|
|
475
|
+
# Explore neighbors if not at max depth
|
|
476
|
+
if current_depth < max_depth:
|
|
477
|
+
neighbors = await self.get_neighbors(
|
|
478
|
+
current_entity.id,
|
|
479
|
+
relation_type=relation_type,
|
|
480
|
+
direction="outgoing",
|
|
481
|
+
context=context,
|
|
482
|
+
)
|
|
483
|
+
|
|
484
|
+
for neighbor in neighbors:
|
|
485
|
+
if neighbor.id not in visited:
|
|
486
|
+
# Find the relation connecting them
|
|
487
|
+
# (In a real implementation, get_neighbors should return relations too)
|
|
488
|
+
# For now, create a placeholder relation
|
|
489
|
+
edge = Relation(
|
|
490
|
+
id=f"rel_{current_entity.id}_{neighbor.id}",
|
|
491
|
+
relation_type=relation_type or "CONNECTED_TO",
|
|
492
|
+
source_id=current_entity.id,
|
|
493
|
+
target_id=neighbor.id,
|
|
494
|
+
)
|
|
495
|
+
queue.append((neighbor, edges_path + [edge]))
|
|
496
|
+
|
|
497
|
+
return paths
|
|
498
|
+
|
|
499
|
+
async def _default_find_paths(
|
|
500
|
+
self,
|
|
501
|
+
source_entity_id: str,
|
|
502
|
+
target_entity_id: str,
|
|
503
|
+
max_depth: int,
|
|
504
|
+
max_paths: int,
|
|
505
|
+
context: Optional[TenantContext],
|
|
506
|
+
) -> List[Path]:
|
|
507
|
+
"""
|
|
508
|
+
Default path finding using BFS with target check
|
|
509
|
+
"""
|
|
510
|
+
all_paths = await self.traverse(
|
|
511
|
+
source_entity_id,
|
|
512
|
+
max_depth=max_depth,
|
|
513
|
+
max_results=max_paths * 10, # Get more, filter later
|
|
514
|
+
context=context,
|
|
515
|
+
)
|
|
516
|
+
|
|
517
|
+
# Filter paths that end at target
|
|
518
|
+
target_paths = [path for path in all_paths if path.end_entity.id == target_entity_id]
|
|
519
|
+
|
|
520
|
+
return target_paths[:max_paths]
|
|
521
|
+
|
|
522
|
+
async def _default_subgraph_query(
|
|
523
|
+
self,
|
|
524
|
+
entity_ids: List[str],
|
|
525
|
+
include_relations: bool,
|
|
526
|
+
context: Optional[TenantContext],
|
|
527
|
+
) -> tuple[List[Entity], List[Relation]]:
|
|
528
|
+
"""
|
|
529
|
+
Default subgraph extraction
|
|
530
|
+
"""
|
|
531
|
+
entities = []
|
|
532
|
+
relations = []
|
|
533
|
+
|
|
534
|
+
# Fetch all entities
|
|
535
|
+
for entity_id in entity_ids:
|
|
536
|
+
entity = await self.get_entity(entity_id, context=context)
|
|
537
|
+
if entity:
|
|
538
|
+
entities.append(entity)
|
|
539
|
+
|
|
540
|
+
# Fetch relations between entities (if requested)
|
|
541
|
+
if include_relations:
|
|
542
|
+
entity_id_set = set(entity_ids)
|
|
543
|
+
for entity_id in entity_ids:
|
|
544
|
+
neighbors = await self.get_neighbors(entity_id, direction="outgoing", context=context)
|
|
545
|
+
for neighbor in neighbors:
|
|
546
|
+
if neighbor.id in entity_id_set:
|
|
547
|
+
# Fetch the relation (simplified - needs proper
|
|
548
|
+
# implementation)
|
|
549
|
+
rel = Relation(
|
|
550
|
+
id=f"rel_{entity_id}_{neighbor.id}",
|
|
551
|
+
relation_type="CONNECTED_TO",
|
|
552
|
+
source_id=entity_id,
|
|
553
|
+
target_id=neighbor.id,
|
|
554
|
+
)
|
|
555
|
+
relations.append(rel)
|
|
556
|
+
|
|
557
|
+
return entities, relations
|
|
558
|
+
|
|
559
|
+
async def _default_get_all_entities(
|
|
560
|
+
self,
|
|
561
|
+
entity_type: Optional[str],
|
|
562
|
+
limit: Optional[int],
|
|
563
|
+
offset: int,
|
|
564
|
+
context: Optional[TenantContext],
|
|
565
|
+
) -> List[Entity]:
|
|
566
|
+
"""
|
|
567
|
+
Default entity enumeration implementation
|
|
568
|
+
|
|
569
|
+
This default raises NotImplementedError. Backends should override
|
|
570
|
+
this method to provide efficient entity enumeration.
|
|
571
|
+
|
|
572
|
+
Args:
|
|
573
|
+
entity_type: Optional filter by entity type
|
|
574
|
+
limit: Optional maximum number of entities to return
|
|
575
|
+
offset: Number of entities to skip (for pagination)
|
|
576
|
+
context: Optional tenant context for multi-tenant isolation
|
|
577
|
+
|
|
578
|
+
Returns:
|
|
579
|
+
List of entities matching the criteria
|
|
580
|
+
|
|
581
|
+
Raises:
|
|
582
|
+
NotImplementedError: If backend doesn't implement this method
|
|
583
|
+
"""
|
|
584
|
+
raise NotImplementedError(
|
|
585
|
+
f"{type(self).__name__} must implement get_all_entities() "
|
|
586
|
+
"or override _default_get_all_entities()"
|
|
587
|
+
)
|
|
588
|
+
|
|
589
|
+
async def _default_vector_search(
|
|
590
|
+
self,
|
|
591
|
+
query_embedding: List[float],
|
|
592
|
+
entity_type: Optional[str],
|
|
593
|
+
max_results: int,
|
|
594
|
+
score_threshold: float,
|
|
595
|
+
context: Optional[TenantContext],
|
|
596
|
+
) -> List[tuple[Entity, float]]:
|
|
597
|
+
"""
|
|
598
|
+
Default brute-force vector search using cosine similarity
|
|
599
|
+
|
|
600
|
+
This implementation uses get_all_entities() to enumerate entities
|
|
601
|
+
and computes cosine similarity. Backends should override with ANN indexes.
|
|
602
|
+
"""
|
|
603
|
+
if not query_embedding:
|
|
604
|
+
return []
|
|
605
|
+
|
|
606
|
+
# Get all entities (or filtered by entity_type)
|
|
607
|
+
entities = await self.get_all_entities(entity_type=entity_type, context=context)
|
|
608
|
+
|
|
609
|
+
if not entities:
|
|
610
|
+
return []
|
|
611
|
+
|
|
612
|
+
# Compute cosine similarity for each entity with embedding
|
|
613
|
+
scored_entities = []
|
|
614
|
+
for entity in entities:
|
|
615
|
+
if not entity.embedding:
|
|
616
|
+
continue # Skip entities without embeddings
|
|
617
|
+
|
|
618
|
+
# Compute cosine similarity between vectors
|
|
619
|
+
try:
|
|
620
|
+
similarity = self._cosine_similarity_vectors(query_embedding, entity.embedding)
|
|
621
|
+
if similarity >= score_threshold:
|
|
622
|
+
scored_entities.append((entity, float(similarity)))
|
|
623
|
+
except Exception as e:
|
|
624
|
+
# Skip entities with incompatible embedding dimensions
|
|
625
|
+
import logging
|
|
626
|
+
logger = logging.getLogger(__name__)
|
|
627
|
+
logger.debug(f"Skipping entity {entity.id} due to embedding error: {e}")
|
|
628
|
+
continue
|
|
629
|
+
|
|
630
|
+
# Sort by score descending and return top results
|
|
631
|
+
scored_entities.sort(key=lambda x: x[1], reverse=True)
|
|
632
|
+
return scored_entities[:max_results]
|
|
633
|
+
|
|
634
|
+
async def _default_text_search(
|
|
635
|
+
self,
|
|
636
|
+
query_text: str,
|
|
637
|
+
entity_type: Optional[str],
|
|
638
|
+
max_results: int,
|
|
639
|
+
score_threshold: float,
|
|
640
|
+
method: str,
|
|
641
|
+
context: Optional[TenantContext],
|
|
642
|
+
) -> List[tuple[Entity, float]]:
|
|
643
|
+
"""
|
|
644
|
+
Default text search using text similarity utilities
|
|
645
|
+
|
|
646
|
+
This implementation requires get_all_entities() or similar method.
|
|
647
|
+
Backends should override for better performance (e.g., full-text indexes).
|
|
648
|
+
"""
|
|
649
|
+
# Try to get all entities - check if store has get_all_entities method
|
|
650
|
+
if hasattr(self, "get_all_entities"):
|
|
651
|
+
entities = await self.get_all_entities(entity_type=entity_type, context=context)
|
|
652
|
+
else:
|
|
653
|
+
# Fallback: return empty if no way to enumerate entities
|
|
654
|
+
return []
|
|
655
|
+
|
|
656
|
+
if not query_text:
|
|
657
|
+
return []
|
|
658
|
+
|
|
659
|
+
from aiecs.application.knowledge_graph.search.text_similarity import (
|
|
660
|
+
BM25Scorer,
|
|
661
|
+
jaccard_similarity_text,
|
|
662
|
+
cosine_similarity_text,
|
|
663
|
+
normalized_levenshtein_similarity,
|
|
664
|
+
)
|
|
665
|
+
|
|
666
|
+
scored_entities = []
|
|
667
|
+
|
|
668
|
+
# Extract text from entities (combine properties into searchable text)
|
|
669
|
+
entity_texts = []
|
|
670
|
+
for entity in entities:
|
|
671
|
+
# Combine all string properties into searchable text
|
|
672
|
+
text_parts = []
|
|
673
|
+
for key, value in entity.properties.items():
|
|
674
|
+
if isinstance(value, str):
|
|
675
|
+
text_parts.append(value)
|
|
676
|
+
elif isinstance(value, (list, tuple)):
|
|
677
|
+
text_parts.extend(str(v) for v in value if isinstance(v, str))
|
|
678
|
+
entity_text = " ".join(text_parts)
|
|
679
|
+
entity_texts.append((entity, entity_text))
|
|
680
|
+
|
|
681
|
+
if method == "bm25":
|
|
682
|
+
# Use BM25 scorer
|
|
683
|
+
corpus = [text for _, text in entity_texts]
|
|
684
|
+
scorer = BM25Scorer(corpus)
|
|
685
|
+
scores = scorer.score(query_text)
|
|
686
|
+
|
|
687
|
+
for (entity, _), score in zip(entity_texts, scores):
|
|
688
|
+
if score >= score_threshold:
|
|
689
|
+
scored_entities.append((entity, float(score)))
|
|
690
|
+
|
|
691
|
+
elif method == "jaccard":
|
|
692
|
+
for entity, text in entity_texts:
|
|
693
|
+
score = jaccard_similarity_text(query_text, text)
|
|
694
|
+
if score >= score_threshold:
|
|
695
|
+
scored_entities.append((entity, score))
|
|
696
|
+
|
|
697
|
+
elif method == "cosine":
|
|
698
|
+
for entity, text in entity_texts:
|
|
699
|
+
score = cosine_similarity_text(query_text, text)
|
|
700
|
+
if score >= score_threshold:
|
|
701
|
+
scored_entities.append((entity, score))
|
|
702
|
+
|
|
703
|
+
elif method == "levenshtein":
|
|
704
|
+
for entity, text in entity_texts:
|
|
705
|
+
score = normalized_levenshtein_similarity(query_text, text)
|
|
706
|
+
if score >= score_threshold:
|
|
707
|
+
scored_entities.append((entity, score))
|
|
708
|
+
|
|
709
|
+
else:
|
|
710
|
+
raise ValueError(f"Unknown text search method: {method}. Use 'bm25', 'jaccard', 'cosine', or 'levenshtein'")
|
|
711
|
+
|
|
712
|
+
# Sort by score descending and return top results
|
|
713
|
+
scored_entities.sort(key=lambda x: x[1], reverse=True)
|
|
714
|
+
return scored_entities[:max_results]
|
|
715
|
+
|
|
716
|
+
def _cosine_similarity_vectors(self, vec1: List[float], vec2: List[float]) -> float:
|
|
717
|
+
"""
|
|
718
|
+
Compute cosine similarity between two vectors
|
|
719
|
+
|
|
720
|
+
Args:
|
|
721
|
+
vec1: First vector
|
|
722
|
+
vec2: Second vector
|
|
723
|
+
|
|
724
|
+
Returns:
|
|
725
|
+
Cosine similarity score (0.0-1.0)
|
|
726
|
+
|
|
727
|
+
Raises:
|
|
728
|
+
ValueError: If vectors have different dimensions or are empty
|
|
729
|
+
"""
|
|
730
|
+
if len(vec1) != len(vec2):
|
|
731
|
+
raise ValueError(f"Vectors must have same dimension: {len(vec1)} != {len(vec2)}")
|
|
732
|
+
if len(vec1) == 0:
|
|
733
|
+
raise ValueError("Vectors cannot be empty")
|
|
734
|
+
|
|
735
|
+
# Compute dot product
|
|
736
|
+
dot_product = sum(a * b for a, b in zip(vec1, vec2))
|
|
737
|
+
|
|
738
|
+
# Compute magnitudes
|
|
739
|
+
magnitude1 = sum(a * a for a in vec1) ** 0.5
|
|
740
|
+
magnitude2 = sum(b * b for b in vec2) ** 0.5
|
|
741
|
+
|
|
742
|
+
if magnitude1 == 0 or magnitude2 == 0:
|
|
743
|
+
return 0.0
|
|
744
|
+
|
|
745
|
+
# Cosine similarity
|
|
746
|
+
similarity = dot_product / (magnitude1 * magnitude2)
|
|
747
|
+
return max(0.0, min(1.0, similarity)) # Clamp to [0, 1]
|
|
748
|
+
|
|
749
|
+
async def _default_execute_query(self, query: GraphQuery, context: Optional[TenantContext]) -> GraphResult:
    """
    Default query execution router with tenant filtering support.

    If query.tenant_id is provided, it takes precedence over the context parameter
    for tenant filtering. This ensures GraphQuery objects carry their own tenant scope.

    Dispatch by query.query_type:
        ENTITY_LOOKUP: get_entity() on query.entity_id
        VECTOR_SEARCH: vector_search() on query.embedding
        TRAVERSAL: traverse() from query.entity_id; the result also lists the
            unique entities on the returned paths
        PATH_FINDING: find_paths() between the source and target entity
    Any other query type, or a query missing the field its type needs,
    produces an empty result.

    Args:
        query: Graph query to execute
        context: Optional tenant context for multi-tenant isolation

    Returns:
        GraphResult with entities and paths truncated to query.max_results.
        total_count is the number of entities before truncation, and scores
        is always empty.
    """
    import time

    # perf_counter() is monotonic; the wall clock can be adjusted during the
    # query and would then report a wrong (even negative) duration.
    start_time = time.perf_counter()

    # Apply tenant filtering: query.tenant_id takes precedence over context parameter
    # This allows GraphQuery to be self-contained with tenant scope
    effective_context = context
    if query.tenant_id is not None:
        # Build a TenantContext for query.tenant_id. The isolation mode is
        # inherited from the supplied context when there is one.
        from aiecs.infrastructure.graph_storage.tenant import TenantIsolationMode

        effective_context = TenantContext(
            tenant_id=query.tenant_id,
            isolation_mode=context.isolation_mode if context else TenantIsolationMode.SHARED_SCHEMA,
        )

    # Both results start out empty so that every branch below only has to
    # assign what it actually produces.
    entities: List[Entity] = []
    paths: List[Path] = []

    if query.query_type == QueryType.ENTITY_LOOKUP:
        if query.entity_id:
            entity = await self.get_entity(query.entity_id, context=effective_context)
            if entity:
                entities = [entity]

    elif query.query_type == QueryType.VECTOR_SEARCH:
        if query.embedding:
            results = await self.vector_search(
                query.embedding,
                query.entity_type,
                query.max_results,
                query.score_threshold,
                context=effective_context,
            )
            entities = [entity for entity, _score in results]

    elif query.query_type == QueryType.TRAVERSAL:
        if query.entity_id:
            paths = await self.traverse(
                query.entity_id,
                query.relation_type,
                query.max_depth,
                query.max_results,
                context=effective_context,
            )
            # Extract unique entities from paths, in first-seen order
            entity_ids_seen = set()
            for path in paths:
                for entity in path.nodes:
                    if entity.id not in entity_ids_seen:
                        entities.append(entity)
                        entity_ids_seen.add(entity.id)

    elif query.query_type == QueryType.PATH_FINDING:
        if query.source_entity_id and query.target_entity_id:
            paths = await self.find_paths(
                query.source_entity_id,
                query.target_entity_id,
                query.max_depth,
                query.max_results,
                context=effective_context,
            )

    execution_time_ms = (time.perf_counter() - start_time) * 1000

    return GraphResult(
        query=query,
        entities=entities[: query.max_results],
        paths=paths[: query.max_results],
        scores=[],
        total_count=len(entities),
        execution_time_ms=execution_time_ms,
    )
|