aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiecs might be problematic. Click here for more details.
- aiecs/__init__.py +13 -16
- aiecs/__main__.py +7 -7
- aiecs/aiecs_client.py +269 -75
- aiecs/application/executors/operation_executor.py +79 -54
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
- aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
- aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
- aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +98 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
- aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
- aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
- aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
- aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
- aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
- aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
- aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
- aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
- aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
- aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
- aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
- aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
- aiecs/application/knowledge_graph/search/reranker.py +293 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +471 -0
- aiecs/config/__init__.py +20 -5
- aiecs/config/config.py +762 -31
- aiecs/config/graph_config.py +131 -0
- aiecs/config/tool_config.py +399 -0
- aiecs/core/__init__.py +29 -13
- aiecs/core/interface/__init__.py +2 -2
- aiecs/core/interface/execution_interface.py +22 -22
- aiecs/core/interface/storage_interface.py +37 -88
- aiecs/core/registry/__init__.py +31 -0
- aiecs/core/registry/service_registry.py +92 -0
- aiecs/domain/__init__.py +270 -1
- aiecs/domain/agent/__init__.py +191 -0
- aiecs/domain/agent/base_agent.py +3870 -0
- aiecs/domain/agent/exceptions.py +99 -0
- aiecs/domain/agent/graph_aware_mixin.py +569 -0
- aiecs/domain/agent/hybrid_agent.py +1435 -0
- aiecs/domain/agent/integration/__init__.py +29 -0
- aiecs/domain/agent/integration/context_compressor.py +216 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
- aiecs/domain/agent/integration/protocols.py +281 -0
- aiecs/domain/agent/integration/retry_policy.py +218 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
- aiecs/domain/agent/lifecycle.py +291 -0
- aiecs/domain/agent/llm_agent.py +692 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +1124 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +163 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
- aiecs/domain/agent/models.py +884 -0
- aiecs/domain/agent/observability.py +479 -0
- aiecs/domain/agent/persistence.py +449 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +159 -0
- aiecs/domain/agent/prompts/formatters.py +187 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +253 -0
- aiecs/domain/agent/tool_agent.py +444 -0
- aiecs/domain/agent/tools/__init__.py +15 -0
- aiecs/domain/agent/tools/schema_generator.py +364 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +469 -0
- aiecs/domain/community/analytics.py +432 -0
- aiecs/domain/community/collaborative_workflow.py +648 -0
- aiecs/domain/community/communication_hub.py +634 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +796 -0
- aiecs/domain/community/community_manager.py +803 -0
- aiecs/domain/community/decision_engine.py +849 -0
- aiecs/domain/community/exceptions.py +231 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +234 -0
- aiecs/domain/community/resource_manager.py +461 -0
- aiecs/domain/community/shared_context_manager.py +589 -0
- aiecs/domain/context/__init__.py +40 -10
- aiecs/domain/context/context_engine.py +1910 -0
- aiecs/domain/context/conversation_models.py +87 -53
- aiecs/domain/context/graph_memory.py +582 -0
- aiecs/domain/execution/model.py +12 -4
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +148 -0
- aiecs/domain/knowledge_graph/models/evidence.py +178 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
- aiecs/domain/knowledge_graph/models/path.py +171 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
- aiecs/domain/knowledge_graph/models/query.py +261 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
- aiecs/domain/knowledge_graph/models/relation.py +202 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
- aiecs/domain/task/dsl_processor.py +172 -56
- aiecs/domain/task/model.py +20 -8
- aiecs/domain/task/task_context.py +27 -24
- aiecs/infrastructure/__init__.py +0 -2
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +837 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
- aiecs/infrastructure/graph_storage/cache.py +424 -0
- aiecs/infrastructure/graph_storage/distributed.py +223 -0
- aiecs/infrastructure/graph_storage/error_handling.py +380 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
- aiecs/infrastructure/graph_storage/metrics.py +344 -0
- aiecs/infrastructure/graph_storage/migration.py +400 -0
- aiecs/infrastructure/graph_storage/pagination.py +483 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
- aiecs/infrastructure/graph_storage/postgres.py +1563 -0
- aiecs/infrastructure/graph_storage/property_storage.py +353 -0
- aiecs/infrastructure/graph_storage/protocols.py +76 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
- aiecs/infrastructure/graph_storage/streaming.py +487 -0
- aiecs/infrastructure/graph_storage/tenant.py +412 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
- aiecs/infrastructure/messaging/websocket_manager.py +51 -35
- aiecs/infrastructure/monitoring/__init__.py +22 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
- aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
- aiecs/infrastructure/monitoring/structured_logger.py +3 -7
- aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
- aiecs/infrastructure/persistence/__init__.py +14 -1
- aiecs/infrastructure/persistence/context_engine_client.py +184 -0
- aiecs/infrastructure/persistence/database_manager.py +67 -43
- aiecs/infrastructure/persistence/file_storage.py +180 -103
- aiecs/infrastructure/persistence/redis_client.py +74 -21
- aiecs/llm/__init__.py +73 -25
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
- aiecs/llm/client_factory.py +224 -36
- aiecs/llm/client_resolver.py +155 -0
- aiecs/llm/clients/__init__.py +38 -0
- aiecs/llm/clients/base_client.py +324 -0
- aiecs/llm/clients/google_function_calling_mixin.py +457 -0
- aiecs/llm/clients/googleai_client.py +241 -0
- aiecs/llm/clients/openai_client.py +158 -0
- aiecs/llm/clients/openai_compatible_mixin.py +367 -0
- aiecs/llm/clients/vertex_client.py +897 -0
- aiecs/llm/clients/xai_client.py +201 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +272 -0
- aiecs/llm/config/config_validator.py +206 -0
- aiecs/llm/config/model_config.py +143 -0
- aiecs/llm/protocols.py +149 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +89 -0
- aiecs/main.py +140 -121
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/module_checker.py +499 -0
- aiecs/scripts/aid/version_manager.py +235 -0
- aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
- aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
- aiecs/scripts/dependance_check/__init__.py +15 -0
- aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
- aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
- aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
- aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
- aiecs/scripts/knowledge_graph/__init__.py +3 -0
- aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
- aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
- aiecs/scripts/tools_develop/README.md +671 -0
- aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
- aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
- aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
- aiecs/scripts/tools_develop/schema_coverage.py +511 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
- aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
- aiecs/scripts/tools_develop/verify_tools.py +352 -0
- aiecs/tasks/__init__.py +0 -1
- aiecs/tasks/worker.py +115 -47
- aiecs/tools/__init__.py +194 -72
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +330 -0
- aiecs/tools/apisource/providers/__init__.py +112 -0
- aiecs/tools/apisource/providers/base.py +671 -0
- aiecs/tools/apisource/providers/census.py +397 -0
- aiecs/tools/apisource/providers/fred.py +535 -0
- aiecs/tools/apisource/providers/newsapi.py +409 -0
- aiecs/tools/apisource/providers/worldbank.py +352 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +363 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
- aiecs/tools/apisource/tool.py +832 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +334 -0
- aiecs/tools/base_tool.py +415 -21
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +607 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
- aiecs/tools/docs/content_insertion_tool.py +1320 -0
- aiecs/tools/docs/document_creator_tool.py +1323 -0
- aiecs/tools/docs/document_layout_tool.py +1160 -0
- aiecs/tools/docs/document_parser_tool.py +1011 -0
- aiecs/tools/docs/document_writer_tool.py +1829 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
- aiecs/tools/langchain_adapter.py +300 -138
- aiecs/tools/schema_generator.py +455 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +581 -0
- aiecs/tools/search_tool/cache.py +264 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +224 -0
- aiecs/tools/search_tool/core.py +778 -0
- aiecs/tools/search_tool/deduplicator.py +119 -0
- aiecs/tools/search_tool/error_handler.py +242 -0
- aiecs/tools/search_tool/metrics.py +343 -0
- aiecs/tools/search_tool/rate_limiter.py +172 -0
- aiecs/tools/search_tool/schemas.py +275 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
- aiecs/tools/statistics/data_loader_tool.py +555 -0
- aiecs/tools/statistics/data_profiler_tool.py +638 -0
- aiecs/tools/statistics/data_transformer_tool.py +580 -0
- aiecs/tools/statistics/data_visualizer_tool.py +498 -0
- aiecs/tools/statistics/model_trainer_tool.py +507 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
- aiecs/tools/task_tools/__init__.py +49 -36
- aiecs/tools/task_tools/chart_tool.py +200 -184
- aiecs/tools/task_tools/classfire_tool.py +268 -267
- aiecs/tools/task_tools/image_tool.py +175 -131
- aiecs/tools/task_tools/office_tool.py +226 -146
- aiecs/tools/task_tools/pandas_tool.py +477 -121
- aiecs/tools/task_tools/report_tool.py +390 -142
- aiecs/tools/task_tools/research_tool.py +149 -79
- aiecs/tools/task_tools/scraper_tool.py +339 -145
- aiecs/tools/task_tools/stats_tool.py +448 -209
- aiecs/tools/temp_file_manager.py +26 -24
- aiecs/tools/tool_executor/__init__.py +18 -16
- aiecs/tools/tool_executor/tool_executor.py +364 -52
- aiecs/utils/LLM_output_structor.py +74 -48
- aiecs/utils/__init__.py +14 -3
- aiecs/utils/base_callback.py +0 -3
- aiecs/utils/cache_provider.py +696 -0
- aiecs/utils/execution_utils.py +50 -31
- aiecs/utils/prompt_loader.py +1 -0
- aiecs/utils/token_usage_repository.py +37 -11
- aiecs/ws/socket_server.py +14 -4
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
- aiecs-1.7.6.dist-info/RECORD +337 -0
- aiecs-1.7.6.dist-info/entry_points.txt +13 -0
- aiecs/config/registry.py +0 -19
- aiecs/domain/context/content_engine.py +0 -982
- aiecs/llm/base_client.py +0 -99
- aiecs/llm/openai_client.py +0 -125
- aiecs/llm/vertex_client.py +0 -186
- aiecs/llm/xai_client.py +0 -184
- aiecs/scripts/dependency_checker.py +0 -857
- aiecs/scripts/quick_dependency_check.py +0 -269
- aiecs/tools/task_tools/search_api.py +0 -7
- aiecs-1.0.1.dist-info/RECORD +0 -90
- aiecs-1.0.1.dist-info/entry_points.txt +0 -7
- /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
- /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
- /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
- /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,1197 @@
|
|
|
1
|
+
"""
|
|
2
|
+
In-Memory Graph Store Implementation
|
|
3
|
+
|
|
4
|
+
Implements Tier 1 of GraphStore interface using networkx.
|
|
5
|
+
Tier 2 methods work automatically via default implementations.
|
|
6
|
+
|
|
7
|
+
This is ideal for:
|
|
8
|
+
- Development and testing
|
|
9
|
+
- Small graphs (< 100K nodes)
|
|
10
|
+
- Prototyping
|
|
11
|
+
- Scenarios where persistence is not required
|
|
12
|
+
|
|
13
|
+
Multi-tenancy Support:
|
|
14
|
+
- Tenant-partitioned graphs using OrderedDict for LRU tracking
|
|
15
|
+
- Global graph for tenant_id=None (never evicted)
|
|
16
|
+
- Configurable max_tenant_graphs with LRU eviction
|
|
17
|
+
- Environment variable KG_INMEMORY_MAX_TENANTS for configuration
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import os
|
|
21
|
+
from collections import OrderedDict
|
|
22
|
+
from typing import Any, List, Optional, Dict, Set, Tuple
|
|
23
|
+
import networkx as nx # type: ignore[import-untyped]
|
|
24
|
+
from aiecs.domain.knowledge_graph.models.entity import Entity
|
|
25
|
+
from aiecs.domain.knowledge_graph.models.relation import Relation
|
|
26
|
+
from aiecs.infrastructure.graph_storage.base import GraphStore
|
|
27
|
+
from aiecs.infrastructure.graph_storage.tenant import TenantContext, CrossTenantRelationError
|
|
28
|
+
from aiecs.infrastructure.graph_storage.property_storage import (
|
|
29
|
+
PropertyOptimizer,
|
|
30
|
+
PropertyStorageConfig,
|
|
31
|
+
PropertyIndex,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
# Default maximum number of tenant graphs to keep in memory
|
|
35
|
+
DEFAULT_MAX_TENANT_GRAPHS = 100
|
|
36
|
+
ENV_MAX_TENANTS = "KG_INMEMORY_MAX_TENANTS"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class InMemoryGraphStore(GraphStore):
|
|
40
|
+
"""
|
|
41
|
+
In-Memory Graph Store using NetworkX
|
|
42
|
+
|
|
43
|
+
**Implementation Strategy**:
|
|
44
|
+
- Uses networkx.DiGraph for graph structure
|
|
45
|
+
- Stores Entity objects as node attributes
|
|
46
|
+
- Stores Relation objects as edge attributes
|
|
47
|
+
- Implements ONLY Tier 1 methods
|
|
48
|
+
- Tier 2 methods (traverse, find_paths, etc.) work automatically!
|
|
49
|
+
|
|
50
|
+
**Features**:
|
|
51
|
+
- Fast for small-medium graphs
|
|
52
|
+
- No external dependencies
|
|
53
|
+
- Full Python ecosystem integration
|
|
54
|
+
- Rich graph algorithms from networkx
|
|
55
|
+
|
|
56
|
+
**Multi-Tenancy Support**:
|
|
57
|
+
- Tenant-partitioned graphs: Each tenant has its own nx.DiGraph
|
|
58
|
+
- Global graph for tenant_id=None (never evicted, backward compatible)
|
|
59
|
+
- LRU eviction: When max_tenant_graphs exceeded, least recently used tenant evicted
|
|
60
|
+
- Configure via max_tenant_graphs param or KG_INMEMORY_MAX_TENANTS env var
|
|
61
|
+
|
|
62
|
+
**Limitations**:
|
|
63
|
+
- Not persistent (lost on restart)
|
|
64
|
+
- Limited by RAM
|
|
65
|
+
- No concurrent access control
|
|
66
|
+
- No vector search optimization
|
|
67
|
+
|
|
68
|
+
Example:
|
|
69
|
+
```python
|
|
70
|
+
store = InMemoryGraphStore()
|
|
71
|
+
await store.initialize()
|
|
72
|
+
|
|
73
|
+
# Single-tenant usage (backward compatible)
|
|
74
|
+
entity = Entity(id="person_1", entity_type="Person", properties={"name": "Alice"})
|
|
75
|
+
await store.add_entity(entity)
|
|
76
|
+
|
|
77
|
+
# Multi-tenant usage
|
|
78
|
+
from aiecs.infrastructure.graph_storage.tenant import TenantContext
|
|
79
|
+
context = TenantContext(tenant_id="acme-corp")
|
|
80
|
+
await store.add_entity(entity, context=context)
|
|
81
|
+
|
|
82
|
+
# Tier 2 methods work automatically with tenant isolation
|
|
83
|
+
paths = await store.traverse("person_1", max_depth=3, context=context)
|
|
84
|
+
```
|
|
85
|
+
"""
|
|
86
|
+
|
|
87
|
+
def __init__(
    self,
    property_storage_config: Optional[PropertyStorageConfig] = None,
    max_tenant_graphs: Optional[int] = None,
) -> None:
    """
    Set up an empty, not-yet-initialized in-memory store.

    Args:
        property_storage_config: Optional property storage settings. When
            provided, a PropertyOptimizer is built from it; otherwise no
            optimizer is attached.
        max_tenant_graphs: Cap on the number of tenant graphs kept in memory.
            When omitted, the KG_INMEMORY_MAX_TENANTS environment variable
            is used if it is set and parses as an integer; otherwise
            DEFAULT_MAX_TENANT_GRAPHS applies.
    """
    # Resolve the tenant cap: explicit argument > environment > default.
    tenant_cap = max_tenant_graphs
    if tenant_cap is None:
        raw_cap = os.environ.get(ENV_MAX_TENANTS)
        try:
            tenant_cap = int(raw_cap) if raw_cap else DEFAULT_MAX_TENANT_GRAPHS
        except ValueError:
            # Non-numeric environment value falls back to the default.
            tenant_cap = DEFAULT_MAX_TENANT_GRAPHS
    self._max_tenant_graphs = tenant_cap

    # Storage for the global namespace (tenant_id=None).
    self._global_graph: Optional[nx.DiGraph] = None
    self._global_entities: Dict[str, Entity] = {}
    self._global_relations: Dict[str, Relation] = {}

    # Per-tenant storage. The OrderedDict order doubles as the LRU order:
    # the first key is the least recently used tenant.
    self._tenant_graphs: OrderedDict[str, nx.DiGraph] = OrderedDict()
    self._tenant_entities: Dict[str, Dict[str, Entity]] = {}
    self._tenant_relations: Dict[str, Dict[str, Relation]] = {}

    # Legacy public attributes kept for backward compatibility.
    self.graph: Optional[nx.DiGraph] = None
    self.entities: Dict[str, Entity] = {}
    self.relations: Dict[str, Relation] = {}

    self._initialized = False

    # Optional property storage optimization.
    self._property_optimizer: Optional[PropertyOptimizer] = None
    if property_storage_config is not None:
        self._property_optimizer = PropertyOptimizer(property_storage_config)
|
|
137
|
+
|
|
138
|
+
# =========================================================================
|
|
139
|
+
# TIER 1 IMPLEMENTATION - Core CRUD Operations
|
|
140
|
+
# =========================================================================
|
|
141
|
+
|
|
142
|
+
async def initialize(self) -> None:
    """
    Create fresh, empty storage and mark the store as initialized.

    Does nothing when the store is already initialized. Otherwise a new
    global graph and empty global/tenant containers are created, and the
    legacy ``graph`` / ``entities`` / ``relations`` attributes are pointed
    at the very same global objects.
    """
    if not self._initialized:
        global_graph = nx.DiGraph()
        global_entities: Dict[str, Entity] = {}
        global_relations: Dict[str, Relation] = {}

        # Global namespace (tenant_id=None).
        self._global_graph = global_graph
        self._global_entities = global_entities
        self._global_relations = global_relations

        # Tenant-partitioned storage starts empty.
        self._tenant_graphs = OrderedDict()
        self._tenant_entities = {}
        self._tenant_relations = {}

        # Legacy attributes alias the global storage (same objects).
        self.graph = global_graph
        self.entities = global_entities
        self.relations = global_relations

        self._initialized = True
|
|
163
|
+
|
|
164
|
+
async def close(self) -> None:
    """
    Drop all in-memory data and mark the store as uninitialized.

    The tenant containers are emptied in place, while the global and
    legacy attributes are rebound to ``None`` / new empty dicts.
    """
    # Tenant storage: emptied in place.
    for tenant_container in (
        self._tenant_graphs,
        self._tenant_entities,
        self._tenant_relations,
    ):
        tenant_container.clear()

    # Global storage: rebound to fresh empty values.
    self._global_graph = None
    self._global_entities = {}
    self._global_relations = {}

    # Legacy references: rebound separately, so they no longer alias the
    # global dicts after closing.
    self.graph = None
    self.entities = {}
    self.relations = {}

    self._initialized = False
|
|
182
|
+
|
|
183
|
+
# =========================================================================
|
|
184
|
+
# MULTI-TENANCY HELPERS
|
|
185
|
+
# =========================================================================
|
|
186
|
+
|
|
187
|
+
def _get_tenant_id(self, context: Optional[TenantContext]) -> Optional[str]:
    """Return the context's tenant_id, or None (global namespace) for a falsy context."""
    if not context:
        return None
    return context.tenant_id
|
|
190
|
+
|
|
191
|
+
def _get_graph(self, tenant_id: Optional[str]) -> nx.DiGraph:
    """
    Return the graph for a tenant, creating it on first use.

    Args:
        tenant_id: Tenant ID, or None for the global namespace.

    Returns:
        The networkx DiGraph belonging to that tenant (or the global graph).

    Note:
        - The global graph is kept outside the LRU-tracked tenant mapping.
        - Looking up an existing tenant marks it as most recently used.
        - Creating a new tenant first calls ``_evict_if_needed`` and also
          resets that tenant's entity and relation dicts to empty.
    """
    if tenant_id is not None:
        if tenant_id not in self._tenant_graphs:
            # Unknown tenant: make room, then register fresh storage.
            self._evict_if_needed()
            new_graph = nx.DiGraph()
            self._tenant_graphs[tenant_id] = new_graph
            self._tenant_entities[tenant_id] = {}
            self._tenant_relations[tenant_id] = {}
            return new_graph

        # Known tenant: move to the end, i.e. most recently used.
        self._tenant_graphs.move_to_end(tenant_id)
        return self._tenant_graphs[tenant_id]

    # Global namespace: created lazily if absent.
    if self._global_graph is None:
        self._global_graph = nx.DiGraph()
    return self._global_graph
|
|
224
|
+
|
|
225
|
+
def _get_entities_dict(self, tenant_id: Optional[str], update_lru: bool = True) -> Dict[str, Entity]:
    """Return the entity index for a tenant, creating an empty one if missing.

    Args:
        tenant_id: Tenant ID, or None for the global namespace.
        update_lru: When True (default) and the tenant has a graph, mark the
            tenant as most recently used.
    """
    if tenant_id is None:
        return self._global_entities

    tracked = tenant_id in self._tenant_graphs
    if update_lru and tracked:
        self._tenant_graphs.move_to_end(tenant_id)

    # setdefault registers an empty dict only when none exists for the tenant.
    return self._tenant_entities.setdefault(tenant_id, {})
|
|
240
|
+
|
|
241
|
+
def _get_relations_dict(self, tenant_id: Optional[str], update_lru: bool = True) -> Dict[str, Relation]:
    """Return the relation index for a tenant, creating an empty one if missing.

    Args:
        tenant_id: Tenant ID, or None for the global namespace.
        update_lru: When True (default) and the tenant has a graph, mark the
            tenant as most recently used.
    """
    if tenant_id is None:
        return self._global_relations

    tracked = tenant_id in self._tenant_graphs
    if update_lru and tracked:
        self._tenant_graphs.move_to_end(tenant_id)

    # setdefault registers an empty dict only when none exists for the tenant.
    return self._tenant_relations.setdefault(tenant_id, {})
|
|
256
|
+
|
|
257
|
+
def _evict_if_needed(self) -> None:
    """
    Evict least recently used tenants until there is room for one more.

    Tenants are removed from the front of ``_tenant_graphs`` (the least
    recently used end) while the number of tenant graphs is at or above
    ``_max_tenant_graphs``. Each evicted tenant's entity and relation
    dicts are dropped as well.

    The loop also stops once no tenant graphs remain. Without that guard a
    cap of zero or less (for example KG_INMEMORY_MAX_TENANTS=0) made
    ``popitem`` run on an empty mapping and raise KeyError.
    """
    while self._tenant_graphs and len(self._tenant_graphs) >= self._max_tenant_graphs:
        # popitem(last=False) removes the first, least recently used, entry.
        evicted_tenant_id, _ = self._tenant_graphs.popitem(last=False)
        # Drop the data that belonged to the evicted tenant.
        self._tenant_entities.pop(evicted_tenant_id, None)
        self._tenant_relations.pop(evicted_tenant_id, None)
|
|
265
|
+
|
|
266
|
+
def get_tenant_count(self) -> int:
    """
    Report how many tenant graphs are currently held in memory.

    Returns:
        Size of the tenant graph mapping; the global graph is stored
        separately and is not counted.
    """
    tenant_graphs = self._tenant_graphs
    return len(tenant_graphs)
|
|
274
|
+
|
|
275
|
+
def get_tenant_ids(self) -> List[str]:
    """
    List the tenant IDs that currently have a graph in memory.

    Returns:
        A new list of tenant IDs in the mapping's current order; the
        global namespace is stored separately and is not included.
    """
    return [tenant_id for tenant_id in self._tenant_graphs]
|
|
283
|
+
|
|
284
|
+
# =========================================================================
|
|
285
|
+
# TIER 1 CRUD OPERATIONS
|
|
286
|
+
# =========================================================================
|
|
287
|
+
|
|
288
|
+
async def add_entity(self, entity: Entity, context: Optional[TenantContext] = None) -> None:
    """
    Register a new entity in the resolved namespace.

    The entity becomes a node of the namespace's graph and an entry of its
    entity index. If a tenant is resolved from ``context`` and the entity has
    no tenant yet, the entity is stamped with that tenant. When a property
    optimizer is configured, the entity's properties are replaced by the
    optimizer's output and then indexed.

    Args:
        entity: Entity to add
        context: Optional tenant context for multi-tenant isolation

    Raises:
        ValueError: If an entity with the same ID is already indexed
        RuntimeError: If store not initialized
    """
    if not self._initialized:
        raise RuntimeError("GraphStore not initialized. Call initialize() first.")

    tenant_id = self._get_tenant_id(context)
    tenant_graph = self._get_graph(tenant_id)
    entity_index = self._get_entities_dict(tenant_id)

    if entity.id in entity_index:
        raise ValueError(f"Entity with ID '{entity.id}' already exists")

    # An explicit tenant already set on the entity is never overwritten.
    if not (tenant_id is None or entity.tenant_id is not None):
        entity.tenant_id = tenant_id

    optimizer = self._property_optimizer
    if optimizer is not None:
        # Optimize first so that the index sees the stored form of the properties.
        entity.properties = optimizer.optimize_properties(entity.properties)
        optimizer.index_entity(entity.id, entity.properties)

    # Graph node and lookup index are kept in step.
    tenant_graph.add_node(entity.id, entity=entity)
    entity_index[entity.id] = entity
|
|
326
|
+
|
|
327
|
+
async def get_entity(self, entity_id: str, context: Optional[TenantContext] = None) -> Optional[Entity]:
    """
    Look up an entity by ID within the resolved namespace.

    Args:
        entity_id: Entity ID
        context: Optional tenant context for multi-tenant isolation

    Returns:
        The entity if it is indexed in that namespace; None if it is not, or
        if the store has not been initialized
    """
    if self._initialized:
        entity_index = self._get_entities_dict(self._get_tenant_id(context))
        return entity_index.get(entity_id)
    return None
|
|
344
|
+
|
|
345
|
+
async def add_relation(self, relation: Relation, context: Optional[TenantContext] = None) -> None:
    """
    Register a new relation between two entities of the resolved namespace.

    The relation is stored as an edge (source -> target) carrying the
    relation object, and in the namespace's relation index. If a tenant is
    resolved from ``context`` and the relation has no tenant yet, it is
    stamped with that tenant.

    Args:
        relation: Relation to add
        context: Optional tenant context for multi-tenant isolation

    Raises:
        ValueError: If relation already exists or entities don't exist
        CrossTenantRelationError: If source and target entities belong to different tenants
        RuntimeError: If store not initialized
    """
    if not self._initialized:
        raise RuntimeError("GraphStore not initialized. Call initialize() first.")

    tenant_id = self._get_tenant_id(context)
    tenant_graph = self._get_graph(tenant_id)
    entity_index = self._get_entities_dict(tenant_id)
    relation_index = self._get_relations_dict(tenant_id)

    if relation.id in relation_index:
        raise ValueError(f"Relation with ID '{relation.id}' already exists")

    # Both endpoints must already be indexed in this namespace.
    source_entity = entity_index.get(relation.source_id)
    if source_entity is None:
        raise ValueError(f"Source entity '{relation.source_id}' not found")
    target_entity = entity_index.get(relation.target_id)
    if target_entity is None:
        raise ValueError(f"Target entity '{relation.target_id}' not found")

    if tenant_id is not None:
        # The same-tenant rule is only checked for tenant-scoped calls.
        if source_entity.tenant_id != target_entity.tenant_id:
            raise CrossTenantRelationError(source_entity.tenant_id, target_entity.tenant_id)
        # An explicit tenant already set on the relation is never overwritten.
        if relation.tenant_id is None:
            relation.tenant_id = tenant_id

    tenant_graph.add_edge(relation.source_id, relation.target_id, key=relation.id, relation=relation)
    relation_index[relation.id] = relation
|
|
399
|
+
|
|
400
|
+
async def get_relation(self, relation_id: str, context: Optional[TenantContext] = None) -> Optional[Relation]:
    """
    Look up a relation by ID within the resolved namespace.

    Args:
        relation_id: Relation ID
        context: Optional tenant context for multi-tenant isolation

    Returns:
        The relation if it is indexed in that namespace; None if it is not,
        or if the store has not been initialized
    """
    if self._initialized:
        relation_index = self._get_relations_dict(self._get_tenant_id(context))
        return relation_index.get(relation_id)
    return None
|
|
417
|
+
|
|
418
|
+
async def get_neighbors(
    self,
    entity_id: str,
    relation_type: Optional[str] = None,
    direction: str = "outgoing",
    context: Optional[TenantContext] = None,
) -> List[Entity]:
    """
    Collect the entities directly connected to an entity.

    Outgoing neighbors (graph successors) are listed before incoming ones
    (graph predecessors). With ``direction="both"`` an entity connected in
    both directions appears twice. Neighbor nodes missing from the entity
    index are skipped. Any other ``direction`` value yields an empty list.

    Args:
        entity_id: Entity ID to get neighbors for
        relation_type: Optional filter by relation type
        direction: "outgoing", "incoming", or "both"
        context: Optional tenant context for multi-tenant isolation

    Returns:
        List of neighboring entities; empty if the store is not initialized
        or the entity is not a node of the namespace's graph
    """
    if not self._initialized:
        return []

    tenant_id = self._get_tenant_id(context)
    tenant_graph = self._get_graph(tenant_id)
    entity_index = self._get_entities_dict(tenant_id)

    if entity_id not in tenant_graph:
        return []

    def _edge_matches(source_id: str, target_id: str) -> bool:
        # Without a type filter every edge qualifies and edge data is not read.
        if not relation_type:
            return True
        edge_data = tenant_graph.get_edge_data(source_id, target_id)
        if not edge_data:
            return False
        edge_relation = edge_data.get("relation")
        return bool(edge_relation) and edge_relation.relation_type == relation_type

    found: List[Entity] = []

    if direction in ("outgoing", "both"):
        found.extend(
            entity_index[target_id]
            for target_id in tenant_graph.successors(entity_id)
            if _edge_matches(entity_id, target_id) and target_id in entity_index
        )

    if direction in ("incoming", "both"):
        found.extend(
            entity_index[source_id]
            for source_id in tenant_graph.predecessors(entity_id)
            if _edge_matches(source_id, entity_id) and source_id in entity_index
        )

    return found
|
|
480
|
+
|
|
481
|
+
async def get_outgoing_relations(self, entity_id: str, context: Optional[TenantContext] = None) -> List[Relation]:
    """
    Collect the relations stored on the edges leaving an entity.

    Args:
        entity_id: Entity ID to get outgoing relations for
        context: Optional tenant context for multi-tenant isolation

    Returns:
        List of outgoing Relation objects, one per successor whose edge data
        carries a relation; empty if the store is not initialized or the
        entity is not a node of the namespace's graph
    """
    if not self._initialized:
        return []

    tenant_graph = self._get_graph(self._get_tenant_id(context))
    if entity_id not in tenant_graph:
        return []

    edge_payloads = (
        tenant_graph.get_edge_data(entity_id, target_id)
        for target_id in tenant_graph.successors(entity_id)
    )
    # Edges without data, or without a usable "relation" entry, are ignored.
    edge_relations = (payload.get("relation") for payload in edge_payloads if payload)
    return [edge_relation for edge_relation in edge_relations if edge_relation]
|
|
510
|
+
|
|
511
|
+
async def get_incoming_relations(self, entity_id: str, context: Optional[TenantContext] = None) -> List[Relation]:
    """
    Collect the relations stored on the edges arriving at an entity.

    Args:
        entity_id: Entity ID to get incoming relations for
        context: Optional tenant context for multi-tenant isolation

    Returns:
        List of incoming Relation objects, one per predecessor whose edge
        data carries a relation; empty if the store is not initialized or the
        entity is not a node of the namespace's graph
    """
    if not self._initialized:
        return []

    tenant_graph = self._get_graph(self._get_tenant_id(context))
    if entity_id not in tenant_graph:
        return []

    edge_payloads = (
        tenant_graph.get_edge_data(source_id, entity_id)
        for source_id in tenant_graph.predecessors(entity_id)
    )
    # Edges without data, or without a usable "relation" entry, are ignored.
    edge_relations = (payload.get("relation") for payload in edge_payloads if payload)
    return [edge_relation for edge_relation in edge_relations if edge_relation]
|
|
540
|
+
|
|
541
|
+
async def get_all_entities(
    self,
    entity_type: Optional[str] = None,
    limit: Optional[int] = None,
    context: Optional[TenantContext] = None,
) -> List[Entity]:
    """
    Get all entities of the resolved namespace, optionally filtered by type.

    Args:
        entity_type: Optional filter by entity type; a falsy value disables
            the filter
        limit: Optional limit on number of entities. None means unlimited;
            zero or a negative value yields an empty list.
        context: Optional tenant context for multi-tenant isolation

    Returns:
        List of entities in index order; empty if the store is not
        initialized
    """
    if not self._initialized:
        return []

    tenant_id = self._get_tenant_id(context)
    entities_dict = self._get_entities_dict(tenant_id)

    entities = [
        entity
        for entity in entities_dict.values()
        if not entity_type or entity.entity_type == entity_type
    ]

    # Compare against None rather than truthiness: limit=0 is a real limit and
    # must not fall through to "return everything". Clamping keeps a negative
    # limit from being read as a from-the-end slice.
    if limit is not None:
        entities = entities[: max(limit, 0)]

    return entities
|
|
574
|
+
|
|
575
|
+
# =========================================================================
|
|
576
|
+
# BULK OPERATIONS - Optimized implementations
|
|
577
|
+
# =========================================================================
|
|
578
|
+
|
|
579
|
+
async def add_entities_bulk(self, entities: List[Entity], context: Optional[TenantContext] = None) -> int:
    """
    Add many entities in one call.

    Works directly on the namespace's graph and entity index instead of
    calling add_entity() per item. Entities whose ID is already indexed
    (including repeats within ``entities``) are skipped rather than rejected.

    Args:
        entities: List of entities to add
        context: Optional tenant context for multi-tenant isolation

    Returns:
        Number of entities successfully added

    Raises:
        RuntimeError: If store not initialized
    """
    if not self._initialized:
        raise RuntimeError("GraphStore not initialized. Call initialize() first.")

    tenant_id = self._get_tenant_id(context)
    tenant_graph = self._get_graph(tenant_id)
    entity_index = self._get_entities_dict(tenant_id)
    optimizer = self._property_optimizer

    inserted = 0
    for candidate in entities:
        if candidate.id in entity_index:
            continue  # already present: leave the stored entity untouched

        # An explicit tenant already set on the entity is never overwritten.
        if tenant_id is not None and candidate.tenant_id is None:
            candidate.tenant_id = tenant_id

        if optimizer is not None:
            # Optimize first so that the index sees the stored form of the properties.
            candidate.properties = optimizer.optimize_properties(candidate.properties)
            optimizer.index_entity(candidate.id, candidate.properties)

        tenant_graph.add_node(candidate.id, entity=candidate)
        entity_index[candidate.id] = candidate
        inserted += 1

    return inserted
|
|
619
|
+
|
|
620
|
+
async def add_relations_bulk(self, relations: List[Relation], context: Optional[TenantContext] = None) -> int:
    """
    Add multiple relations in bulk (optimized).

    Bypasses individual add_relation() calls for better performance. Invalid
    items are skipped instead of raising: relations whose ID is already
    indexed, relations with a missing source or target entity, and - for
    tenant-scoped calls - relations whose endpoints carry different tenant
    IDs (the condition add_relation() rejects with CrossTenantRelationError).

    Args:
        relations: List of relations to add
        context: Optional tenant context for multi-tenant isolation

    Returns:
        Number of relations successfully added

    Raises:
        RuntimeError: If store not initialized
    """
    if not self._initialized:
        raise RuntimeError("GraphStore not initialized. Call initialize() first.")

    tenant_id = self._get_tenant_id(context)
    graph = self._get_graph(tenant_id)
    entities_dict = self._get_entities_dict(tenant_id)
    relations_dict = self._get_relations_dict(tenant_id)

    added = 0
    for relation in relations:
        if relation.id in relations_dict:
            continue  # Skip existing relations

        # Validate entities exist
        if relation.source_id not in entities_dict or relation.target_id not in entities_dict:
            continue

        if tenant_id is not None:
            # Keep the bulk path consistent with add_relation(): a relation
            # must not join entities that belong to different tenants.
            source_tenant = entities_dict[relation.source_id].tenant_id
            target_tenant = entities_dict[relation.target_id].tenant_id
            if source_tenant != target_tenant:
                continue

            # Set tenant_id on relation if the relation doesn't have one
            if relation.tenant_id is None:
                relation.tenant_id = tenant_id

        graph.add_edge(
            relation.source_id,
            relation.target_id,
            key=relation.id,
            relation=relation,
        )
        relations_dict[relation.id] = relation
        added += 1

    return added
|
|
667
|
+
|
|
668
|
+
# =========================================================================
|
|
669
|
+
# TIER 2 METHODS - Optimized overrides with multi-tenancy support
|
|
670
|
+
# =========================================================================
|
|
671
|
+
|
|
672
|
+
async def traverse(
    self,
    start_entity_id: str,
    relation_type: Optional[str] = None,
    max_depth: int = 3,
    max_results: int = 100,
    context: Optional[TenantContext] = None,
) -> List:
    """
    Breadth-first traversal from a start entity inside one namespace.

    Each entity is visited at most once, following outgoing edges only. Every
    visited entity other than the start contributes one path made of the
    relations walked to reach it.

    Args:
        start_entity_id: Starting entity ID
        relation_type: Optional filter by relation type
        max_depth: Maximum traversal depth
        max_results: Maximum number of paths to return
        context: Optional tenant context for multi-tenant isolation

    Returns:
        List of paths found during traversal; empty if the store is not
        initialized or the start entity is not indexed
    """
    from collections import deque
    from aiecs.domain.knowledge_graph.models.path import Path

    if not self._initialized:
        return []

    tenant_id = self._get_tenant_id(context)
    tenant_graph = self._get_graph(tenant_id)
    entity_index = self._get_entities_dict(tenant_id)

    origin = entity_index.get(start_entity_id)
    if origin is None:
        return []

    found_paths: List[Path] = []
    seen: Set[str] = set()
    frontier: deque = deque([(origin, [])])  # (entity, relations walked so far)

    while frontier and len(found_paths) < max_results:
        node, trail = frontier.popleft()
        if node.id in seen:
            continue
        seen.add(node.id)
        depth = len(trail)

        # The start entity alone (empty trail) is not reported as a path.
        if depth > 0:
            hops = [entity_index.get(edge.target_id) for edge in trail]
            nodes_path = [entity_index[start_entity_id]] + [hop for hop in hops if hop]
            # Only report the path if every hop resolved to an indexed entity.
            if len(nodes_path) == depth + 1:
                found_paths.append(Path(nodes=nodes_path, edges=trail))

        if depth >= max_depth:
            continue

        for target_id in tenant_graph.successors(node.id):
            if target_id in seen:
                continue

            edge_data = tenant_graph.get_edge_data(node.id, target_id)
            edge_relation = edge_data.get("relation") if edge_data else None
            if not edge_relation:
                continue
            if relation_type and edge_relation.relation_type != relation_type:
                continue

            target = entity_index.get(target_id)
            if target:
                frontier.append((target, trail + [edge_relation]))

    return found_paths
|
|
750
|
+
|
|
751
|
+
async def vector_search(
    self,
    query_embedding: List[float],
    entity_type: Optional[str] = None,
    max_results: int = 10,
    score_threshold: float = 0.0,
    context: Optional[TenantContext] = None,
) -> List[Tuple[Entity, float]]:
    """
    Brute-force cosine-similarity search over the namespace's entities.

    Every entity with a non-empty, non-zero embedding is scored against the
    query. The cosine value is mapped from [-1, 1] to [0, 1] via
    ``(cos + 1) / 2`` before the threshold is applied.

    Args:
        query_embedding: Query vector
        entity_type: Optional filter by entity type
        max_results: Maximum number of results
        score_threshold: Minimum similarity score (0.0-1.0)
        context: Optional tenant context for multi-tenant isolation

    Returns:
        List of (entity, similarity_score) tuples, sorted descending; empty
        if the store is not initialized or the query vector has zero norm

    Raises:
        ValueError: If the query embedding is empty
    """
    if not self._initialized:
        return []

    if not query_embedding:
        raise ValueError("Query embedding cannot be empty")

    import numpy as np

    tenant_id = self._get_tenant_id(context)
    entity_index = self._get_entities_dict(tenant_id)

    query_vec = np.array(query_embedding, dtype=np.float32)
    query_norm = np.linalg.norm(query_vec)
    if query_norm == 0:
        return []

    def _similarity(embedding):
        # Returns None for zero vectors, whose cosine is undefined.
        vec = np.array(embedding, dtype=np.float32)
        vec_norm = np.linalg.norm(vec)
        if vec_norm == 0:
            return None
        cosine = np.dot(query_vec, vec) / (query_norm * vec_norm)
        return (cosine + 1) / 2

    hits: List[Tuple[Entity, float]] = []
    for candidate in entity_index.values():
        if entity_type and candidate.entity_type != entity_type:
            continue
        if not candidate.embedding:
            continue

        score = _similarity(candidate.embedding)
        if score is not None and score >= score_threshold:
            hits.append((candidate, float(score)))

    ranked = sorted(hits, key=lambda pair: pair[1], reverse=True)
    return ranked[:max_results]
|
|
821
|
+
|
|
822
|
+
async def text_search(
    self,
    query_text: str,
    entity_type: Optional[str] = None,
    max_results: int = 10,
    score_threshold: float = 0.0,
    method: str = "bm25",
    context: Optional[TenantContext] = None,
) -> List[Tuple[Entity, float]]:
    """
    Text-similarity search over the string properties of entities.

    Each candidate entity is represented by its string property values (and
    the string items of list/tuple properties) joined with spaces. That text
    is scored against the query with the selected method.

    Args:
        query_text: Query text string
        entity_type: Optional filter by entity type
        max_results: Maximum number of results
        score_threshold: Minimum similarity score (0.0-1.0)
        method: Similarity method ("bm25", "jaccard", "cosine", "levenshtein")
        context: Optional tenant context for multi-tenant isolation

    Returns:
        List of (entity, similarity_score) tuples, sorted descending; empty
        if the store is not initialized, the query is empty, or no entity
        passes the type filter

    Raises:
        ValueError: If ``method`` is not one of the supported names (only
            checked once there is at least one candidate entity)
    """
    if not self._initialized:
        return []

    if not query_text:
        return []

    from aiecs.application.knowledge_graph.search.text_similarity import (
        BM25Scorer,
        jaccard_similarity_text,
        cosine_similarity_text,
        normalized_levenshtein_similarity,
    )

    tenant_id = self._get_tenant_id(context)
    entity_index = self._get_entities_dict(tenant_id)

    candidates = [
        candidate
        for candidate in entity_index.values()
        if not entity_type or candidate.entity_type == entity_type
    ]
    if not candidates:
        return []

    def _searchable_text(candidate) -> str:
        # Non-string values, and non-string items inside sequences, are ignored.
        fragments = []
        for value in candidate.properties.values():
            if isinstance(value, str):
                fragments.append(value)
            elif isinstance(value, (list, tuple)):
                fragments.extend(str(item) for item in value if isinstance(item, str))
        return " ".join(fragments)

    documents = [(candidate, _searchable_text(candidate)) for candidate in candidates]

    # Methods that score one (query, document) pair at a time.
    pairwise_scorers = {
        "jaccard": jaccard_similarity_text,
        "cosine": cosine_similarity_text,
        "levenshtein": normalized_levenshtein_similarity,
    }

    hits: List[Tuple[Entity, float]] = []

    if method == "bm25":
        # BM25 is scored against the whole corpus in a single call.
        corpus_scores = BM25Scorer([text for _, text in documents]).score(query_text)
        for (candidate, _), score in zip(documents, corpus_scores):
            if score >= score_threshold:
                hits.append((candidate, float(score)))
    elif method in pairwise_scorers:
        score_pair = pairwise_scorers[method]
        for candidate, text in documents:
            score = score_pair(query_text, text)
            if score >= score_threshold:
                hits.append((candidate, score))
    else:
        raise ValueError(f"Unknown text search method: {method}. Use 'bm25', 'jaccard', 'cosine', or 'levenshtein'")

    ranked = sorted(hits, key=lambda pair: pair[1], reverse=True)
    return ranked[:max_results]
|
|
921
|
+
|
|
922
|
+
async def find_paths(
    self,
    source_entity_id: str,
    target_entity_id: str,
    max_depth: int = 5,
    max_paths: int = 10,
    context: Optional[TenantContext] = None,
) -> List:
    """
    Optimized path finding using networkx algorithms

    Enumerates simple paths with networkx.all_simple_paths inside the
    resolved namespace and converts each node-ID path into a Path of Entity
    and Relation objects. Node paths containing an ID that is missing from
    the entity index are dropped.

    Args:
        source_entity_id: Source entity ID
        target_entity_id: Target entity ID
        max_depth: Maximum path length (passed to networkx as ``cutoff``)
        max_paths: Maximum number of paths to return; zero or a negative
            value yields an empty list
        context: Optional tenant context for multi-tenant isolation

    Returns:
        List of paths between source and target; empty if the store is not
        initialized or either endpoint is not a node of the namespace's graph
    """
    from aiecs.domain.knowledge_graph.models.path import Path

    if not self._initialized:
        return []

    # The limit below is only tested after a path has been appended, so a
    # non-positive budget must be rejected up front or one path would still
    # be returned.
    if max_paths <= 0:
        return []

    tenant_id = self._get_tenant_id(context)
    graph = self._get_graph(tenant_id)
    entities_dict = self._get_entities_dict(tenant_id)

    if source_entity_id not in graph or target_entity_id not in graph:
        return []

    try:
        paths = []
        for node_path in nx.all_simple_paths(
            graph,
            source_entity_id,
            target_entity_id,
            cutoff=max_depth,
        ):
            # Convert node IDs to Entity objects; skip paths with unknown nodes
            entities = [entities_dict[node_id] for node_id in node_path if node_id in entities_dict]
            if len(entities) != len(node_path):
                continue

            # Get relations between consecutive nodes
            edges = []
            for from_id, to_id in zip(node_path, node_path[1:]):
                edge_data = graph.get_edge_data(from_id, to_id)
                if edge_data and "relation" in edge_data:
                    edges.append(edge_data["relation"])

            paths.append(Path(nodes=entities, edges=edges))
            if len(paths) >= max_paths:
                break

        return paths

    except nx.NetworkXNoPath:
        return []
|
|
987
|
+
|
|
988
|
+
# =========================================================================
|
|
989
|
+
# UTILITY METHODS
|
|
990
|
+
# =========================================================================
|
|
991
|
+
|
|
992
|
+
def get_stats(self, context: Optional[TenantContext] = None) -> Dict[str, int]:
    """
    Summarize the size of the resolved namespace.

    Args:
        context: Optional tenant context for tenant-scoped stats

    Returns:
        Dictionary with the keys "nodes", "edges", "entities", "relations"
        (all for the resolved namespace) and "tenant_count" (tenant graphs in
        memory). All values are zero if the store is not initialized.
    """
    if not self._initialized:
        return dict.fromkeys(("nodes", "edges", "entities", "relations", "tenant_count"), 0)

    tenant_id = self._get_tenant_id(context)
    tenant_graph = self._get_graph(tenant_id)
    entity_index = self._get_entities_dict(tenant_id)
    relation_index = self._get_relations_dict(tenant_id)

    stats: Dict[str, int] = {}
    stats["nodes"] = tenant_graph.number_of_nodes()
    stats["edges"] = tenant_graph.number_of_edges()
    stats["entities"] = len(entity_index)
    stats["relations"] = len(relation_index)
    stats["tenant_count"] = len(self._tenant_graphs)
    return stats
|
|
1017
|
+
|
|
1018
|
+
async def clear(self, context: Optional[TenantContext] = None) -> None:
    """
    Remove stored data, either for one tenant or for the whole store.

    Does nothing if the store has not been initialized.

    Args:
        context: Optional tenant context for multi-tenant isolation.
            If it resolves to a tenant, only that tenant's graph, entity
            index and relation index are cleared and unregistered.
            If it resolves to no tenant, the global namespace and every
            tenant are cleared, and the property index is reset.
    """
    if not self._initialized:
        return

    tenant_id = self._get_tenant_id(context)

    if tenant_id is not None:
        # Tenant-scoped clear: empty and unregister the tenant's containers.
        # The property index is left as is in this branch.
        for registry in (self._tenant_graphs, self._tenant_entities, self._tenant_relations):
            if tenant_id in registry:
                registry.pop(tenant_id).clear()
        return

    # Full clear: global namespace first ...
    if self._global_graph is not None:
        self._global_graph.clear()
    self._global_entities.clear()
    self._global_relations.clear()

    # ... then every tenant graph is emptied before the registries are reset.
    for tenant_graph in list(self._tenant_graphs.values()):
        tenant_graph.clear()
    self._tenant_graphs.clear()
    self._tenant_entities.clear()
    self._tenant_relations.clear()

    if self._property_optimizer is not None:
        self._property_optimizer.property_index.clear()
|
|
1059
|
+
|
|
1060
|
+
# =========================================================================
|
|
1061
|
+
# PROPERTY OPTIMIZATION METHODS
|
|
1062
|
+
# =========================================================================
|
|
1063
|
+
|
|
1064
|
+
@property
def property_optimizer(self) -> Optional[PropertyOptimizer]:
    """Return the configured property optimizer, or None if there is none."""
    optimizer = self._property_optimizer
    return optimizer
|
|
1068
|
+
|
|
1069
|
+
def lookup_by_property(self, property_name: str, value: Any, context: Optional[TenantContext] = None) -> Set[str]:
    """
    Find the IDs of entities with a given property value via the property index.

    The index is queried through the property optimizer and its answer is
    then narrowed to the IDs present in the resolved namespace's entity
    index.

    Args:
        property_name: Property name to search
        value: Property value to match
        context: Optional tenant context for multi-tenant isolation

    Returns:
        Set of matching entity IDs; empty if no property optimizer is
        configured
    """
    optimizer = self._property_optimizer
    if optimizer is None:
        return set()

    indexed_ids = optimizer.lookup_by_property(property_name, value)

    # Keep only the IDs that belong to the requested namespace.
    entity_index = self._get_entities_dict(self._get_tenant_id(context))
    return set(indexed_ids).intersection(entity_index)
|
|
1093
|
+
|
|
1094
|
+
async def get_entities_by_property(
    self,
    property_name: str,
    value: Any,
    context: Optional[TenantContext] = None,
) -> List[Entity]:
    """
    Get entities by property value.

    Tries the property index first. If the index yields no entity for the
    resolved tenant, falls back to scanning that tenant's entities.

    Args:
        property_name: Property name to search
        value: Property value to match
        context: Optional tenant context for multi-tenant isolation

    Returns:
        List of matching entities belonging to the resolved tenant
    """
    tenant_id = self._get_tenant_id(context)
    entities_dict = self._get_entities_dict(tenant_id)

    # Try indexed lookup first
    if self._property_optimizer is not None:
        entity_ids = self._property_optimizer.lookup_by_property(property_name, value)
        if entity_ids:
            indexed_matches = [entities_dict[eid] for eid in entity_ids if eid in entities_dict]
            # The index is held by a single optimizer for the whole store,
            # while add_indexed_property() only back-fills one tenant's
            # existing entities. Hits that all belong to other tenants
            # therefore say nothing about this tenant, so only trust the
            # index when it produced at least one entity for this tenant.
            if indexed_matches:
                return indexed_matches

    # Fall back to scan (also taken when the index had no hit for this tenant)
    return [
        entity for entity in entities_dict.values()
        if entity.properties.get(property_name) == value
    ]
|
|
1127
|
+
|
|
1128
|
+
def add_indexed_property(self, property_name: str, context: Optional[TenantContext] = None) -> None:
    """
    Register a property for indexing and index the entities already stored.

    A PropertyOptimizer is created on first use if the store has none. Only
    the entities of the tenant resolved from ``context`` are added to the
    index by this call.

    Args:
        property_name: Name of the property to index
        context: Optional tenant context selecting whose entities get indexed
    """
    # Lazily create the optimizer the first time an index is requested
    if self._property_optimizer is None:
        self._property_optimizer = PropertyOptimizer()
    optimizer = self._property_optimizer

    optimizer.add_indexed_property(property_name)

    tenant_entities = self._get_entities_dict(self._get_tenant_id(context))

    # Back-fill the index with entities that already carry the property
    for entity_id, entity in tenant_entities.items():
        if property_name not in entity.properties:
            continue
        current_value = entity.properties[property_name]
        optimizer.property_index.add_to_index(entity_id, property_name, current_value)
|
|
1150
|
+
|
|
1151
|
+
async def get_all_entities(
    self,
    entity_type: Optional[str] = None,
    limit: Optional[int] = None,
    offset: int = 0,
    context: Optional[TenantContext] = None,
) -> List[Entity]:
    """
    Return the entities held for a tenant, optionally filtered and paginated.

    Reads the tenant's entities dictionary (requested with
    ``update_lru=False``), filters by type, then applies offset and limit.

    Args:
        entity_type: When truthy, only entities with this entity type are kept
        limit: When not None, the result is truncated with ``[:limit]``
        offset: When greater than zero, this many leading entities are skipped
        context: Optional tenant context for multi-tenant isolation

    Returns:
        List of entities matching the criteria
    """
    store = self._get_entities_dict(self._get_tenant_id(context), update_lru=False)

    # Materialise the values, narrowing by type only when a type was given
    if entity_type:
        selected = [item for item in store.values() if item.entity_type == entity_type]
    else:
        selected = list(store.values())

    # Pagination: a non-positive offset skips nothing
    start = offset if offset > 0 else 0
    page = selected[start:]
    return page if limit is None else page[:limit]
|
|
1188
|
+
|
|
1189
|
+
def __str__(self) -> str:
    """Summarise the store using the counts reported by ``get_stats()``."""
    stats = self.get_stats()
    summary = (
        f"InMemoryGraphStore(global_entities={stats['entities']}, "
        f"global_relations={stats['relations']}, tenant_count={stats['tenant_count']})"
    )
    return summary
|
|
1195
|
+
|
|
1196
|
+
def __repr__(self) -> str:
    """Return the same text as ``__str__``."""
    text = self.__str__()
    return text
|