aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiecs might be problematic. Click here for more details.
- aiecs/__init__.py +13 -16
- aiecs/__main__.py +7 -7
- aiecs/aiecs_client.py +269 -75
- aiecs/application/executors/operation_executor.py +79 -54
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
- aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
- aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
- aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +98 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
- aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
- aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
- aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
- aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
- aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
- aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
- aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
- aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
- aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
- aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
- aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
- aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
- aiecs/application/knowledge_graph/search/reranker.py +293 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +471 -0
- aiecs/config/__init__.py +20 -5
- aiecs/config/config.py +762 -31
- aiecs/config/graph_config.py +131 -0
- aiecs/config/tool_config.py +399 -0
- aiecs/core/__init__.py +29 -13
- aiecs/core/interface/__init__.py +2 -2
- aiecs/core/interface/execution_interface.py +22 -22
- aiecs/core/interface/storage_interface.py +37 -88
- aiecs/core/registry/__init__.py +31 -0
- aiecs/core/registry/service_registry.py +92 -0
- aiecs/domain/__init__.py +270 -1
- aiecs/domain/agent/__init__.py +191 -0
- aiecs/domain/agent/base_agent.py +3870 -0
- aiecs/domain/agent/exceptions.py +99 -0
- aiecs/domain/agent/graph_aware_mixin.py +569 -0
- aiecs/domain/agent/hybrid_agent.py +1435 -0
- aiecs/domain/agent/integration/__init__.py +29 -0
- aiecs/domain/agent/integration/context_compressor.py +216 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
- aiecs/domain/agent/integration/protocols.py +281 -0
- aiecs/domain/agent/integration/retry_policy.py +218 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
- aiecs/domain/agent/lifecycle.py +291 -0
- aiecs/domain/agent/llm_agent.py +692 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +1124 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +163 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
- aiecs/domain/agent/models.py +884 -0
- aiecs/domain/agent/observability.py +479 -0
- aiecs/domain/agent/persistence.py +449 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +159 -0
- aiecs/domain/agent/prompts/formatters.py +187 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +253 -0
- aiecs/domain/agent/tool_agent.py +444 -0
- aiecs/domain/agent/tools/__init__.py +15 -0
- aiecs/domain/agent/tools/schema_generator.py +364 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +469 -0
- aiecs/domain/community/analytics.py +432 -0
- aiecs/domain/community/collaborative_workflow.py +648 -0
- aiecs/domain/community/communication_hub.py +634 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +796 -0
- aiecs/domain/community/community_manager.py +803 -0
- aiecs/domain/community/decision_engine.py +849 -0
- aiecs/domain/community/exceptions.py +231 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +234 -0
- aiecs/domain/community/resource_manager.py +461 -0
- aiecs/domain/community/shared_context_manager.py +589 -0
- aiecs/domain/context/__init__.py +40 -10
- aiecs/domain/context/context_engine.py +1910 -0
- aiecs/domain/context/conversation_models.py +87 -53
- aiecs/domain/context/graph_memory.py +582 -0
- aiecs/domain/execution/model.py +12 -4
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +148 -0
- aiecs/domain/knowledge_graph/models/evidence.py +178 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
- aiecs/domain/knowledge_graph/models/path.py +171 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
- aiecs/domain/knowledge_graph/models/query.py +261 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
- aiecs/domain/knowledge_graph/models/relation.py +202 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
- aiecs/domain/task/dsl_processor.py +172 -56
- aiecs/domain/task/model.py +20 -8
- aiecs/domain/task/task_context.py +27 -24
- aiecs/infrastructure/__init__.py +0 -2
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +837 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
- aiecs/infrastructure/graph_storage/cache.py +424 -0
- aiecs/infrastructure/graph_storage/distributed.py +223 -0
- aiecs/infrastructure/graph_storage/error_handling.py +380 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
- aiecs/infrastructure/graph_storage/metrics.py +344 -0
- aiecs/infrastructure/graph_storage/migration.py +400 -0
- aiecs/infrastructure/graph_storage/pagination.py +483 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
- aiecs/infrastructure/graph_storage/postgres.py +1563 -0
- aiecs/infrastructure/graph_storage/property_storage.py +353 -0
- aiecs/infrastructure/graph_storage/protocols.py +76 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
- aiecs/infrastructure/graph_storage/streaming.py +487 -0
- aiecs/infrastructure/graph_storage/tenant.py +412 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
- aiecs/infrastructure/messaging/websocket_manager.py +51 -35
- aiecs/infrastructure/monitoring/__init__.py +22 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
- aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
- aiecs/infrastructure/monitoring/structured_logger.py +3 -7
- aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
- aiecs/infrastructure/persistence/__init__.py +14 -1
- aiecs/infrastructure/persistence/context_engine_client.py +184 -0
- aiecs/infrastructure/persistence/database_manager.py +67 -43
- aiecs/infrastructure/persistence/file_storage.py +180 -103
- aiecs/infrastructure/persistence/redis_client.py +74 -21
- aiecs/llm/__init__.py +73 -25
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
- aiecs/llm/client_factory.py +224 -36
- aiecs/llm/client_resolver.py +155 -0
- aiecs/llm/clients/__init__.py +38 -0
- aiecs/llm/clients/base_client.py +324 -0
- aiecs/llm/clients/google_function_calling_mixin.py +457 -0
- aiecs/llm/clients/googleai_client.py +241 -0
- aiecs/llm/clients/openai_client.py +158 -0
- aiecs/llm/clients/openai_compatible_mixin.py +367 -0
- aiecs/llm/clients/vertex_client.py +897 -0
- aiecs/llm/clients/xai_client.py +201 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +272 -0
- aiecs/llm/config/config_validator.py +206 -0
- aiecs/llm/config/model_config.py +143 -0
- aiecs/llm/protocols.py +149 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +89 -0
- aiecs/main.py +140 -121
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/module_checker.py +499 -0
- aiecs/scripts/aid/version_manager.py +235 -0
- aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
- aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
- aiecs/scripts/dependance_check/__init__.py +15 -0
- aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
- aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
- aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
- aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
- aiecs/scripts/knowledge_graph/__init__.py +3 -0
- aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
- aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
- aiecs/scripts/tools_develop/README.md +671 -0
- aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
- aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
- aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
- aiecs/scripts/tools_develop/schema_coverage.py +511 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
- aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
- aiecs/scripts/tools_develop/verify_tools.py +352 -0
- aiecs/tasks/__init__.py +0 -1
- aiecs/tasks/worker.py +115 -47
- aiecs/tools/__init__.py +194 -72
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +330 -0
- aiecs/tools/apisource/providers/__init__.py +112 -0
- aiecs/tools/apisource/providers/base.py +671 -0
- aiecs/tools/apisource/providers/census.py +397 -0
- aiecs/tools/apisource/providers/fred.py +535 -0
- aiecs/tools/apisource/providers/newsapi.py +409 -0
- aiecs/tools/apisource/providers/worldbank.py +352 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +363 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
- aiecs/tools/apisource/tool.py +832 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +334 -0
- aiecs/tools/base_tool.py +415 -21
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +607 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
- aiecs/tools/docs/content_insertion_tool.py +1320 -0
- aiecs/tools/docs/document_creator_tool.py +1323 -0
- aiecs/tools/docs/document_layout_tool.py +1160 -0
- aiecs/tools/docs/document_parser_tool.py +1011 -0
- aiecs/tools/docs/document_writer_tool.py +1829 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
- aiecs/tools/langchain_adapter.py +300 -138
- aiecs/tools/schema_generator.py +455 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +581 -0
- aiecs/tools/search_tool/cache.py +264 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +224 -0
- aiecs/tools/search_tool/core.py +778 -0
- aiecs/tools/search_tool/deduplicator.py +119 -0
- aiecs/tools/search_tool/error_handler.py +242 -0
- aiecs/tools/search_tool/metrics.py +343 -0
- aiecs/tools/search_tool/rate_limiter.py +172 -0
- aiecs/tools/search_tool/schemas.py +275 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
- aiecs/tools/statistics/data_loader_tool.py +555 -0
- aiecs/tools/statistics/data_profiler_tool.py +638 -0
- aiecs/tools/statistics/data_transformer_tool.py +580 -0
- aiecs/tools/statistics/data_visualizer_tool.py +498 -0
- aiecs/tools/statistics/model_trainer_tool.py +507 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
- aiecs/tools/task_tools/__init__.py +49 -36
- aiecs/tools/task_tools/chart_tool.py +200 -184
- aiecs/tools/task_tools/classfire_tool.py +268 -267
- aiecs/tools/task_tools/image_tool.py +175 -131
- aiecs/tools/task_tools/office_tool.py +226 -146
- aiecs/tools/task_tools/pandas_tool.py +477 -121
- aiecs/tools/task_tools/report_tool.py +390 -142
- aiecs/tools/task_tools/research_tool.py +149 -79
- aiecs/tools/task_tools/scraper_tool.py +339 -145
- aiecs/tools/task_tools/stats_tool.py +448 -209
- aiecs/tools/temp_file_manager.py +26 -24
- aiecs/tools/tool_executor/__init__.py +18 -16
- aiecs/tools/tool_executor/tool_executor.py +364 -52
- aiecs/utils/LLM_output_structor.py +74 -48
- aiecs/utils/__init__.py +14 -3
- aiecs/utils/base_callback.py +0 -3
- aiecs/utils/cache_provider.py +696 -0
- aiecs/utils/execution_utils.py +50 -31
- aiecs/utils/prompt_loader.py +1 -0
- aiecs/utils/token_usage_repository.py +37 -11
- aiecs/ws/socket_server.py +14 -4
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
- aiecs-1.7.6.dist-info/RECORD +337 -0
- aiecs-1.7.6.dist-info/entry_points.txt +13 -0
- aiecs/config/registry.py +0 -19
- aiecs/domain/context/content_engine.py +0 -982
- aiecs/llm/base_client.py +0 -99
- aiecs/llm/openai_client.py +0 -125
- aiecs/llm/vertex_client.py +0 -186
- aiecs/llm/xai_client.py +0 -184
- aiecs/scripts/dependency_checker.py +0 -857
- aiecs/scripts/quick_dependency_check.py +0 -269
- aiecs/tools/task_tools/search_api.py +0 -7
- aiecs-1.0.1.dist-info/RECORD +0 -90
- aiecs-1.0.1.dist-info/entry_points.txt +0 -7
- /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
- /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
- /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
- /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,1563 @@
|
|
|
1
|
+
"""
|
|
2
|
+
PostgreSQL Graph Storage Backend
|
|
3
|
+
|
|
4
|
+
Provides production-grade graph storage using PostgreSQL with:
|
|
5
|
+
- Connection pooling via asyncpg
|
|
6
|
+
- Transaction support
|
|
7
|
+
- Recursive CTEs for efficient graph traversal
|
|
8
|
+
- Optional pgvector support for vector similarity search
|
|
9
|
+
|
|
10
|
+
Multi-tenancy Support:
|
|
11
|
+
- SHARED_SCHEMA mode: Single schema with tenant_id column + optional RLS
|
|
12
|
+
- SEPARATE_SCHEMA mode: PostgreSQL schemas per tenant (CREATE SCHEMA tenant_xxx)
|
|
13
|
+
- Global namespace for tenant_id=NULL (backward compatible)
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import json
|
|
17
|
+
import asyncpg # type: ignore[import-untyped]
|
|
18
|
+
import logging
|
|
19
|
+
from typing import Any, Dict, List, Optional, Tuple, cast
|
|
20
|
+
from contextlib import asynccontextmanager
|
|
21
|
+
import numpy as np
|
|
22
|
+
|
|
23
|
+
from aiecs.domain.knowledge_graph.models.entity import Entity
|
|
24
|
+
from aiecs.domain.knowledge_graph.models.relation import Relation
|
|
25
|
+
from aiecs.domain.knowledge_graph.models.path import Path
|
|
26
|
+
from aiecs.infrastructure.graph_storage.base import GraphStore
|
|
27
|
+
from aiecs.infrastructure.graph_storage.tenant import (
|
|
28
|
+
TenantContext,
|
|
29
|
+
TenantIsolationMode,
|
|
30
|
+
CrossTenantRelationError,
|
|
31
|
+
)
|
|
32
|
+
from aiecs.config.config import get_settings
|
|
33
|
+
|
|
34
|
+
logger = logging.getLogger(__name__)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# PostgreSQL Schema for graph storage with multi-tenancy support
|
|
38
|
+
# Note: For existing databases, run MIGRATION_SQL first to add tenant_id columns
|
|
39
|
+
# Uses empty string '' as default for tenant_id to allow proper composite primary key
|
|
40
|
+
SCHEMA_SQL = """
|
|
41
|
+
-- Entities table with tenant_id for multi-tenancy
|
|
42
|
+
-- tenant_id = '' (empty string) for global namespace
|
|
43
|
+
CREATE TABLE IF NOT EXISTS graph_entities (
|
|
44
|
+
id TEXT NOT NULL,
|
|
45
|
+
tenant_id TEXT NOT NULL DEFAULT '', -- Empty string for global namespace
|
|
46
|
+
entity_type TEXT NOT NULL,
|
|
47
|
+
properties JSONB NOT NULL DEFAULT '{}'::jsonb,
|
|
48
|
+
embedding BYTEA,
|
|
49
|
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
50
|
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
51
|
+
PRIMARY KEY (id, tenant_id)
|
|
52
|
+
);
|
|
53
|
+
|
|
54
|
+
-- Relations table with tenant_id for multi-tenancy
|
|
55
|
+
CREATE TABLE IF NOT EXISTS graph_relations (
|
|
56
|
+
id TEXT NOT NULL,
|
|
57
|
+
tenant_id TEXT NOT NULL DEFAULT '', -- Empty string for global namespace
|
|
58
|
+
relation_type TEXT NOT NULL,
|
|
59
|
+
source_id TEXT NOT NULL,
|
|
60
|
+
target_id TEXT NOT NULL,
|
|
61
|
+
properties JSONB NOT NULL DEFAULT '{}'::jsonb,
|
|
62
|
+
weight REAL DEFAULT 1.0,
|
|
63
|
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
64
|
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
65
|
+
PRIMARY KEY (id, tenant_id)
|
|
66
|
+
);
|
|
67
|
+
|
|
68
|
+
-- Indexes for performance
|
|
69
|
+
CREATE INDEX IF NOT EXISTS idx_graph_entities_type ON graph_entities(entity_type);
|
|
70
|
+
CREATE INDEX IF NOT EXISTS idx_graph_entities_tenant ON graph_entities(tenant_id);
|
|
71
|
+
CREATE INDEX IF NOT EXISTS idx_graph_entities_tenant_type ON graph_entities(tenant_id, entity_type);
|
|
72
|
+
CREATE INDEX IF NOT EXISTS idx_graph_entities_properties ON graph_entities USING GIN(properties);
|
|
73
|
+
CREATE INDEX IF NOT EXISTS idx_graph_relations_type ON graph_relations(relation_type);
|
|
74
|
+
CREATE INDEX IF NOT EXISTS idx_graph_relations_tenant ON graph_relations(tenant_id);
|
|
75
|
+
CREATE INDEX IF NOT EXISTS idx_graph_relations_source ON graph_relations(source_id);
|
|
76
|
+
CREATE INDEX IF NOT EXISTS idx_graph_relations_target ON graph_relations(target_id);
|
|
77
|
+
CREATE INDEX IF NOT EXISTS idx_graph_relations_tenant_source ON graph_relations(tenant_id, source_id);
|
|
78
|
+
CREATE INDEX IF NOT EXISTS idx_graph_relations_tenant_target ON graph_relations(tenant_id, target_id);
|
|
79
|
+
CREATE INDEX IF NOT EXISTS idx_graph_relations_properties ON graph_relations USING GIN(properties);
|
|
80
|
+
|
|
81
|
+
-- Optional: Add pgvector extension support (if available)
|
|
82
|
+
-- CREATE EXTENSION IF NOT EXISTS vector;
|
|
83
|
+
-- ALTER TABLE graph_entities ADD COLUMN IF NOT EXISTS embedding_vector vector(1536);
|
|
84
|
+
-- CREATE INDEX IF NOT EXISTS idx_graph_entities_embedding ON graph_entities USING ivfflat (embedding_vector vector_cosine_ops);
|
|
85
|
+
"""
|
|
86
|
+
|
|
87
|
+
# Migration SQL for existing databases (adds tenant_id columns if they don't exist)
|
|
88
|
+
MIGRATION_SQL = """
|
|
89
|
+
-- Add tenant_id column to entities if not exists
|
|
90
|
+
DO $$
|
|
91
|
+
BEGIN
|
|
92
|
+
IF NOT EXISTS (
|
|
93
|
+
SELECT 1 FROM information_schema.columns
|
|
94
|
+
WHERE table_name = 'graph_entities' AND column_name = 'tenant_id'
|
|
95
|
+
) THEN
|
|
96
|
+
-- Add tenant_id column with empty string default
|
|
97
|
+
ALTER TABLE graph_entities ADD COLUMN tenant_id TEXT NOT NULL DEFAULT '';
|
|
98
|
+
|
|
99
|
+
-- Drop old primary key if exists
|
|
100
|
+
ALTER TABLE graph_entities DROP CONSTRAINT IF EXISTS graph_entities_pkey;
|
|
101
|
+
|
|
102
|
+
-- Create new composite primary key
|
|
103
|
+
ALTER TABLE graph_entities ADD PRIMARY KEY (id, tenant_id);
|
|
104
|
+
|
|
105
|
+
-- Create indexes
|
|
106
|
+
CREATE INDEX IF NOT EXISTS idx_graph_entities_tenant ON graph_entities(tenant_id);
|
|
107
|
+
CREATE INDEX IF NOT EXISTS idx_graph_entities_tenant_type ON graph_entities(tenant_id, entity_type);
|
|
108
|
+
END IF;
|
|
109
|
+
END $$;
|
|
110
|
+
|
|
111
|
+
-- Add tenant_id column to relations if not exists
|
|
112
|
+
DO $$
|
|
113
|
+
BEGIN
|
|
114
|
+
IF NOT EXISTS (
|
|
115
|
+
SELECT 1 FROM information_schema.columns
|
|
116
|
+
WHERE table_name = 'graph_relations' AND column_name = 'tenant_id'
|
|
117
|
+
) THEN
|
|
118
|
+
-- Add tenant_id column with empty string default
|
|
119
|
+
ALTER TABLE graph_relations ADD COLUMN tenant_id TEXT NOT NULL DEFAULT '';
|
|
120
|
+
|
|
121
|
+
-- Drop old primary key if exists
|
|
122
|
+
ALTER TABLE graph_relations DROP CONSTRAINT IF EXISTS graph_relations_pkey;
|
|
123
|
+
|
|
124
|
+
-- Create new composite primary key
|
|
125
|
+
ALTER TABLE graph_relations ADD PRIMARY KEY (id, tenant_id);
|
|
126
|
+
|
|
127
|
+
-- Create indexes
|
|
128
|
+
CREATE INDEX IF NOT EXISTS idx_graph_relations_tenant ON graph_relations(tenant_id);
|
|
129
|
+
CREATE INDEX IF NOT EXISTS idx_graph_relations_tenant_source ON graph_relations(tenant_id, source_id);
|
|
130
|
+
CREATE INDEX IF NOT EXISTS idx_graph_relations_tenant_target ON graph_relations(tenant_id, target_id);
|
|
131
|
+
END IF;
|
|
132
|
+
END $$;
|
|
133
|
+
"""
|
|
134
|
+
|
|
135
|
+
# RLS (Row-Level Security) policies for SHARED_SCHEMA mode
|
|
136
|
+
RLS_SETUP_SQL = """
|
|
137
|
+
-- Enable RLS on tables
|
|
138
|
+
ALTER TABLE graph_entities ENABLE ROW LEVEL SECURITY;
|
|
139
|
+
ALTER TABLE graph_relations ENABLE ROW LEVEL SECURITY;
|
|
140
|
+
|
|
141
|
+
-- Force RLS even for table owners (important for superuser/owner connections)
|
|
142
|
+
ALTER TABLE graph_entities FORCE ROW LEVEL SECURITY;
|
|
143
|
+
ALTER TABLE graph_relations FORCE ROW LEVEL SECURITY;
|
|
144
|
+
|
|
145
|
+
-- Drop existing policies if they exist
|
|
146
|
+
DROP POLICY IF EXISTS tenant_isolation_entities ON graph_entities;
|
|
147
|
+
DROP POLICY IF EXISTS tenant_isolation_relations ON graph_relations;
|
|
148
|
+
|
|
149
|
+
-- Create RLS policies
|
|
150
|
+
-- Note: Uses current_setting('app.current_tenant_id', true) which returns empty string if not set
|
|
151
|
+
-- Empty string ('') represents the global namespace
|
|
152
|
+
CREATE POLICY tenant_isolation_entities ON graph_entities
|
|
153
|
+
USING (
|
|
154
|
+
tenant_id = '' OR
|
|
155
|
+
tenant_id = COALESCE(current_setting('app.current_tenant_id', true), '')
|
|
156
|
+
);
|
|
157
|
+
|
|
158
|
+
CREATE POLICY tenant_isolation_relations ON graph_relations
|
|
159
|
+
USING (
|
|
160
|
+
tenant_id = '' OR
|
|
161
|
+
tenant_id = COALESCE(current_setting('app.current_tenant_id', true), '')
|
|
162
|
+
);
|
|
163
|
+
"""
|
|
164
|
+
|
|
165
|
+
# Schema template for SEPARATE_SCHEMA mode
|
|
166
|
+
TENANT_SCHEMA_SQL = """
|
|
167
|
+
-- Create tenant schema
|
|
168
|
+
CREATE SCHEMA IF NOT EXISTS {schema_name};
|
|
169
|
+
|
|
170
|
+
-- Entities table in tenant schema
|
|
171
|
+
CREATE TABLE IF NOT EXISTS {schema_name}.graph_entities (
|
|
172
|
+
id TEXT PRIMARY KEY,
|
|
173
|
+
entity_type TEXT NOT NULL,
|
|
174
|
+
properties JSONB NOT NULL DEFAULT '{{}}'::jsonb,
|
|
175
|
+
embedding BYTEA,
|
|
176
|
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
177
|
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
|
178
|
+
);
|
|
179
|
+
|
|
180
|
+
-- Relations table in tenant schema
|
|
181
|
+
CREATE TABLE IF NOT EXISTS {schema_name}.graph_relations (
|
|
182
|
+
id TEXT PRIMARY KEY,
|
|
183
|
+
relation_type TEXT NOT NULL,
|
|
184
|
+
source_id TEXT NOT NULL,
|
|
185
|
+
target_id TEXT NOT NULL,
|
|
186
|
+
properties JSONB NOT NULL DEFAULT '{{}}'::jsonb,
|
|
187
|
+
weight REAL DEFAULT 1.0,
|
|
188
|
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
189
|
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
|
190
|
+
);
|
|
191
|
+
|
|
192
|
+
-- Indexes
|
|
193
|
+
CREATE INDEX IF NOT EXISTS idx_{schema_name}_entities_type ON {schema_name}.graph_entities(entity_type);
|
|
194
|
+
CREATE INDEX IF NOT EXISTS idx_{schema_name}_relations_type ON {schema_name}.graph_relations(relation_type);
|
|
195
|
+
CREATE INDEX IF NOT EXISTS idx_{schema_name}_relations_source ON {schema_name}.graph_relations(source_id);
|
|
196
|
+
CREATE INDEX IF NOT EXISTS idx_{schema_name}_relations_target ON {schema_name}.graph_relations(target_id);
|
|
197
|
+
"""
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
class PostgresGraphStore(GraphStore):
|
|
201
|
+
"""
|
|
202
|
+
PostgreSQL-based graph storage implementation
|
|
203
|
+
|
|
204
|
+
Provides production-grade persistent graph storage with:
|
|
205
|
+
- Connection pooling via asyncpg
|
|
206
|
+
- ACID transactions
|
|
207
|
+
- SQL-optimized queries with recursive CTEs
|
|
208
|
+
- JSONB for flexible property storage
|
|
209
|
+
- Optional pgvector for vector similarity search
|
|
210
|
+
|
|
211
|
+
Features:
|
|
212
|
+
- Production-ready with connection pooling
|
|
213
|
+
- Efficient graph traversal using WITH RECURSIVE
|
|
214
|
+
- Automatic schema initialization
|
|
215
|
+
- Transaction support
|
|
216
|
+
- JSONB indexing for fast property queries
|
|
217
|
+
|
|
218
|
+
Multi-Tenancy Support:
|
|
219
|
+
- SHARED_SCHEMA mode: Single schema with tenant_id column + optional RLS
|
|
220
|
+
- SEPARATE_SCHEMA mode: PostgreSQL schemas per tenant (CREATE SCHEMA tenant_xxx)
|
|
221
|
+
- Global namespace for tenant_id=NULL (backward compatible)
|
|
222
|
+
- Row-Level Security (RLS) for automatic tenant filtering
|
|
223
|
+
|
|
224
|
+
Example:
|
|
225
|
+
```python
|
|
226
|
+
from aiecs.infrastructure.graph_storage import PostgresGraphStore
|
|
227
|
+
|
|
228
|
+
# Using config from settings
|
|
229
|
+
store = PostgresGraphStore()
|
|
230
|
+
await store.initialize()
|
|
231
|
+
|
|
232
|
+
# Multi-tenant with RLS
|
|
233
|
+
store = PostgresGraphStore(
|
|
234
|
+
isolation_mode=TenantIsolationMode.SHARED_SCHEMA,
|
|
235
|
+
enable_rls=True
|
|
236
|
+
)
|
|
237
|
+
await store.initialize()
|
|
238
|
+
|
|
239
|
+
# Multi-tenant usage
|
|
240
|
+
from aiecs.infrastructure.graph_storage.tenant import TenantContext
|
|
241
|
+
context = TenantContext(tenant_id="acme-corp")
|
|
242
|
+
await store.add_entity(entity, context=context)
|
|
243
|
+
|
|
244
|
+
await store.close()
|
|
245
|
+
```
|
|
246
|
+
"""
|
|
247
|
+
|
|
248
|
+
def __init__(
    self,
    host: Optional[str] = None,
    port: Optional[int] = None,
    user: Optional[str] = None,
    password: Optional[str] = None,
    database: Optional[str] = None,
    min_pool_size: int = 5,
    max_pool_size: int = 20,
    enable_pgvector: bool = False,
    isolation_mode: TenantIsolationMode = TenantIsolationMode.SHARED_SCHEMA,
    enable_rls: bool = False,
    pool: Optional[asyncpg.Pool] = None,
    database_manager: Optional[Any] = None,
    **kwargs,
):
    """
    Initialize PostgreSQL graph store.

    Pure configuration; no I/O happens here. The connection pool is
    created (or the external one adopted) in ``initialize()``.

    Args:
        host: PostgreSQL host (defaults from config)
        port: PostgreSQL port (defaults from config)
        user: PostgreSQL user (defaults from config)
        password: PostgreSQL password (defaults from config)
        database: Database name (defaults from config)
        min_pool_size: Minimum connection pool size
        max_pool_size: Maximum connection pool size
        enable_pgvector: Enable pgvector extension for vector search
        isolation_mode: Tenant isolation mode (SHARED_SCHEMA or SEPARATE_SCHEMA)
        enable_rls: Enable Row-Level Security for SHARED_SCHEMA mode
        pool: Optional existing asyncpg pool to reuse (from DatabaseManager)
        database_manager: Optional DatabaseManager instance to reuse its pool
        **kwargs: Additional asyncpg connection parameters
    """
    super().__init__()

    # Multi-tenancy configuration
    self.isolation_mode = isolation_mode
    self.enable_rls = enable_rls
    self._initialized_tenant_schemas: set = set()  # Track created tenant schemas

    # Option 1: Reuse existing pool.
    # We "own" the pool (and must close it in close()) only when neither
    # an explicit pool nor a database_manager was supplied.
    self._external_pool = pool
    self._owns_pool = pool is None and database_manager is None

    # Option 2: Reuse DatabaseManager's pool (overrides an explicit pool).
    if database_manager is not None:
        self._external_pool = getattr(database_manager, "connection_pool", None)
        if self._external_pool:
            logger.info("Reusing DatabaseManager's connection pool")
            self._owns_pool = False

    # Load config from settings if not provided (needed for own pool creation)
    # Support both connection string (dsn) and individual parameters
    self.dsn = None
    if not all([host, port, user, password, database]):
        settings = get_settings()
        db_config = settings.database_config

        # Check if connection string (dsn) is provided (for cloud
        # databases)
        if "dsn" in db_config:
            self.dsn = db_config["dsn"]
            # Still set defaults for logging/display purposes
            host = host or "cloud"
            port = port or 5432
            user = user or "postgres"
            password = password or ""
            database = database or "aiecs"
        else:
            # Use individual parameters (for local databases)
            host = host or db_config.get("host", "localhost")
            port = port or db_config.get("port", 5432)
            user = user or db_config.get("user", "postgres")
            password = password or db_config.get("password", "")
            database = database or db_config.get("database", "aiecs")

    self.host = host
    self.port = port
    self.user = user
    self.password = password
    self.database = database
    self.min_pool_size = min_pool_size
    self.max_pool_size = max_pool_size
    self.enable_pgvector = enable_pgvector
    self.conn_kwargs = kwargs

    # Active pool: starts as the external pool (or None until initialize()).
    self.pool: Optional[asyncpg.Pool] = self._external_pool
    self._is_initialized = False
    # Connection pinned by transaction(); CRUD methods reuse it when set.
    self._transaction_conn: Optional[asyncpg.Connection] = None
|
|
338
|
+
|
|
339
|
+
def _ensure_pool(self) -> asyncpg.Pool:
    """Return the active connection pool, failing fast when absent.

    Raises:
        RuntimeError: If no pool has been created or adopted yet.
    """
    pool = self.pool
    if pool is None:
        raise RuntimeError("Connection pool not initialized")
    return pool
|
|
344
|
+
|
|
345
|
+
async def initialize(self):
    """
    Initialize the PostgreSQL connection pool and create/migrate the schema.

    Steps: create (or adopt) the pool, optionally enable pgvector, create
    tables for a fresh database or run the multi-tenancy migration for an
    existing one, optionally add the embedding column/index, and finally
    set up RLS policies. pgvector/RLS failures are downgraded to warnings
    (the corresponding feature flag is switched off); everything else
    re-raises.

    Raises:
        Exception: Re-raises any non-optional setup failure after logging.
    """
    try:
        # Create connection pool only if we don't have an external one
        if self._owns_pool:
            # Use connection string (dsn) if available (for cloud databases)
            # Otherwise use individual parameters (for local databases)
            if self.dsn:
                self.pool = await asyncpg.create_pool(
                    dsn=self.dsn,
                    min_size=self.min_pool_size,
                    max_size=self.max_pool_size,
                    **self.conn_kwargs,
                )
                logger.info("PostgreSQL connection pool created using connection string (cloud/local)")
            else:
                self.pool = await asyncpg.create_pool(
                    host=self.host,
                    port=self.port,
                    user=self.user,
                    password=self.password,
                    database=self.database,
                    min_size=self.min_pool_size,
                    max_size=self.max_pool_size,
                    **self.conn_kwargs,
                )
                logger.info(f"PostgreSQL connection pool created: {self.host}:{self.port}/{self.database}")
        else:
            logger.info("Using external PostgreSQL connection pool (shared with AIECS DatabaseManager)")

        # Create schema
        pool = self._ensure_pool()
        async with pool.acquire() as conn:
            # Optionally enable pgvector first (must exist before the
            # embedding column/index below can be created).
            if self.enable_pgvector:
                try:
                    await conn.execute("CREATE EXTENSION IF NOT EXISTS vector")
                    logger.info("pgvector extension enabled")
                except Exception as e:
                    logger.warning(f"Failed to enable pgvector: {e}. Continuing without vector support.")
                    self.enable_pgvector = False

            # Check if tables exist and need migration
            tables_exist = await conn.fetchval(
                """
                SELECT EXISTS (
                    SELECT 1 FROM information_schema.tables
                    WHERE table_name = 'graph_entities'
                )
                """
            )

            if tables_exist:
                # Run migration for existing databases to add tenant_id
                try:
                    await conn.execute(MIGRATION_SQL)
                    logger.info("Database migration for multi-tenancy completed")
                except Exception as e:
                    logger.warning(f"Migration may have already been applied: {e}")
            else:
                # Execute schema creation for new databases
                await conn.execute(SCHEMA_SQL)

            # Add vector column if pgvector is enabled
            if self.enable_pgvector:
                try:
                    # Check if vector column exists
                    column_exists = await conn.fetchval(
                        """
                        SELECT EXISTS (
                            SELECT 1 FROM information_schema.columns
                            WHERE table_name = 'graph_entities'
                            AND column_name = 'embedding_vector'
                        )
                        """
                    )

                    if not column_exists:
                        # Add vector column (default dimension 1536, can be
                        # adjusted)
                        await conn.execute(
                            """
                            ALTER TABLE graph_entities
                            ADD COLUMN embedding_vector vector(1536)
                            """
                        )
                        logger.info("Added embedding_vector column")

                    # Create index if it doesn't exist
                    index_exists = await conn.fetchval(
                        """
                        SELECT EXISTS (
                            SELECT 1 FROM pg_indexes
                            WHERE tablename = 'graph_entities'
                            AND indexname = 'idx_graph_entities_embedding'
                        )
                        """
                    )

                    if not index_exists:
                        await conn.execute(
                            """
                            CREATE INDEX idx_graph_entities_embedding
                            ON graph_entities USING ivfflat (embedding_vector vector_cosine_ops)
                            WITH (lists = 100)
                            """
                        )
                        logger.info("Created vector similarity index")
                except Exception as e:
                    logger.warning(f"Failed to set up pgvector column/index: {e}")

            # Set up RLS if enabled for SHARED_SCHEMA mode
            if self.enable_rls and self.isolation_mode == TenantIsolationMode.SHARED_SCHEMA:
                try:
                    await conn.execute(RLS_SETUP_SQL)
                    logger.info("Row-Level Security (RLS) policies enabled")
                except Exception as e:
                    logger.warning(f"Failed to set up RLS: {e}. Continuing without RLS.")
                    self.enable_rls = False

        self._is_initialized = True
        self._initialized_tenant_schemas = set()
        logger.info("PostgreSQL graph store initialized successfully")

    except Exception as e:
        logger.error(f"Failed to initialize PostgreSQL graph store: {e}")
        raise
|
|
472
|
+
|
|
473
|
+
async def close(self):
    """Close database connection pool (only if we own it)"""
    if self.pool:
        if self._owns_pool:
            # We created this pool in initialize(), so we must tear it down.
            await self.pool.close()
            self.pool = None
            logger.info("PostgreSQL connection pool closed")
        else:
            # Shared pool: drop our reference but leave it running for
            # the owner (e.g. the AIECS DatabaseManager).
            logger.info("Detaching from shared PostgreSQL connection pool (not closing)")
            self.pool = None
    self._is_initialized = False
    self._initialized_tenant_schemas = set()
|
|
484
|
+
|
|
485
|
+
# =========================================================================
|
|
486
|
+
# Multi-Tenancy Helpers
|
|
487
|
+
# =========================================================================
|
|
488
|
+
|
|
489
|
+
def _get_tenant_id(self, context: Optional[TenantContext]) -> str:
    """Resolve the tenant id for *context*.

    Returns the empty string (global namespace) when no context or no
    tenant_id is present.
    """
    if not context:
        return ""
    return context.tenant_id or ""
|
|
492
|
+
|
|
493
|
+
def _get_schema_name(self, tenant_id: Optional[str]) -> str:
    """
    Get schema name for SEPARATE_SCHEMA mode.

    Returns 'public' for the global namespace or 'tenant_xxx' for tenants.

    The tenant_id is sanitized into a safe SQL identifier: every character
    outside [A-Za-z0-9_] becomes '_'. The result is later interpolated
    directly into DDL (TENANT_SCHEMA_SQL.format) and ``SET search_path``
    statements, so it must never contain quoting or statement
    metacharacters. Previously only '-' was mapped, letting characters
    such as quotes or dots reach the SQL text.

    Args:
        tenant_id: Raw tenant identifier, or None for the global namespace.

    Returns:
        A schema name safe to embed as an unquoted SQL identifier.
    """
    if tenant_id is None:
        return "public"
    # Keep hyphen->underscore behavior, and also neutralize any other
    # non-identifier character (quote, dot, space, ...) that could break
    # or inject into the SQL the name is formatted into.
    safe_tenant = "".join(
        c if (c.isascii() and c.isalnum()) or c == "_" else "_"
        for c in tenant_id
    )
    return f"tenant_{safe_tenant}"
|
|
504
|
+
|
|
505
|
+
async def _ensure_tenant_schema(self, conn: asyncpg.Connection, tenant_id: str) -> None:
    """Create the per-tenant schema once (SEPARATE_SCHEMA mode only).

    No-op outside SEPARATE_SCHEMA mode, or when the schema was already
    created during this store's lifetime (tracked in
    ``_initialized_tenant_schemas``).
    """
    if (
        self.isolation_mode != TenantIsolationMode.SEPARATE_SCHEMA
        or tenant_id in self._initialized_tenant_schemas
    ):
        return

    schema_name = self._get_schema_name(tenant_id)
    await conn.execute(TENANT_SCHEMA_SQL.format(schema_name=schema_name))
    self._initialized_tenant_schemas.add(tenant_id)
    logger.info(f"Created tenant schema: {schema_name}")
|
|
521
|
+
|
|
522
|
+
async def _set_tenant_context(self, conn: asyncpg.Connection, tenant_id: str) -> None:
    """
    Set tenant context for RLS or search_path based on isolation mode.

    For SHARED_SCHEMA with RLS: sets app.current_tenant_id for the current
    transaction via set_config(..., is_local := true), which is equivalent
    to SET LOCAL but accepts a bind parameter — preventing SQL injection
    through tenant_id (previously the value was interpolated into a
    string literal).
    For SEPARATE_SCHEMA: SET search_path = tenant_xxx, public

    Args:
        conn: Connection on which to set the context.
        tenant_id: Tenant identifier ('' selects the global namespace).

    Raises:
        ValueError: If the derived schema name contains non-identifier
            characters (defense-in-depth; SET cannot take bind parameters).
    """
    if self.isolation_mode == TenantIsolationMode.SEPARATE_SCHEMA:
        schema_name = self._get_schema_name(tenant_id if tenant_id else None)
        # SET search_path cannot be parameterized, so verify the
        # identifier is safe before interpolating it.
        if not all(ch.isalnum() or ch == "_" for ch in schema_name):
            raise ValueError(f"Unsafe schema name: {schema_name!r}")
        await conn.execute(f"SET search_path = {schema_name}, public")
        logger.debug(f"Set search_path to {schema_name}")
    elif self.enable_rls:
        # Set app.current_tenant_id for RLS policies (empty string for global).
        # set_config(..., true) scopes the value to the current transaction
        # (same as SET LOCAL) while allowing a parameterized value.
        await conn.execute(
            "SELECT set_config('app.current_tenant_id', $1, true)",
            tenant_id,
        )
        logger.debug(f"Set LOCAL app.current_tenant_id = '{tenant_id}'")
        # Verify it was set (current_setting(..., true) returns NULL
        # instead of raising when the GUC is absent).
        check = await conn.fetchval("SELECT current_setting('app.current_tenant_id', true)")
        logger.debug(f"Verified app.current_tenant_id = '{check}'")
|
|
541
|
+
|
|
542
|
+
def _build_tenant_filter(self, tenant_id: str, table_alias: str = "") -> Tuple[str, List]:
    """
    Build a WHERE-clause fragment for tenant filtering (SHARED_SCHEMA, no RLS).

    The fragment contains a literal "${param}" placeholder that the caller
    replaces with the actual positional parameter number.

    Returns:
        Tuple of (WHERE clause fragment, parameters list)
    """
    if table_alias:
        column = f"{table_alias}.tenant_id"
    else:
        column = "tenant_id"
    # tenant_id is always a string; '' selects the global namespace.
    return column + " = ${param}", [tenant_id]
|
|
552
|
+
|
|
553
|
+
@asynccontextmanager
async def transaction(self):
    """
    Transaction context manager for atomic operations.

    While the context is active, every store operation runs on the same
    pinned connection, so a failure rolls all of them back together.

    Usage:
        ```python
        async with store.transaction():
            await store.add_entity(entity1)
            await store.add_entity(entity2)
            # Both entities added atomically
        ```

    Raises:
        RuntimeError: If the store has not been initialized.
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    async with self._ensure_pool().acquire() as conn:
        async with conn.transaction():
            # Pin this connection so nested store calls reuse it; restore
            # whatever was pinned before on the way out.
            previous = self._transaction_conn
            self._transaction_conn = conn
            try:
                yield conn
            finally:
                self._transaction_conn = previous
|
|
579
|
+
|
|
580
|
+
async def _get_connection(self):
    """
    Get a connection source: the pinned transaction connection or the pool.

    NOTE: The return type is asymmetric. Inside a transaction() scope this
    returns the raw asyncpg.Connection; otherwise it returns the result of
    pool.acquire(), which the caller must use as an async context manager
    so the connection is released. Callers must handle both shapes.

    Raises:
        RuntimeError: If no transaction is active and the pool has not
            been initialized (previously this surfaced as an
            AttributeError on None).
    """
    if self._transaction_conn:
        return self._transaction_conn
    # Use _ensure_pool() for a clear, consistent error when uninitialized.
    pool = self._ensure_pool()
    return pool.acquire()
|
|
585
|
+
|
|
586
|
+
# =========================================================================
|
|
587
|
+
# Tier 1: Basic Interface (PostgreSQL-optimized implementations)
|
|
588
|
+
# =========================================================================
|
|
589
|
+
|
|
590
|
+
async def add_entity(self, entity: Entity, context: Optional[TenantContext] = None) -> None:
    """
    Add entity to PostgreSQL database (upsert: existing rows are updated).

    SEPARATE_SCHEMA mode writes into the tenant's own schema (created on
    demand); SHARED_SCHEMA mode stores tenant_id alongside the row ('' for
    the global namespace).

    Args:
        entity: Entity to add
        context: Optional tenant context for multi-tenant isolation

    Raises:
        RuntimeError: If the store has not been initialized.
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    tenant_id = self._get_tenant_id(context)
    logger.debug(f"add_entity called with entity_id='{entity.id}', tenant_id='{tenant_id}', enable_rls={self.enable_rls}")

    # Set tenant_id on entity if context provided
    # NOTE(review): _get_tenant_id always returns a str ('' for global),
    # so the "is not None" half of this guard is vacuously true.
    if tenant_id is not None and entity.tenant_id is None:
        entity.tenant_id = tenant_id

    # Serialize data
    properties_json = json.dumps(entity.properties)
    embedding_blob = self._serialize_embedding(entity.embedding) if entity.embedding else None

    async def _execute(conn: asyncpg.Connection):
        # Set tenant context (search_path or RLS)
        if self.isolation_mode == TenantIsolationMode.SEPARATE_SCHEMA and tenant_id:
            await self._ensure_tenant_schema(conn, tenant_id)

        if self.isolation_mode == TenantIsolationMode.SEPARATE_SCHEMA:
            # SEPARATE_SCHEMA: No tenant_id column
            await conn.execute(
                """
                INSERT INTO graph_entities (id, entity_type, properties, embedding)
                VALUES ($1, $2, $3::jsonb, $4)
                ON CONFLICT (id) DO UPDATE SET
                    entity_type = EXCLUDED.entity_type,
                    properties = EXCLUDED.properties,
                    embedding = EXCLUDED.embedding,
                    updated_at = CURRENT_TIMESTAMP
                """,
                entity.id,
                entity.entity_type,
                properties_json,
                embedding_blob,
            )
        else:
            # SHARED_SCHEMA: Include tenant_id column
            await conn.execute(
                """
                INSERT INTO graph_entities (id, tenant_id, entity_type, properties, embedding)
                VALUES ($1, $2, $3, $4::jsonb, $5)
                ON CONFLICT (id, tenant_id) DO UPDATE SET
                    entity_type = EXCLUDED.entity_type,
                    properties = EXCLUDED.properties,
                    embedding = EXCLUDED.embedding,
                    updated_at = CURRENT_TIMESTAMP
                """,
                entity.id,
                tenant_id,
                entity.entity_type,
                properties_json,
                embedding_blob,
            )

    if self._transaction_conn:
        # Inside transaction(): reuse the pinned connection.
        await self._set_tenant_context(self._transaction_conn, tenant_id)
        await _execute(self._transaction_conn)
    else:
        pool = self._ensure_pool()
        async with pool.acquire() as conn:
            # Wrap in transaction if RLS is enabled (SET LOCAL requires transaction)
            if self.enable_rls:
                async with conn.transaction():
                    await self._set_tenant_context(conn, tenant_id)
                    await _execute(conn)
            else:
                await self._set_tenant_context(conn, tenant_id)
                await _execute(conn)
|
|
667
|
+
|
|
668
|
+
async def get_entity(self, entity_id: str, context: Optional[TenantContext] = None) -> Optional[Entity]:
    """
    Get entity from PostgreSQL database.

    Tenant scoping depends on the mode: SEPARATE_SCHEMA relies on
    search_path, RLS on the session GUC, and plain SHARED_SCHEMA on an
    explicit tenant_id predicate.

    Args:
        entity_id: Entity ID to retrieve
        context: Optional tenant context for multi-tenant isolation

    Returns:
        The Entity, or None if no matching row is visible to this tenant.

    Raises:
        RuntimeError: If the store has not been initialized.
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    tenant_id = self._get_tenant_id(context)

    async def _fetch(conn: asyncpg.Connection):
        if self.isolation_mode == TenantIsolationMode.SEPARATE_SCHEMA:
            # Schema-per-tenant tables have no tenant_id column.
            return await conn.fetchrow(
                """
                SELECT id, entity_type, properties, embedding
                FROM graph_entities
                WHERE id = $1
                """,
                entity_id,
            )
        elif self.enable_rls:
            # RLS will filter automatically
            return await conn.fetchrow(
                """
                SELECT id, tenant_id, entity_type, properties, embedding
                FROM graph_entities
                WHERE id = $1
                """,
                entity_id,
            )
        else:
            # Manual tenant filtering (tenant_id is always a string, '' for global)
            return await conn.fetchrow(
                """
                SELECT id, tenant_id, entity_type, properties, embedding
                FROM graph_entities
                WHERE id = $1 AND tenant_id = $2
                """,
                entity_id,
                tenant_id,
            )

    if self._transaction_conn:
        await self._set_tenant_context(self._transaction_conn, tenant_id)
        row = await _fetch(self._transaction_conn)
    else:
        pool = self._ensure_pool()
        async with pool.acquire() as conn:
            # Wrap in transaction if RLS is enabled (SET LOCAL requires transaction)
            if self.enable_rls:
                async with conn.transaction():
                    await self._set_tenant_context(conn, tenant_id)
                    row = await _fetch(conn)
            else:
                await self._set_tenant_context(conn, tenant_id)
                row = await _fetch(conn)

    if not row:
        return None

    # Deserialize: properties may come back as JSON text or already-decoded
    # jsonb; embedding is stored as an opaque blob.
    properties = json.loads(row["properties"]) if isinstance(row["properties"], str) else row["properties"]
    embedding_raw = self._deserialize_embedding(row["embedding"]) if row["embedding"] else None
    embedding: Optional[List[float]] = cast(List[float], embedding_raw.tolist()) if embedding_raw is not None else None

    # Get tenant_id from row or context (SEPARATE_SCHEMA rows carry none).
    row_tenant_id = row.get("tenant_id") if "tenant_id" in row.keys() else tenant_id

    return Entity(
        id=row["id"],
        entity_type=row["entity_type"],
        properties=properties,
        embedding=embedding,
        tenant_id=row_tenant_id,
    )
|
|
746
|
+
|
|
747
|
+
async def update_entity(self, entity: Entity, context: Optional[TenantContext] = None) -> None:
    """
    Update entity in PostgreSQL database.

    Unlike add_entity this is a pure UPDATE: the entity must already
    exist for this tenant.

    Args:
        entity: Entity to update
        context: Optional tenant context for multi-tenant isolation

    Raises:
        RuntimeError: If the store has not been initialized.
        ValueError: If no row with the entity's ID was updated.
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    tenant_id = self._get_tenant_id(context)
    properties_json = json.dumps(entity.properties)
    embedding_blob = self._serialize_embedding(entity.embedding) if entity.embedding else None

    async def _execute(conn: asyncpg.Connection):
        if self.isolation_mode == TenantIsolationMode.SEPARATE_SCHEMA:
            return await conn.execute(
                """
                UPDATE graph_entities
                SET entity_type = $2, properties = $3::jsonb, embedding = $4, updated_at = CURRENT_TIMESTAMP
                WHERE id = $1
                """,
                entity.id,
                entity.entity_type,
                properties_json,
                embedding_blob,
            )
        elif self.enable_rls:
            # RLS restricts the UPDATE to the current tenant's rows.
            return await conn.execute(
                """
                UPDATE graph_entities
                SET entity_type = $2, properties = $3::jsonb, embedding = $4, updated_at = CURRENT_TIMESTAMP
                WHERE id = $1
                """,
                entity.id,
                entity.entity_type,
                properties_json,
                embedding_blob,
            )
        else:
            # Manual tenant filtering (tenant_id is always a string, '' for global)
            return await conn.execute(
                """
                UPDATE graph_entities
                SET entity_type = $2, properties = $3::jsonb, embedding = $4, updated_at = CURRENT_TIMESTAMP
                WHERE id = $1 AND tenant_id = $5
                """,
                entity.id,
                entity.entity_type,
                properties_json,
                embedding_blob,
                tenant_id,
            )

    if self._transaction_conn:
        await self._set_tenant_context(self._transaction_conn, tenant_id)
        result = await _execute(self._transaction_conn)
    else:
        pool = self._ensure_pool()
        async with pool.acquire() as conn:
            # Wrap in transaction if RLS is enabled (SET LOCAL requires transaction)
            if self.enable_rls:
                async with conn.transaction():
                    await self._set_tenant_context(conn, tenant_id)
                    result = await _execute(conn)
            else:
                await self._set_tenant_context(conn, tenant_id)
                result = await _execute(conn)

    # asyncpg returns the command status string, e.g. "UPDATE 0" when no
    # row matched.
    if result == "UPDATE 0":
        raise ValueError(f"Entity with ID '{entity.id}' not found")
|
|
819
|
+
|
|
820
|
+
async def delete_entity(self, entity_id: str, context: Optional[TenantContext] = None) -> None:
    """
    Delete entity from PostgreSQL database.

    Incident relations (as source or target) are removed first so no
    dangling edges remain.

    Args:
        entity_id: Entity ID to delete
        context: Optional tenant context for multi-tenant isolation

    Raises:
        RuntimeError: If the store has not been initialized.
        ValueError: If no row with the given ID was deleted.
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    tenant_id = self._get_tenant_id(context)

    async def _execute(conn: asyncpg.Connection):
        if self.isolation_mode == TenantIsolationMode.SEPARATE_SCHEMA or self.enable_rls:
            # Delete relations first
            await conn.execute(
                "DELETE FROM graph_relations WHERE source_id = $1 OR target_id = $1",
                entity_id
            )
            return await conn.execute("DELETE FROM graph_entities WHERE id = $1", entity_id)
        else:
            # Manual tenant filtering (tenant_id is always a string, '' for global)
            await conn.execute(
                "DELETE FROM graph_relations WHERE (source_id = $1 OR target_id = $1) AND tenant_id = $2",
                entity_id,
                tenant_id
            )
            return await conn.execute(
                "DELETE FROM graph_entities WHERE id = $1 AND tenant_id = $2",
                entity_id,
                tenant_id
            )

    if self._transaction_conn:
        await self._set_tenant_context(self._transaction_conn, tenant_id)
        result = await _execute(self._transaction_conn)
    else:
        pool = self._ensure_pool()
        async with pool.acquire() as conn:
            # Wrap in transaction if RLS is enabled (SET LOCAL requires transaction)
            if self.enable_rls:
                async with conn.transaction():
                    await self._set_tenant_context(conn, tenant_id)
                    result = await _execute(conn)
            else:
                await self._set_tenant_context(conn, tenant_id)
                result = await _execute(conn)

    # Command status of the entity delete; "DELETE 0" means nothing matched.
    if result == "DELETE 0":
        raise ValueError(f"Entity with ID '{entity_id}' not found")
|
|
871
|
+
|
|
872
|
+
async def add_relation(self, relation: Relation, context: Optional[TenantContext] = None) -> None:
    """
    Add relation to PostgreSQL database (upsert on conflict).

    Both endpoint entities must already exist and, for non-global
    tenants, belong to the same tenant.

    Args:
        relation: Relation to add
        context: Optional tenant context for multi-tenant isolation

    Raises:
        RuntimeError: If the store has not been initialized.
        ValueError: If the source or target entity does not exist.
        CrossTenantRelationError: If source and target entities belong to different tenants
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    tenant_id = self._get_tenant_id(context)

    # Check entities exist and enforce same-tenant constraint
    source_entity = await self.get_entity(relation.source_id, context=context)
    target_entity = await self.get_entity(relation.target_id, context=context)

    if not source_entity:
        raise ValueError(f"Source entity '{relation.source_id}' does not exist")
    if not target_entity:
        raise ValueError(f"Target entity '{relation.target_id}' does not exist")

    # Enforce same-tenant constraint (skip for global namespace which has empty tenant_id)
    if tenant_id:
        if source_entity.tenant_id != target_entity.tenant_id:
            raise CrossTenantRelationError(source_entity.tenant_id, target_entity.tenant_id)

    # Set tenant_id on relation
    if relation.tenant_id is None:
        relation.tenant_id = tenant_id

    properties_json = json.dumps(relation.properties)

    async def _execute(conn: asyncpg.Connection):
        # Ensure the tenant schema exists before inserting into it.
        if self.isolation_mode == TenantIsolationMode.SEPARATE_SCHEMA and tenant_id:
            await self._ensure_tenant_schema(conn, tenant_id)

        if self.isolation_mode == TenantIsolationMode.SEPARATE_SCHEMA:
            # SEPARATE_SCHEMA: No tenant_id column
            await conn.execute(
                """
                INSERT INTO graph_relations (id, relation_type, source_id, target_id, properties, weight)
                VALUES ($1, $2, $3, $4, $5::jsonb, $6)
                ON CONFLICT (id) DO UPDATE SET
                    relation_type = EXCLUDED.relation_type,
                    source_id = EXCLUDED.source_id,
                    target_id = EXCLUDED.target_id,
                    properties = EXCLUDED.properties,
                    weight = EXCLUDED.weight,
                    updated_at = CURRENT_TIMESTAMP
                """,
                relation.id,
                relation.relation_type,
                relation.source_id,
                relation.target_id,
                properties_json,
                relation.weight,
            )
        else:
            # SHARED_SCHEMA: Include tenant_id column
            await conn.execute(
                """
                INSERT INTO graph_relations (id, tenant_id, relation_type, source_id, target_id, properties, weight)
                VALUES ($1, $2, $3, $4, $5, $6::jsonb, $7)
                ON CONFLICT (id, tenant_id) DO UPDATE SET
                    relation_type = EXCLUDED.relation_type,
                    source_id = EXCLUDED.source_id,
                    target_id = EXCLUDED.target_id,
                    properties = EXCLUDED.properties,
                    weight = EXCLUDED.weight,
                    updated_at = CURRENT_TIMESTAMP
                """,
                relation.id,
                tenant_id,
                relation.relation_type,
                relation.source_id,
                relation.target_id,
                properties_json,
                relation.weight,
            )

    if self._transaction_conn:
        await self._set_tenant_context(self._transaction_conn, tenant_id)
        await _execute(self._transaction_conn)
    else:
        pool = self._ensure_pool()
        async with pool.acquire() as conn:
            # Wrap in transaction if RLS is enabled (SET LOCAL requires transaction)
            if self.enable_rls:
                async with conn.transaction():
                    await self._set_tenant_context(conn, tenant_id)
                    await _execute(conn)
            else:
                await self._set_tenant_context(conn, tenant_id)
                await _execute(conn)
|
|
968
|
+
|
|
969
|
+
async def get_relation(self, relation_id: str, context: Optional[TenantContext] = None) -> Optional[Relation]:
    """
    Get relation from PostgreSQL database.

    Tenant scoping mirrors get_entity: search_path for SEPARATE_SCHEMA,
    RLS filtering when enabled, otherwise an explicit tenant_id predicate.

    Args:
        relation_id: Relation ID to retrieve
        context: Optional tenant context for multi-tenant isolation

    Returns:
        The Relation, or None if no matching row is visible to this tenant.

    Raises:
        RuntimeError: If the store has not been initialized.
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    tenant_id = self._get_tenant_id(context)

    async def _fetch(conn: asyncpg.Connection):
        if self.isolation_mode == TenantIsolationMode.SEPARATE_SCHEMA:
            # Schema-per-tenant tables have no tenant_id column.
            return await conn.fetchrow(
                """
                SELECT id, relation_type, source_id, target_id, properties, weight
                FROM graph_relations
                WHERE id = $1
                """,
                relation_id,
            )
        elif self.enable_rls:
            # RLS filters rows to the current tenant automatically.
            return await conn.fetchrow(
                """
                SELECT id, tenant_id, relation_type, source_id, target_id, properties, weight
                FROM graph_relations
                WHERE id = $1
                """,
                relation_id,
            )
        else:
            # Manual tenant filtering (tenant_id is always a string, '' for global)
            return await conn.fetchrow(
                """
                SELECT id, tenant_id, relation_type, source_id, target_id, properties, weight
                FROM graph_relations
                WHERE id = $1 AND tenant_id = $2
                """,
                relation_id,
                tenant_id,
            )

    if self._transaction_conn:
        await self._set_tenant_context(self._transaction_conn, tenant_id)
        row = await _fetch(self._transaction_conn)
    else:
        pool = self._ensure_pool()
        async with pool.acquire() as conn:
            # Wrap in transaction if RLS is enabled (SET LOCAL requires transaction)
            if self.enable_rls:
                async with conn.transaction():
                    await self._set_tenant_context(conn, tenant_id)
                    row = await _fetch(conn)
            else:
                await self._set_tenant_context(conn, tenant_id)
                row = await _fetch(conn)

    if not row:
        return None

    # properties may be JSON text or already-decoded jsonb.
    properties = json.loads(row["properties"]) if isinstance(row["properties"], str) else row["properties"]
    # SEPARATE_SCHEMA rows carry no tenant_id column; fall back to context.
    row_tenant_id = row.get("tenant_id") if "tenant_id" in row.keys() else tenant_id

    return Relation(
        id=row["id"],
        relation_type=row["relation_type"],
        source_id=row["source_id"],
        target_id=row["target_id"],
        properties=properties,
        weight=float(row["weight"]) if row["weight"] else 1.0,
        tenant_id=row_tenant_id,
    )
|
|
1043
|
+
|
|
1044
|
+
async def delete_relation(self, relation_id: str, context: Optional[TenantContext] = None) -> None:
    """
    Delete relation from PostgreSQL database

    Args:
        relation_id: Relation ID to delete
        context: Optional tenant context for multi-tenant isolation

    Raises:
        RuntimeError: If the store has not been initialized.
        ValueError: If no relation with the given ID was deleted.
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    tenant_id = self._get_tenant_id(context)
    # SEPARATE_SCHEMA (per-tenant schema) and RLS both scope the table for
    # us; only the plain shared-schema mode needs an explicit predicate.
    needs_manual_filter = not (
        self.isolation_mode == TenantIsolationMode.SEPARATE_SCHEMA or self.enable_rls
    )

    async def _run_delete(conn: asyncpg.Connection):
        if needs_manual_filter:
            # Manual tenant filtering (tenant_id is always a string, '' for global)
            return await conn.execute(
                "DELETE FROM graph_relations WHERE id = $1 AND tenant_id = $2",
                relation_id,
                tenant_id
            )
        return await conn.execute("DELETE FROM graph_relations WHERE id = $1", relation_id)

    if self._transaction_conn:
        await self._set_tenant_context(self._transaction_conn, tenant_id)
        status = await _run_delete(self._transaction_conn)
    else:
        pool = self._ensure_pool()
        async with pool.acquire() as conn:
            if self.enable_rls:
                # SET LOCAL (issued by _set_tenant_context) only takes
                # effect inside an explicit transaction.
                async with conn.transaction():
                    await self._set_tenant_context(conn, tenant_id)
                    status = await _run_delete(conn)
            else:
                await self._set_tenant_context(conn, tenant_id)
                status = await _run_delete(conn)

    # asyncpg returns the command tag (e.g. "DELETE 1"); "DELETE 0" means
    # nothing matched.
    if status == "DELETE 0":
        raise ValueError(f"Relation with ID '{relation_id}' not found")
async def get_neighbors(
    self,
    entity_id: str,
    relation_type: Optional[str] = None,
    direction: str = "outgoing",
    context: Optional[TenantContext] = None,
) -> List[Entity]:
    """
    Get neighboring entities (optimized with SQL)

    Args:
        entity_id: ID of entity to get neighbors for
        relation_type: Optional filter by relation type
        direction: "outgoing", "incoming", or "both"
        context: Optional tenant context for multi-tenant isolation

    Returns:
        List of distinct Entity objects adjacent to ``entity_id``.

    Raises:
        RuntimeError: If the store has not been initialized.
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    tenant_id = self._get_tenant_id(context)

    async def _fetch(conn: asyncpg.Connection):
        # For SEPARATE_SCHEMA or RLS, the context handles filtering
        use_tenant_filter = not (self.isolation_mode == TenantIsolationMode.SEPARATE_SCHEMA or self.enable_rls)

        # Build query based on direction
        if self.isolation_mode == TenantIsolationMode.SEPARATE_SCHEMA:
            # No tenant_id column in SEPARATE_SCHEMA
            if direction == "outgoing":
                query = """
                    SELECT DISTINCT e.id, e.entity_type, e.properties, e.embedding
                    FROM graph_entities e
                    JOIN graph_relations r ON e.id = r.target_id
                    WHERE r.source_id = $1
                """
            elif direction == "incoming":
                query = """
                    SELECT DISTINCT e.id, e.entity_type, e.properties, e.embedding
                    FROM graph_entities e
                    JOIN graph_relations r ON e.id = r.source_id
                    WHERE r.target_id = $1
                """
            else:  # both
                # UNION of both directions; no JOIN alias 'r' exists in
                # the outer query here, which is why the relation_type
                # filter below is spliced into the subqueries instead.
                query = """
                    SELECT DISTINCT e.id, e.entity_type, e.properties, e.embedding
                    FROM graph_entities e
                    WHERE e.id IN (
                        SELECT target_id FROM graph_relations WHERE source_id = $1
                        UNION
                        SELECT source_id FROM graph_relations WHERE target_id = $1
                    )
                """
            params: List[Any] = [entity_id]
            if relation_type:
                if direction == "both":
                    # Textual splice: add the relation_type predicate to
                    # each inner subquery (matches the literal substrings
                    # emitted above).
                    query = query.replace(
                        "SELECT target_id FROM graph_relations WHERE source_id = $1",
                        "SELECT target_id FROM graph_relations WHERE source_id = $1 AND relation_type = $2",
                    )
                    query = query.replace(
                        "SELECT source_id FROM graph_relations WHERE target_id = $1",
                        "SELECT source_id FROM graph_relations WHERE target_id = $1 AND relation_type = $2",
                    )
                else:
                    query += " AND r.relation_type = $2"
                params.append(relation_type)
        else:
            # SHARED_SCHEMA with tenant_id column (tenant_id is always a string, '' for global)
            tenant_filter = ""
            if use_tenant_filter:
                tenant_filter = "AND e.tenant_id = $2 AND r.tenant_id = $2"

            if direction == "outgoing":
                query = f"""
                    SELECT DISTINCT e.id, e.tenant_id, e.entity_type, e.properties, e.embedding
                    FROM graph_entities e
                    JOIN graph_relations r ON e.id = r.target_id
                    WHERE r.source_id = $1 {tenant_filter}
                """
            elif direction == "incoming":
                query = f"""
                    SELECT DISTINCT e.id, e.tenant_id, e.entity_type, e.properties, e.embedding
                    FROM graph_entities e
                    JOIN graph_relations r ON e.id = r.source_id
                    WHERE r.target_id = $1 {tenant_filter}
                """
            else:  # both
                inner_filter = "AND tenant_id = $2" if use_tenant_filter else ""
                query = f"""
                    SELECT DISTINCT e.id, e.tenant_id, e.entity_type, e.properties, e.embedding
                    FROM graph_entities e
                    WHERE e.id IN (
                        SELECT target_id FROM graph_relations WHERE source_id = $1 {inner_filter}
                        UNION
                        SELECT source_id FROM graph_relations WHERE target_id = $1 {inner_filter}
                    )
                """
                if use_tenant_filter:
                    query += " AND e.tenant_id = $2"

            params = [entity_id]
            if use_tenant_filter:
                params.append(tenant_id)

            if relation_type:
                # Placeholder index depends on whether $2 was consumed by
                # the tenant filter above.
                param_idx = len(params) + 1
                if direction == "both":
                    # Textual splice, as in the SEPARATE_SCHEMA branch;
                    # the search strings still match as prefixes even when
                    # inner_filter appended " AND tenant_id = $2".
                    query = query.replace(
                        "SELECT target_id FROM graph_relations WHERE source_id = $1",
                        f"SELECT target_id FROM graph_relations WHERE source_id = $1 AND relation_type = ${param_idx}",
                    )
                    query = query.replace(
                        "SELECT source_id FROM graph_relations WHERE target_id = $1",
                        f"SELECT source_id FROM graph_relations WHERE target_id = $1 AND relation_type = ${param_idx}",
                    )
                else:
                    query += f" AND r.relation_type = ${param_idx}"
                params.append(relation_type)

        return await conn.fetch(query, *params)

    if self._transaction_conn:
        await self._set_tenant_context(self._transaction_conn, tenant_id)
        rows = await _fetch(self._transaction_conn)
    else:
        pool = self._ensure_pool()
        async with pool.acquire() as conn:
            # Wrap in transaction if RLS is enabled (SET LOCAL requires transaction)
            if self.enable_rls:
                async with conn.transaction():
                    await self._set_tenant_context(conn, tenant_id)
                    rows = await _fetch(conn)
            else:
                await self._set_tenant_context(conn, tenant_id)
                rows = await _fetch(conn)

    entities = []
    for row in rows:
        # properties may be JSON text or an already-decoded object.
        properties = json.loads(row["properties"]) if isinstance(row["properties"], str) else row["properties"]
        embedding_raw = self._deserialize_embedding(row["embedding"]) if row["embedding"] else None
        embedding: Optional[List[float]] = cast(List[float], embedding_raw.tolist()) if embedding_raw is not None else None
        # SEPARATE_SCHEMA rows carry no tenant_id column; fall back to the
        # tenant we queried for.
        row_tenant_id = row.get("tenant_id") if "tenant_id" in row.keys() else tenant_id
        entities.append(
            Entity(
                id=row["id"],
                entity_type=row["entity_type"],
                properties=properties,
                embedding=embedding,
                tenant_id=row_tenant_id,
            )
        )

    return entities
async def get_all_entities(
    self,
    entity_type: Optional[str] = None,
    limit: Optional[int] = None,
    context: Optional[TenantContext] = None,
) -> List[Entity]:
    """
    Get all entities, optionally filtered by type

    Args:
        entity_type: Optional filter by entity type
        limit: Optional limit on number of entities
        context: Optional tenant context for multi-tenant isolation

    Returns:
        List of Entity objects visible to the resolved tenant.

    Raises:
        RuntimeError: If the store has not been initialized.
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    tenant_id = self._get_tenant_id(context)
    logger.debug(f"get_all_entities called with tenant_id='{tenant_id}', enable_rls={self.enable_rls}, isolation_mode={self.isolation_mode}")

    async def _fetch(conn: asyncpg.Connection):
        # Pick the base SELECT per isolation mode, then append the
        # optional entity_type and LIMIT clauses with positional
        # placeholders numbered after any already-bound parameters.
        params: List[Any] = []
        if self.isolation_mode == TenantIsolationMode.SEPARATE_SCHEMA:
            # Per-tenant schema: no tenant_id column exists.
            query = "SELECT id, entity_type, properties, embedding FROM graph_entities"
        elif self.enable_rls:
            # RLS policies restrict visible rows automatically.
            query = "SELECT id, tenant_id, entity_type, properties, embedding FROM graph_entities"
        else:
            # Manual tenant filtering (tenant_id is always a string, '' for global)
            query = "SELECT id, tenant_id, entity_type, properties, embedding FROM graph_entities WHERE tenant_id = $1"
            params.append(tenant_id)

        if entity_type:
            connector = "AND" if params else "WHERE"
            query += f" {connector} entity_type = ${len(params) + 1}"
            params.append(entity_type)

        if limit:
            query += f" LIMIT ${len(params) + 1}"
            params.append(limit)

        return await conn.fetch(query, *params)

    if self._transaction_conn:
        await self._set_tenant_context(self._transaction_conn, tenant_id)
        rows = await _fetch(self._transaction_conn)
    else:
        pool = self._ensure_pool()
        async with pool.acquire() as conn:
            if self.enable_rls:
                # SET LOCAL (issued by _set_tenant_context) only takes
                # effect inside an explicit transaction.
                async with conn.transaction():
                    await self._set_tenant_context(conn, tenant_id)
                    rows = await _fetch(conn)
            else:
                await self._set_tenant_context(conn, tenant_id)
                rows = await _fetch(conn)

    logger.debug(f"get_all_entities query returned {len(rows)} rows")
    results: List[Entity] = []
    for record in rows:
        # properties may be JSON text or an already-decoded object.
        props = json.loads(record["properties"]) if isinstance(record["properties"], str) else record["properties"]
        raw_vec = self._deserialize_embedding(record["embedding"]) if record["embedding"] else None
        vec: Optional[List[float]] = cast(List[float], raw_vec.tolist()) if raw_vec is not None else None
        # SEPARATE_SCHEMA rows carry no tenant_id column; use the
        # requested tenant instead.
        record_tenant = record.get("tenant_id") if "tenant_id" in record.keys() else tenant_id
        logger.debug(f"Retrieved entity id='{record['id']}', tenant_id='{record_tenant}'")
        results.append(
            Entity(
                id=record["id"],
                entity_type=record["entity_type"],
                properties=props,
                embedding=vec,
                tenant_id=record_tenant,
            )
        )

    logger.debug(f"get_all_entities returning {len(results)} entities for requested tenant_id='{tenant_id}'")
    return results
async def get_stats(self, context: Optional[TenantContext] = None) -> Dict[str, Any]:
    """
    Get graph statistics

    Args:
        context: Optional tenant context for tenant-scoped stats

    Returns:
        Dict with entity/relation totals, per-type breakdowns, and
        backend / pool / isolation metadata.

    Raises:
        RuntimeError: If the store has not been initialized.
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    tenant_id = self._get_tenant_id(context)

    async def _collect(conn: asyncpg.Connection):
        # SEPARATE_SCHEMA (per-tenant schema) and RLS already scope the
        # tables; otherwise filter explicitly on the tenant_id column
        # (tenant_id is always a string, '' for global).
        if self.isolation_mode == TenantIsolationMode.SEPARATE_SCHEMA or self.enable_rls:
            n_entities = await conn.fetchval("SELECT COUNT(*) FROM graph_entities")
            n_relations = await conn.fetchval("SELECT COUNT(*) FROM graph_relations")
            by_entity_type = await conn.fetch("SELECT entity_type, COUNT(*) as count FROM graph_entities GROUP BY entity_type")
            by_relation_type = await conn.fetch("SELECT relation_type, COUNT(*) as count FROM graph_relations GROUP BY relation_type")
        else:
            n_entities = await conn.fetchval("SELECT COUNT(*) FROM graph_entities WHERE tenant_id = $1", tenant_id)
            n_relations = await conn.fetchval("SELECT COUNT(*) FROM graph_relations WHERE tenant_id = $1", tenant_id)
            by_entity_type = await conn.fetch("SELECT entity_type, COUNT(*) as count FROM graph_entities WHERE tenant_id = $1 GROUP BY entity_type", tenant_id)
            by_relation_type = await conn.fetch("SELECT relation_type, COUNT(*) as count FROM graph_relations WHERE tenant_id = $1 GROUP BY relation_type", tenant_id)

        return n_entities, n_relations, by_entity_type, by_relation_type

    if self._transaction_conn:
        await self._set_tenant_context(self._transaction_conn, tenant_id)
        counts = await _collect(self._transaction_conn)
    else:
        pool = self._ensure_pool()
        async with pool.acquire() as conn:
            if self.enable_rls:
                # SET LOCAL (issued by _set_tenant_context) only takes
                # effect inside an explicit transaction.
                async with conn.transaction():
                    await self._set_tenant_context(conn, tenant_id)
                    counts = await _collect(conn)
            else:
                await self._set_tenant_context(conn, tenant_id)
                counts = await _collect(conn)

    n_entities, n_relations, by_entity_type, by_relation_type = counts
    return {
        "entity_count": n_entities,
        "relation_count": n_relations,
        "entity_types": {rec["entity_type"]: rec["count"] for rec in by_entity_type},
        "relation_types": {rec["relation_type"]: rec["count"] for rec in by_relation_type},
        "backend": "postgresql",
        "pool_size": (f"{self.pool.get_size()}/{self.max_pool_size}" if self.pool else "0/0"),
        "isolation_mode": self.isolation_mode.value,
        "tenant_id": tenant_id,
        "enable_rls": self.enable_rls,
    }
async def clear(self, context: Optional[TenantContext] = None) -> None:
    """
    Clear data from PostgreSQL database

    Args:
        context: Optional tenant context for multi-tenant isolation.
            If provided, clears only data for the specified tenant.
            If None (no context), clears ALL data across all tenants.

    Raises:
        RuntimeError: If the store has not been initialized.
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    # Note: context=None means clear ALL data, not just global namespace
    clear_all = context is None
    tenant_id = self._get_tenant_id(context)

    async def _execute(conn: asyncpg.Connection):
        if clear_all:
            # Clear all data across all tenants
            # NOTE(review): when enable_rls is True this runs without
            # tenant context (see below); whether these unscoped DELETEs
            # reach all rows depends on the RLS policies — confirm.
            await conn.execute("DELETE FROM graph_relations")
            await conn.execute("DELETE FROM graph_entities")

            # Drop tenant schemas for SEPARATE_SCHEMA mode
            if self.isolation_mode == TenantIsolationMode.SEPARATE_SCHEMA:
                schemas = await conn.fetch(
                    "SELECT schema_name FROM information_schema.schemata WHERE schema_name LIKE 'tenant_%'"
                )
                for row in schemas:
                    # Schema names come from the catalog, not user input.
                    await conn.execute(f"DROP SCHEMA IF EXISTS {row['schema_name']} CASCADE")
                # Forget all cached schema initializations.
                self._initialized_tenant_schemas.clear()
        else:
            if self.isolation_mode == TenantIsolationMode.SEPARATE_SCHEMA:
                # Drop tenant schema
                schema_name = self._get_schema_name(tenant_id if tenant_id else None)
                await conn.execute(f"DROP SCHEMA IF EXISTS {schema_name} CASCADE")
                self._initialized_tenant_schemas.discard(tenant_id)
            elif self.enable_rls:
                # RLS will filter automatically
                await conn.execute("DELETE FROM graph_relations")
                await conn.execute("DELETE FROM graph_entities")
            else:
                # Manual tenant filtering (tenant_id is string, '' for global)
                await conn.execute("DELETE FROM graph_relations WHERE tenant_id = $1", tenant_id)
                await conn.execute("DELETE FROM graph_entities WHERE tenant_id = $1", tenant_id)

    if self._transaction_conn:
        await self._set_tenant_context(self._transaction_conn, tenant_id)
        await _execute(self._transaction_conn)
    else:
        pool = self._ensure_pool()
        async with pool.acquire() as conn:
            # Wrap in transaction if RLS is enabled (SET LOCAL requires transaction)
            if self.enable_rls and not clear_all:
                async with conn.transaction():
                    await self._set_tenant_context(conn, tenant_id)
                    await _execute(conn)
            else:
                # clear_all deliberately skips tenant-context setup so
                # the deletes are not scoped to one tenant.
                if not clear_all:
                    await self._set_tenant_context(conn, tenant_id)
                await _execute(conn)
|
|
1449
|
+
# =========================================================================
|
|
1450
|
+
# Tier 2: Advanced Interface (PostgreSQL-optimized with recursive CTEs)
|
|
1451
|
+
# =========================================================================
|
|
1452
|
+
|
|
1453
|
+
async def find_paths(
    self,
    source_id: str,
    target_id: str,
    max_depth: int = 3,
    limit: Optional[int] = 10,
) -> List[Path]:
    """
    Find paths using WITH RECURSIVE CTE (PostgreSQL-optimized)

    This overrides the default implementation with an efficient
    recursive SQL query.

    Args:
        source_id: ID of the entity paths must start from.
        target_id: ID of the entity paths must end at.
        max_depth: Maximum number of hops to explore.
        limit: Maximum number of paths to return (a falsy value falls
            back to 10 at query time).

    Returns:
        List of Path objects, shortest (fewest hops) first.

    Raises:
        RuntimeError: If the store has not been initialized.

    NOTE(review): unlike the other query methods, this one accepts no
    tenant context and queries graph_relations without tenant filtering
    or _set_tenant_context — confirm whether cross-tenant paths are
    intended here.
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    # Recursive CTE to find all paths
    query = """
        WITH RECURSIVE paths AS (
            -- Base case: direct connections
            SELECT
                r.source_id,
                r.target_id,
                r.relation_type,
                ARRAY[r.source_id] as path_nodes,
                ARRAY[r.id] as path_relations,
                1 as depth
            FROM graph_relations r
            WHERE r.source_id = $1

            UNION ALL

            -- Recursive case: extend paths
            SELECT
                p.source_id,
                r.target_id,
                r.relation_type,
                p.path_nodes || r.source_id,
                p.path_relations || r.id,
                p.depth + 1
            FROM paths p
            JOIN graph_relations r ON p.target_id = r.source_id
            WHERE p.depth < $3
            AND NOT (r.source_id = ANY(p.path_nodes)) -- Avoid cycles
        )
        SELECT DISTINCT
            path_nodes || target_id as nodes,
            path_relations as relations,
            depth
        FROM paths
        WHERE target_id = $2
        ORDER BY depth ASC
        LIMIT $4
    """

    if self._transaction_conn:
        conn = self._transaction_conn
        rows = await conn.fetch(query, source_id, target_id, max_depth, limit or 10)
    else:
        pool = self._ensure_pool()
        async with pool.acquire() as conn:
            rows = await conn.fetch(query, source_id, target_id, max_depth, limit or 10)

    paths = []
    for row in rows:
        node_ids = row["nodes"]
        relation_ids = row["relations"]

        # Fetch entities and relations
        # NOTE(review): one get_entity/get_relation round-trip per path
        # element (N+1 pattern) — fine for small limits, costly for deep
        # paths.
        entities = []
        for node_id in node_ids:
            entity = await self.get_entity(node_id)
            if entity:
                entities.append(entity)

        relations = []
        for rel_id in relation_ids:
            relation = await self.get_relation(rel_id)
            if relation:
                relations.append(relation)

        # Skip rows whose entities/relations could not be resolved at all.
        if entities and relations:
            paths.append(Path(nodes=entities, edges=relations))

    return paths
|
|
1539
|
+
# =========================================================================
|
|
1540
|
+
# Helper methods
|
|
1541
|
+
# =========================================================================
|
|
1542
|
+
|
|
1543
|
+
def _serialize_embedding(self, embedding) -> Optional[bytes]:
|
|
1544
|
+
"""Serialize numpy array or list to bytes"""
|
|
1545
|
+
if embedding is None:
|
|
1546
|
+
return None
|
|
1547
|
+
# Handle both numpy array and list
|
|
1548
|
+
if isinstance(embedding, np.ndarray):
|
|
1549
|
+
return embedding.tobytes()
|
|
1550
|
+
elif isinstance(embedding, (list, tuple)):
|
|
1551
|
+
# Convert list to numpy array first
|
|
1552
|
+
arr = np.array(embedding, dtype=np.float32)
|
|
1553
|
+
return arr.tobytes()
|
|
1554
|
+
else:
|
|
1555
|
+
# Try to convert to numpy array
|
|
1556
|
+
arr = np.array(embedding, dtype=np.float32)
|
|
1557
|
+
return arr.tobytes()
|
|
1558
|
+
|
|
1559
|
+
def _deserialize_embedding(self, data: bytes) -> Optional[np.ndarray]:
|
|
1560
|
+
"""Deserialize bytes to numpy array"""
|
|
1561
|
+
if not data:
|
|
1562
|
+
return None
|
|
1563
|
+
return np.frombuffer(data, dtype=np.float32)
|