aiecs 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiecs/__init__.py +72 -0
- aiecs/__main__.py +41 -0
- aiecs/aiecs_client.py +469 -0
- aiecs/application/__init__.py +10 -0
- aiecs/application/executors/__init__.py +10 -0
- aiecs/application/executors/operation_executor.py +363 -0
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +100 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
- aiecs/application/knowledge_graph/search/reranker.py +295 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +484 -0
- aiecs/config/__init__.py +16 -0
- aiecs/config/config.py +498 -0
- aiecs/config/graph_config.py +137 -0
- aiecs/config/registry.py +23 -0
- aiecs/core/__init__.py +46 -0
- aiecs/core/interface/__init__.py +34 -0
- aiecs/core/interface/execution_interface.py +152 -0
- aiecs/core/interface/storage_interface.py +171 -0
- aiecs/domain/__init__.py +289 -0
- aiecs/domain/agent/__init__.py +189 -0
- aiecs/domain/agent/base_agent.py +697 -0
- aiecs/domain/agent/exceptions.py +103 -0
- aiecs/domain/agent/graph_aware_mixin.py +559 -0
- aiecs/domain/agent/hybrid_agent.py +490 -0
- aiecs/domain/agent/integration/__init__.py +26 -0
- aiecs/domain/agent/integration/context_compressor.py +222 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
- aiecs/domain/agent/integration/retry_policy.py +219 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +646 -0
- aiecs/domain/agent/lifecycle.py +296 -0
- aiecs/domain/agent/llm_agent.py +300 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +197 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +160 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
- aiecs/domain/agent/models.py +317 -0
- aiecs/domain/agent/observability.py +407 -0
- aiecs/domain/agent/persistence.py +289 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +161 -0
- aiecs/domain/agent/prompts/formatters.py +189 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +260 -0
- aiecs/domain/agent/tool_agent.py +257 -0
- aiecs/domain/agent/tools/__init__.py +12 -0
- aiecs/domain/agent/tools/schema_generator.py +221 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +477 -0
- aiecs/domain/community/analytics.py +481 -0
- aiecs/domain/community/collaborative_workflow.py +642 -0
- aiecs/domain/community/communication_hub.py +645 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +800 -0
- aiecs/domain/community/community_manager.py +813 -0
- aiecs/domain/community/decision_engine.py +879 -0
- aiecs/domain/community/exceptions.py +225 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +268 -0
- aiecs/domain/community/resource_manager.py +457 -0
- aiecs/domain/community/shared_context_manager.py +603 -0
- aiecs/domain/context/__init__.py +58 -0
- aiecs/domain/context/context_engine.py +989 -0
- aiecs/domain/context/conversation_models.py +354 -0
- aiecs/domain/context/graph_memory.py +467 -0
- aiecs/domain/execution/__init__.py +12 -0
- aiecs/domain/execution/model.py +57 -0
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +130 -0
- aiecs/domain/knowledge_graph/models/evidence.py +194 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
- aiecs/domain/knowledge_graph/models/path.py +179 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
- aiecs/domain/knowledge_graph/models/query.py +272 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
- aiecs/domain/knowledge_graph/models/relation.py +136 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
- aiecs/domain/task/__init__.py +13 -0
- aiecs/domain/task/dsl_processor.py +613 -0
- aiecs/domain/task/model.py +62 -0
- aiecs/domain/task/task_context.py +268 -0
- aiecs/infrastructure/__init__.py +24 -0
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +601 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
- aiecs/infrastructure/graph_storage/cache.py +429 -0
- aiecs/infrastructure/graph_storage/distributed.py +226 -0
- aiecs/infrastructure/graph_storage/error_handling.py +390 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +514 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
- aiecs/infrastructure/graph_storage/metrics.py +357 -0
- aiecs/infrastructure/graph_storage/migration.py +413 -0
- aiecs/infrastructure/graph_storage/pagination.py +471 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
- aiecs/infrastructure/graph_storage/postgres.py +871 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +623 -0
- aiecs/infrastructure/graph_storage/streaming.py +495 -0
- aiecs/infrastructure/messaging/__init__.py +13 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
- aiecs/infrastructure/messaging/websocket_manager.py +298 -0
- aiecs/infrastructure/monitoring/__init__.py +34 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
- aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
- aiecs/infrastructure/monitoring/structured_logger.py +48 -0
- aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
- aiecs/infrastructure/persistence/__init__.py +24 -0
- aiecs/infrastructure/persistence/context_engine_client.py +187 -0
- aiecs/infrastructure/persistence/database_manager.py +333 -0
- aiecs/infrastructure/persistence/file_storage.py +754 -0
- aiecs/infrastructure/persistence/redis_client.py +220 -0
- aiecs/llm/__init__.py +86 -0
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/callbacks/custom_callbacks.py +264 -0
- aiecs/llm/client_factory.py +420 -0
- aiecs/llm/clients/__init__.py +33 -0
- aiecs/llm/clients/base_client.py +193 -0
- aiecs/llm/clients/googleai_client.py +181 -0
- aiecs/llm/clients/openai_client.py +131 -0
- aiecs/llm/clients/vertex_client.py +437 -0
- aiecs/llm/clients/xai_client.py +184 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +275 -0
- aiecs/llm/config/config_validator.py +236 -0
- aiecs/llm/config/model_config.py +151 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +91 -0
- aiecs/main.py +363 -0
- aiecs/scripts/__init__.py +3 -0
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/version_manager.py +215 -0
- aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
- aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
- aiecs/scripts/dependance_check/__init__.py +17 -0
- aiecs/scripts/dependance_check/dependency_checker.py +938 -0
- aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
- aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
- aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
- aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
- aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
- aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
- aiecs/scripts/tools_develop/README.md +449 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
- aiecs/scripts/tools_develop/verify_tools.py +356 -0
- aiecs/tasks/__init__.py +1 -0
- aiecs/tasks/worker.py +172 -0
- aiecs/tools/__init__.py +299 -0
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +303 -0
- aiecs/tools/apisource/providers/__init__.py +115 -0
- aiecs/tools/apisource/providers/base.py +664 -0
- aiecs/tools/apisource/providers/census.py +401 -0
- aiecs/tools/apisource/providers/fred.py +564 -0
- aiecs/tools/apisource/providers/newsapi.py +412 -0
- aiecs/tools/apisource/providers/worldbank.py +357 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +375 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
- aiecs/tools/apisource/tool.py +850 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +338 -0
- aiecs/tools/base_tool.py +201 -0
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +599 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
- aiecs/tools/docs/content_insertion_tool.py +1333 -0
- aiecs/tools/docs/document_creator_tool.py +1317 -0
- aiecs/tools/docs/document_layout_tool.py +1166 -0
- aiecs/tools/docs/document_parser_tool.py +994 -0
- aiecs/tools/docs/document_writer_tool.py +1818 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
- aiecs/tools/langchain_adapter.py +542 -0
- aiecs/tools/schema_generator.py +275 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +589 -0
- aiecs/tools/search_tool/cache.py +260 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +216 -0
- aiecs/tools/search_tool/core.py +749 -0
- aiecs/tools/search_tool/deduplicator.py +123 -0
- aiecs/tools/search_tool/error_handler.py +271 -0
- aiecs/tools/search_tool/metrics.py +371 -0
- aiecs/tools/search_tool/rate_limiter.py +178 -0
- aiecs/tools/search_tool/schemas.py +277 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
- aiecs/tools/statistics/data_loader_tool.py +564 -0
- aiecs/tools/statistics/data_profiler_tool.py +658 -0
- aiecs/tools/statistics/data_transformer_tool.py +573 -0
- aiecs/tools/statistics/data_visualizer_tool.py +495 -0
- aiecs/tools/statistics/model_trainer_tool.py +487 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
- aiecs/tools/task_tools/__init__.py +86 -0
- aiecs/tools/task_tools/chart_tool.py +732 -0
- aiecs/tools/task_tools/classfire_tool.py +922 -0
- aiecs/tools/task_tools/image_tool.py +447 -0
- aiecs/tools/task_tools/office_tool.py +684 -0
- aiecs/tools/task_tools/pandas_tool.py +635 -0
- aiecs/tools/task_tools/report_tool.py +635 -0
- aiecs/tools/task_tools/research_tool.py +392 -0
- aiecs/tools/task_tools/scraper_tool.py +715 -0
- aiecs/tools/task_tools/stats_tool.py +688 -0
- aiecs/tools/temp_file_manager.py +130 -0
- aiecs/tools/tool_executor/__init__.py +37 -0
- aiecs/tools/tool_executor/tool_executor.py +881 -0
- aiecs/utils/LLM_output_structor.py +445 -0
- aiecs/utils/__init__.py +34 -0
- aiecs/utils/base_callback.py +47 -0
- aiecs/utils/cache_provider.py +695 -0
- aiecs/utils/execution_utils.py +184 -0
- aiecs/utils/logging.py +1 -0
- aiecs/utils/prompt_loader.py +14 -0
- aiecs/utils/token_usage_repository.py +323 -0
- aiecs/ws/__init__.py +0 -0
- aiecs/ws/socket_server.py +52 -0
- aiecs-1.5.1.dist-info/METADATA +608 -0
- aiecs-1.5.1.dist-info/RECORD +302 -0
- aiecs-1.5.1.dist-info/WHEEL +5 -0
- aiecs-1.5.1.dist-info/entry_points.txt +10 -0
- aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
- aiecs-1.5.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,871 @@
|
|
|
1
|
+
"""
|
|
2
|
+
PostgreSQL Graph Storage Backend
|
|
3
|
+
|
|
4
|
+
Provides production-grade graph storage using PostgreSQL with:
|
|
5
|
+
- Connection pooling via asyncpg
|
|
6
|
+
- Transaction support
|
|
7
|
+
- Recursive CTEs for efficient graph traversal
|
|
8
|
+
- Optional pgvector support for vector similarity search
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
import asyncpg
|
|
13
|
+
import logging
|
|
14
|
+
from typing import Any, Dict, List, Optional
|
|
15
|
+
from contextlib import asynccontextmanager
|
|
16
|
+
import numpy as np
|
|
17
|
+
|
|
18
|
+
from aiecs.domain.knowledge_graph.models.entity import Entity
|
|
19
|
+
from aiecs.domain.knowledge_graph.models.relation import Relation
|
|
20
|
+
from aiecs.domain.knowledge_graph.models.path import Path
|
|
21
|
+
from aiecs.infrastructure.graph_storage.base import GraphStore
|
|
22
|
+
from aiecs.config.config import get_settings
|
|
23
|
+
|
|
24
|
+
logger = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# PostgreSQL Schema for graph storage.
# Executed verbatim by PostgresGraphStore.initialize(); every statement is
# idempotent (IF NOT EXISTS), so re-running initialization is safe.
SCHEMA_SQL = """
-- Entities table
CREATE TABLE IF NOT EXISTS graph_entities (
    id TEXT PRIMARY KEY,
    entity_type TEXT NOT NULL,
    properties JSONB NOT NULL DEFAULT '{}',
    embedding BYTEA,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- Relations table
CREATE TABLE IF NOT EXISTS graph_relations (
    id TEXT PRIMARY KEY,
    relation_type TEXT NOT NULL,
    source_id TEXT NOT NULL,
    target_id TEXT NOT NULL,
    properties JSONB NOT NULL DEFAULT '{}',
    weight REAL DEFAULT 1.0,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (source_id) REFERENCES graph_entities(id) ON DELETE CASCADE,
    FOREIGN KEY (target_id) REFERENCES graph_entities(id) ON DELETE CASCADE
);

-- Indexes for performance
CREATE INDEX IF NOT EXISTS idx_graph_entities_type ON graph_entities(entity_type);
CREATE INDEX IF NOT EXISTS idx_graph_entities_properties ON graph_entities USING GIN(properties);
CREATE INDEX IF NOT EXISTS idx_graph_relations_type ON graph_relations(relation_type);
CREATE INDEX IF NOT EXISTS idx_graph_relations_source ON graph_relations(source_id);
CREATE INDEX IF NOT EXISTS idx_graph_relations_target ON graph_relations(target_id);
CREATE INDEX IF NOT EXISTS idx_graph_relations_source_target ON graph_relations(source_id, target_id);
CREATE INDEX IF NOT EXISTS idx_graph_relations_properties ON graph_relations USING GIN(properties);

-- Optional: Add pgvector extension support (if available)
-- CREATE EXTENSION IF NOT EXISTS vector;
-- ALTER TABLE graph_entities ADD COLUMN IF NOT EXISTS embedding_vector vector(1536);
-- CREATE INDEX IF NOT EXISTS idx_graph_entities_embedding ON graph_entities USING ivfflat (embedding_vector vector_cosine_ops);
"""
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class PostgresGraphStore(GraphStore):
|
|
70
|
+
"""
|
|
71
|
+
PostgreSQL-based graph storage implementation
|
|
72
|
+
|
|
73
|
+
Provides production-grade persistent graph storage with:
|
|
74
|
+
- Connection pooling via asyncpg
|
|
75
|
+
- ACID transactions
|
|
76
|
+
- SQL-optimized queries with recursive CTEs
|
|
77
|
+
- JSONB for flexible property storage
|
|
78
|
+
- Optional pgvector for vector similarity search
|
|
79
|
+
|
|
80
|
+
Features:
|
|
81
|
+
- Production-ready with connection pooling
|
|
82
|
+
- Efficient graph traversal using WITH RECURSIVE
|
|
83
|
+
- Automatic schema initialization
|
|
84
|
+
- Transaction support
|
|
85
|
+
- JSONB indexing for fast property queries
|
|
86
|
+
|
|
87
|
+
Example:
|
|
88
|
+
```python
|
|
89
|
+
from aiecs.infrastructure.graph_storage import PostgresGraphStore
|
|
90
|
+
|
|
91
|
+
# Using config from settings
|
|
92
|
+
store = PostgresGraphStore()
|
|
93
|
+
await store.initialize()
|
|
94
|
+
|
|
95
|
+
# Or with custom config
|
|
96
|
+
store = PostgresGraphStore(
|
|
97
|
+
host="localhost",
|
|
98
|
+
port=5432,
|
|
99
|
+
user="postgres",
|
|
100
|
+
password="password",
|
|
101
|
+
database="knowledge_graph"
|
|
102
|
+
)
|
|
103
|
+
await store.initialize()
|
|
104
|
+
|
|
105
|
+
entity = Entity(id="e1", entity_type="Person", properties={"name": "Alice"})
|
|
106
|
+
await store.add_entity(entity)
|
|
107
|
+
|
|
108
|
+
await store.close()
|
|
109
|
+
```
|
|
110
|
+
"""
|
|
111
|
+
|
|
112
|
+
def __init__(
|
|
113
|
+
self,
|
|
114
|
+
host: Optional[str] = None,
|
|
115
|
+
port: Optional[int] = None,
|
|
116
|
+
user: Optional[str] = None,
|
|
117
|
+
password: Optional[str] = None,
|
|
118
|
+
database: Optional[str] = None,
|
|
119
|
+
min_pool_size: int = 5,
|
|
120
|
+
max_pool_size: int = 20,
|
|
121
|
+
enable_pgvector: bool = False,
|
|
122
|
+
pool: Optional[asyncpg.Pool] = None,
|
|
123
|
+
database_manager: Optional[Any] = None,
|
|
124
|
+
**kwargs,
|
|
125
|
+
):
|
|
126
|
+
"""
|
|
127
|
+
Initialize PostgreSQL graph store
|
|
128
|
+
|
|
129
|
+
Args:
|
|
130
|
+
host: PostgreSQL host (defaults from config)
|
|
131
|
+
port: PostgreSQL port (defaults from config)
|
|
132
|
+
user: PostgreSQL user (defaults from config)
|
|
133
|
+
password: PostgreSQL password (defaults from config)
|
|
134
|
+
database: Database name (defaults from config)
|
|
135
|
+
min_pool_size: Minimum connection pool size
|
|
136
|
+
max_pool_size: Maximum connection pool size
|
|
137
|
+
enable_pgvector: Enable pgvector extension for vector search
|
|
138
|
+
pool: Optional existing asyncpg pool to reuse (from DatabaseManager)
|
|
139
|
+
database_manager: Optional DatabaseManager instance to reuse its pool
|
|
140
|
+
**kwargs: Additional asyncpg connection parameters
|
|
141
|
+
"""
|
|
142
|
+
super().__init__()
|
|
143
|
+
|
|
144
|
+
# Option 1: Reuse existing pool
|
|
145
|
+
self._external_pool = pool
|
|
146
|
+
self._owns_pool = pool is None and database_manager is None
|
|
147
|
+
|
|
148
|
+
# Option 2: Reuse DatabaseManager's pool
|
|
149
|
+
if database_manager is not None:
|
|
150
|
+
self._external_pool = getattr(database_manager, "connection_pool", None)
|
|
151
|
+
if self._external_pool:
|
|
152
|
+
logger.info("Reusing DatabaseManager's connection pool")
|
|
153
|
+
self._owns_pool = False
|
|
154
|
+
|
|
155
|
+
# Load config from settings if not provided (needed for own pool creation)
|
|
156
|
+
# Support both connection string (dsn) and individual parameters
|
|
157
|
+
self.dsn = None
|
|
158
|
+
if not all([host, port, user, password, database]):
|
|
159
|
+
settings = get_settings()
|
|
160
|
+
db_config = settings.database_config
|
|
161
|
+
|
|
162
|
+
# Check if connection string (dsn) is provided (for cloud
|
|
163
|
+
# databases)
|
|
164
|
+
if "dsn" in db_config:
|
|
165
|
+
self.dsn = db_config["dsn"]
|
|
166
|
+
# Still set defaults for logging/display purposes
|
|
167
|
+
host = host or "cloud"
|
|
168
|
+
port = port or 5432
|
|
169
|
+
user = user or "postgres"
|
|
170
|
+
password = password or ""
|
|
171
|
+
database = database or "aiecs"
|
|
172
|
+
else:
|
|
173
|
+
# Use individual parameters (for local databases)
|
|
174
|
+
host = host or db_config.get("host", "localhost")
|
|
175
|
+
port = port or db_config.get("port", 5432)
|
|
176
|
+
user = user or db_config.get("user", "postgres")
|
|
177
|
+
password = password or db_config.get("password", "")
|
|
178
|
+
database = database or db_config.get("database", "aiecs")
|
|
179
|
+
|
|
180
|
+
self.host = host
|
|
181
|
+
self.port = port
|
|
182
|
+
self.user = user
|
|
183
|
+
self.password = password
|
|
184
|
+
self.database = database
|
|
185
|
+
self.min_pool_size = min_pool_size
|
|
186
|
+
self.max_pool_size = max_pool_size
|
|
187
|
+
self.enable_pgvector = enable_pgvector
|
|
188
|
+
self.conn_kwargs = kwargs
|
|
189
|
+
|
|
190
|
+
self.pool: Optional[asyncpg.Pool] = self._external_pool
|
|
191
|
+
self._is_initialized = False
|
|
192
|
+
self._transaction_conn: Optional[asyncpg.Connection] = None
|
|
193
|
+
|
|
194
|
+
    async def initialize(self):
        """
        Initialize the store: create the connection pool (only when this store
        owns it) and run idempotent schema creation.

        Raises:
            Exception: re-raises any pool-creation or schema error after logging.
        """
        try:
            # Create connection pool only if we don't have an external one
            if self._owns_pool:
                # Use connection string (dsn) if available (for cloud databases)
                # Otherwise use individual parameters (for local databases)
                if self.dsn:
                    self.pool = await asyncpg.create_pool(
                        dsn=self.dsn,
                        min_size=self.min_pool_size,
                        max_size=self.max_pool_size,
                        **self.conn_kwargs,
                    )
                    logger.info(
                        "PostgreSQL connection pool created using connection string (cloud/local)"
                    )
                else:
                    self.pool = await asyncpg.create_pool(
                        host=self.host,
                        port=self.port,
                        user=self.user,
                        password=self.password,
                        database=self.database,
                        min_size=self.min_pool_size,
                        max_size=self.max_pool_size,
                        **self.conn_kwargs,
                    )
                    logger.info(
                        f"PostgreSQL connection pool created: {self.host}:{self.port}/{self.database}"
                    )
            else:
                logger.info(
                    "Using external PostgreSQL connection pool (shared with AIECS DatabaseManager)"
                )

            # Create schema
            async with self.pool.acquire() as conn:
                # Optionally enable pgvector first (must exist before any
                # vector(...) column can be added below)
                if self.enable_pgvector:
                    try:
                        await conn.execute("CREATE EXTENSION IF NOT EXISTS vector")
                        logger.info("pgvector extension enabled")
                    except Exception as e:
                        # Graceful degradation: disable vector support rather
                        # than failing startup (extension may be unavailable)
                        logger.warning(
                            f"Failed to enable pgvector: {e}. Continuing without vector support."
                        )
                        self.enable_pgvector = False

                # Execute schema creation (idempotent: IF NOT EXISTS throughout)
                await conn.execute(SCHEMA_SQL)

                # Add vector column if pgvector is enabled
                if self.enable_pgvector:
                    try:
                        # Check if vector column exists
                        column_exists = await conn.fetchval(
                            """
                            SELECT EXISTS (
                                SELECT 1 FROM information_schema.columns
                                WHERE table_name = 'graph_entities'
                                AND column_name = 'embedding_vector'
                            )
                            """
                        )

                        if not column_exists:
                            # Add vector column (default dimension 1536, can be
                            # adjusted)
                            await conn.execute(
                                """
                                ALTER TABLE graph_entities
                                ADD COLUMN embedding_vector vector(1536)
                                """
                            )
                            logger.info("Added embedding_vector column")

                        # Create index if it doesn't exist
                        index_exists = await conn.fetchval(
                            """
                            SELECT EXISTS (
                                SELECT 1 FROM pg_indexes
                                WHERE tablename = 'graph_entities'
                                AND indexname = 'idx_graph_entities_embedding'
                            )
                            """
                        )

                        if not index_exists:
                            await conn.execute(
                                """
                                CREATE INDEX idx_graph_entities_embedding
                                ON graph_entities USING ivfflat (embedding_vector vector_cosine_ops)
                                WITH (lists = 100)
                                """
                            )
                            logger.info("Created vector similarity index")
                    except Exception as e:
                        # Vector setup is best-effort; base schema still works
                        logger.warning(f"Failed to set up pgvector column/index: {e}")

            self._is_initialized = True
            logger.info("PostgreSQL graph store initialized successfully")

        except Exception as e:
            logger.error(f"Failed to initialize PostgreSQL graph store: {e}")
            raise
|
|
300
|
+
|
|
301
|
+
async def close(self):
|
|
302
|
+
"""Close database connection pool (only if we own it)"""
|
|
303
|
+
if self.pool and self._owns_pool:
|
|
304
|
+
await self.pool.close()
|
|
305
|
+
self.pool = None
|
|
306
|
+
logger.info("PostgreSQL connection pool closed")
|
|
307
|
+
elif self.pool and not self._owns_pool:
|
|
308
|
+
logger.info("Detaching from shared PostgreSQL connection pool (not closing)")
|
|
309
|
+
self.pool = None
|
|
310
|
+
self._is_initialized = False
|
|
311
|
+
|
|
312
|
+
    @asynccontextmanager
    async def transaction(self):
        """
        Transaction context manager for atomic operations.

        While the context is active, the acquired connection is stashed in
        ``self._transaction_conn`` so that other store methods (add_entity,
        get_entity, ...) route their statements through the same transaction
        instead of acquiring fresh pool connections.

        Raises:
            RuntimeError: if called before initialize().

        Usage:
            ```python
            async with store.transaction():
                await store.add_entity(entity1)
                await store.add_entity(entity2)
                # Both entities added atomically
            ```
        """
        if not self._is_initialized:
            raise RuntimeError("GraphStore not initialized")

        async with self.pool.acquire() as conn:
            async with conn.transaction():
                # Store connection for use within transaction; save the prior
                # value so nested transaction() calls restore correctly.
                old_conn = self._transaction_conn
                self._transaction_conn = conn
                try:
                    yield conn
                finally:
                    # Always restore, even if the block raised (the
                    # surrounding conn.transaction() then rolls back).
                    self._transaction_conn = old_conn
|
|
337
|
+
|
|
338
|
+
    async def _get_connection(self):
        """
        Get connection from pool or transaction.

        NOTE(review): the two branches return different kinds of objects —
        inside a transaction this returns a raw asyncpg.Connection, otherwise
        it returns the pool's acquire() context manager (which must be used
        with ``async with``). Callers cannot treat the results uniformly;
        confirm intended usage before relying on this helper (the visible
        methods in this file duplicate the branching inline instead).
        """
        if self._transaction_conn:
            return self._transaction_conn
        return self.pool.acquire()
|
|
343
|
+
|
|
344
|
+
# =========================================================================
|
|
345
|
+
# Tier 1: Basic Interface (PostgreSQL-optimized implementations)
|
|
346
|
+
# =========================================================================
|
|
347
|
+
|
|
348
|
+
async def add_entity(self, entity: Entity) -> None:
    """Add entity to PostgreSQL database (upsert keyed on ``id``).

    If an entity with the same id already exists, its type, properties and
    embedding are overwritten and ``updated_at`` is refreshed.

    Args:
        entity: Entity to insert or update.

    Raises:
        RuntimeError: If the store has not been initialized.
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    # Build the statement and serialized arguments once; both execution
    # paths (inside / outside a transaction) use the same query.
    query = """
        INSERT INTO graph_entities (id, entity_type, properties, embedding)
        VALUES ($1, $2, $3::jsonb, $4)
        ON CONFLICT (id) DO UPDATE SET
            entity_type = EXCLUDED.entity_type,
            properties = EXCLUDED.properties,
            embedding = EXCLUDED.embedding,
            updated_at = CURRENT_TIMESTAMP
    """
    args = (
        entity.id,
        entity.entity_type,
        json.dumps(entity.properties),
        self._serialize_embedding(entity.embedding) if entity.embedding else None,
    )

    # Reuse the transaction's connection when inside transaction();
    # otherwise borrow one from the pool for this single statement.
    if self._transaction_conn:
        await self._transaction_conn.execute(query, *args)
    else:
        async with self.pool.acquire() as conn:
            await conn.execute(query, *args)
|
|
392
|
+
|
|
393
|
+
async def get_entity(self, entity_id: str) -> Optional[Entity]:
    """Get entity from PostgreSQL database.

    Args:
        entity_id: Primary key of the entity row.

    Returns:
        The reconstructed ``Entity``, or ``None`` if no row matches.

    Raises:
        RuntimeError: If the store has not been initialized.
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    # Single query shared by both execution paths.
    query = """
        SELECT id, entity_type, properties, embedding
        FROM graph_entities
        WHERE id = $1
    """
    if self._transaction_conn:
        row = await self._transaction_conn.fetchrow(query, entity_id)
    else:
        async with self.pool.acquire() as conn:
            row = await conn.fetchrow(query, entity_id)

    if not row:
        return None

    # jsonb may come back as a str or an already-decoded object depending
    # on the connection's codecs — handle both.
    properties = (
        json.loads(row["properties"])
        if isinstance(row["properties"], str)
        else row["properties"]
    )
    embedding = self._deserialize_embedding(row["embedding"]) if row["embedding"] else None

    return Entity(
        id=row["id"],
        entity_type=row["entity_type"],
        properties=properties,
        embedding=embedding,
    )
|
|
436
|
+
|
|
437
|
+
async def update_entity(self, entity: Entity) -> None:
    """Update an existing entity in the PostgreSQL database.

    Args:
        entity: Entity whose row (matched by ``entity.id``) is rewritten.

    Raises:
        RuntimeError: If the store has not been initialized.
        ValueError: If no entity with ``entity.id`` exists.
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    # Build the statement and serialized arguments once for both paths.
    query = """
        UPDATE graph_entities
        SET entity_type = $2, properties = $3::jsonb, embedding = $4, updated_at = CURRENT_TIMESTAMP
        WHERE id = $1
    """
    args = (
        entity.id,
        entity.entity_type,
        json.dumps(entity.properties),
        self._serialize_embedding(entity.embedding) if entity.embedding else None,
    )

    if self._transaction_conn:
        result = await self._transaction_conn.execute(query, *args)
    else:
        async with self.pool.acquire() as conn:
            result = await conn.execute(query, *args)

    # asyncpg returns the command tag (e.g. "UPDATE 1"); "UPDATE 0" means
    # the WHERE clause matched nothing.
    if result == "UPDATE 0":
        raise ValueError(f"Entity with ID '{entity.id}' not found")
|
|
474
|
+
|
|
475
|
+
async def delete_entity(self, entity_id: str) -> None:
    """Delete entity from PostgreSQL database.

    Args:
        entity_id: Primary key of the entity row to remove.

    Raises:
        RuntimeError: If the store has not been initialized.
        ValueError: If no entity with ``entity_id`` exists.
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    query = "DELETE FROM graph_entities WHERE id = $1"
    if self._transaction_conn:
        result = await self._transaction_conn.execute(query, entity_id)
    else:
        async with self.pool.acquire() as conn:
            result = await conn.execute(query, entity_id)

    # Command tag "DELETE 0" means the row did not exist.
    if result == "DELETE 0":
        raise ValueError(f"Entity with ID '{entity_id}' not found")
|
|
489
|
+
|
|
490
|
+
async def add_relation(self, relation: Relation) -> None:
    """Add relation to PostgreSQL database (upsert keyed on ``id``).

    If a relation with the same id already exists, all of its columns are
    overwritten and ``updated_at`` is refreshed.

    Args:
        relation: Relation to insert or update.

    Raises:
        RuntimeError: If the store has not been initialized.
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    # Build the statement and arguments once; shared by both paths.
    query = """
        INSERT INTO graph_relations (id, relation_type, source_id, target_id, properties, weight)
        VALUES ($1, $2, $3, $4, $5::jsonb, $6)
        ON CONFLICT (id) DO UPDATE SET
            relation_type = EXCLUDED.relation_type,
            source_id = EXCLUDED.source_id,
            target_id = EXCLUDED.target_id,
            properties = EXCLUDED.properties,
            weight = EXCLUDED.weight,
            updated_at = CURRENT_TIMESTAMP
    """
    args = (
        relation.id,
        relation.relation_type,
        relation.source_id,
        relation.target_id,
        json.dumps(relation.properties),
        relation.weight,
    )

    if self._transaction_conn:
        await self._transaction_conn.execute(query, *args)
    else:
        async with self.pool.acquire() as conn:
            await conn.execute(query, *args)
|
|
539
|
+
|
|
540
|
+
async def get_relation(self, relation_id: str) -> Optional[Relation]:
    """Get relation from PostgreSQL database.

    Args:
        relation_id: Primary key of the relation row.

    Returns:
        The reconstructed ``Relation``, or ``None`` if no row matches.

    Raises:
        RuntimeError: If the store has not been initialized.
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    query = """
        SELECT id, relation_type, source_id, target_id, properties, weight
        FROM graph_relations
        WHERE id = $1
    """
    if self._transaction_conn:
        row = await self._transaction_conn.fetchrow(query, relation_id)
    else:
        async with self.pool.acquire() as conn:
            row = await conn.fetchrow(query, relation_id)

    if not row:
        return None

    # jsonb may come back as a str or an already-decoded object.
    properties = (
        json.loads(row["properties"])
        if isinstance(row["properties"], str)
        else row["properties"]
    )

    return Relation(
        id=row["id"],
        relation_type=row["relation_type"],
        source_id=row["source_id"],
        target_id=row["target_id"],
        properties=properties,
        # Bug fix: check "is not None" so a stored weight of 0 is kept
        # instead of silently becoming the 1.0 default.
        weight=float(row["weight"]) if row["weight"] is not None else 1.0,
    )
|
|
583
|
+
|
|
584
|
+
async def delete_relation(self, relation_id: str) -> None:
    """Delete relation from PostgreSQL database.

    Args:
        relation_id: Primary key of the relation row to remove.

    Raises:
        RuntimeError: If the store has not been initialized.
        ValueError: If no relation with ``relation_id`` exists.
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    query = "DELETE FROM graph_relations WHERE id = $1"
    if self._transaction_conn:
        result = await self._transaction_conn.execute(query, relation_id)
    else:
        async with self.pool.acquire() as conn:
            result = await conn.execute(query, relation_id)

    # Command tag "DELETE 0" means the row did not exist.
    if result == "DELETE 0":
        raise ValueError(f"Relation with ID '{relation_id}' not found")
|
|
600
|
+
|
|
601
|
+
async def get_neighbors(
    self,
    entity_id: str,
    relation_type: Optional[str] = None,
    direction: str = "outgoing",
) -> List[Entity]:
    """Get neighboring entities (optimized with SQL).

    Args:
        entity_id: Entity whose neighbors are returned.
        relation_type: Optional filter on the connecting relation type.
        direction: "outgoing", "incoming", or anything else for both
            directions (mirrors the original fallthrough behavior).

    Returns:
        Distinct neighboring entities.

    Raises:
        RuntimeError: If the store has not been initialized.
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    # Build query based on direction
    if direction == "outgoing":
        query = """
            SELECT DISTINCT e.id, e.entity_type, e.properties, e.embedding
            FROM graph_entities e
            JOIN graph_relations r ON e.id = r.target_id
            WHERE r.source_id = $1
        """
    elif direction == "incoming":
        query = """
            SELECT DISTINCT e.id, e.entity_type, e.properties, e.embedding
            FROM graph_entities e
            JOIN graph_relations r ON e.id = r.source_id
            WHERE r.target_id = $1
        """
    else:  # both (and any unrecognized direction)
        query = """
            SELECT DISTINCT e.id, e.entity_type, e.properties, e.embedding
            FROM graph_entities e
            WHERE e.id IN (
                SELECT target_id FROM graph_relations WHERE source_id = $1
                UNION
                SELECT source_id FROM graph_relations WHERE target_id = $1
            )
        """

    params: List[Any] = [entity_id]
    if relation_type:
        # Bug fix: any direction other than outgoing/incoming builds the
        # UNION query above (no alias `r`), so the filter must be keyed on
        # the query shape rather than on direction == "both" exactly.
        if direction not in ("outgoing", "incoming"):
            # Inject the type filter into both halves of the UNION.
            query = query.replace(
                "SELECT target_id FROM graph_relations WHERE source_id = $1",
                "SELECT target_id FROM graph_relations WHERE source_id = $1 AND relation_type = $2",
            )
            query = query.replace(
                "SELECT source_id FROM graph_relations WHERE target_id = $1",
                "SELECT source_id FROM graph_relations WHERE target_id = $1 AND relation_type = $2",
            )
        else:
            query += " AND r.relation_type = $2"
        params.append(relation_type)

    if self._transaction_conn:
        rows = await self._transaction_conn.fetch(query, *params)
    else:
        async with self.pool.acquire() as conn:
            rows = await conn.fetch(query, *params)

    entities = []
    for row in rows:
        # jsonb may come back as a str or an already-decoded object.
        properties = (
            json.loads(row["properties"])
            if isinstance(row["properties"], str)
            else row["properties"]
        )
        embedding = self._deserialize_embedding(row["embedding"]) if row["embedding"] else None
        entities.append(
            Entity(
                id=row["id"],
                entity_type=row["entity_type"],
                properties=properties,
                embedding=embedding,
            )
        )

    return entities
|
|
678
|
+
|
|
679
|
+
async def get_all_entities(
    self, entity_type: Optional[str] = None, limit: Optional[int] = None
) -> List[Entity]:
    """Get all entities, optionally filtered by type.

    Args:
        entity_type: If given, only entities of this type are returned.
        limit: If truthy, cap the number of rows returned.

    Raises:
        RuntimeError: If the store has not been initialized.
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    query = "SELECT id, entity_type, properties, embedding FROM graph_entities"
    params: List[Any] = []

    if entity_type:
        query += " WHERE entity_type = $1"
        params.append(entity_type)

    if limit:
        # Placeholder index depends on whether the type filter was added.
        query += f" LIMIT ${len(params) + 1}"
        params.append(limit)

    if self._transaction_conn:
        rows = await self._transaction_conn.fetch(query, *params)
    else:
        async with self.pool.acquire() as conn:
            rows = await conn.fetch(query, *params)

    entities = []
    for row in rows:
        # jsonb may come back as a str or an already-decoded object.
        properties = (
            json.loads(row["properties"])
            if isinstance(row["properties"], str)
            else row["properties"]
        )
        embedding = self._deserialize_embedding(row["embedding"]) if row["embedding"] else None
        entities.append(
            Entity(
                id=row["id"],
                entity_type=row["entity_type"],
                properties=properties,
                embedding=embedding,
            )
        )

    return entities
|
|
722
|
+
|
|
723
|
+
async def get_stats(self) -> Dict[str, Any]:
    """Get graph statistics.

    Returns:
        Dict with total entity/relation counts, per-type breakdowns, the
        backend name, and the current/max pool size.

    Raises:
        RuntimeError: If the store has not been initialized.
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    async def _collect(conn):
        # Run all four aggregate queries on the same connection.
        return (
            await conn.fetchval("SELECT COUNT(*) FROM graph_entities"),
            await conn.fetchval("SELECT COUNT(*) FROM graph_relations"),
            await conn.fetch(
                "SELECT entity_type, COUNT(*) as count FROM graph_entities GROUP BY entity_type"
            ),
            await conn.fetch(
                "SELECT relation_type, COUNT(*) as count FROM graph_relations GROUP BY relation_type"
            ),
        )

    if self._transaction_conn:
        entity_count, relation_count, entity_types, relation_types = await _collect(
            self._transaction_conn
        )
    else:
        async with self.pool.acquire() as conn:
            entity_count, relation_count, entity_types, relation_types = await _collect(conn)

    return {
        "entity_count": entity_count,
        "relation_count": relation_count,
        "entity_types": {row["entity_type"]: row["count"] for row in entity_types},
        "relation_types": {row["relation_type"]: row["count"] for row in relation_types},
        "backend": "postgresql",
        "pool_size": (f"{self.pool.get_size()}/{self.max_pool_size}" if self.pool else "0/0"),
    }
|
|
757
|
+
|
|
758
|
+
# =========================================================================
|
|
759
|
+
# Tier 2: Advanced Interface (PostgreSQL-optimized with recursive CTEs)
|
|
760
|
+
# =========================================================================
|
|
761
|
+
|
|
762
|
+
async def find_paths(
    self,
    source_id: str,
    target_id: str,
    max_depth: int = 3,
    limit: Optional[int] = 10,
) -> List[Path]:
    """
    Find paths using WITH RECURSIVE CTE (PostgreSQL-optimized)

    This overrides the default implementation with an efficient
    recursive SQL query.

    Args:
        source_id: Entity id the paths start from.
        target_id: Entity id the paths must end at.
        max_depth: Maximum number of hops explored by the recursion.
        limit: Maximum number of paths returned; None (or 0) falls back
            to 10 via ``limit or 10`` below.

    Raises:
        RuntimeError: If the store has not been initialized.
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    # Recursive CTE to find all paths
    query = """
        WITH RECURSIVE paths AS (
            -- Base case: direct connections
            SELECT
                r.source_id,
                r.target_id,
                r.relation_type,
                ARRAY[r.source_id] as path_nodes,
                ARRAY[r.id] as path_relations,
                1 as depth
            FROM graph_relations r
            WHERE r.source_id = $1

            UNION ALL

            -- Recursive case: extend paths
            SELECT
                p.source_id,
                r.target_id,
                r.relation_type,
                p.path_nodes || r.source_id,
                p.path_relations || r.id,
                p.depth + 1
            FROM paths p
            JOIN graph_relations r ON p.target_id = r.source_id
            WHERE p.depth < $3
            AND NOT (r.source_id = ANY(p.path_nodes)) -- Avoid cycles
        )
        SELECT DISTINCT
            path_nodes || target_id as nodes,
            path_relations as relations,
            depth
        FROM paths
        WHERE target_id = $2
        ORDER BY depth ASC
        LIMIT $4
    """

    # Reuse the transaction's connection when inside transaction();
    # otherwise borrow one from the pool.
    if self._transaction_conn:
        conn = self._transaction_conn
        rows = await conn.fetch(query, source_id, target_id, max_depth, limit or 10)
    else:
        async with self.pool.acquire() as conn:
            rows = await conn.fetch(query, source_id, target_id, max_depth, limit or 10)

    paths = []
    for row in rows:
        # Arrays of ids produced by the CTE above.
        node_ids = row["nodes"]
        relation_ids = row["relations"]

        # Fetch entities and relations
        # NOTE(review): one get_entity/get_relation call per id (N+1
        # queries) — acceptable for short paths; verify for deep graphs.
        entities = []
        for node_id in node_ids:
            entity = await self.get_entity(node_id)
            if entity:
                entities.append(entity)

        relations = []
        for rel_id in relation_ids:
            relation = await self.get_relation(rel_id)
            if relation:
                relations.append(relation)

        # Skip rows whose referenced entities/relations vanished since the
        # CTE ran (both lists must be non-empty to form a Path).
        if entities and relations:
            paths.append(Path(nodes=entities, edges=relations))

    return paths
|
|
846
|
+
|
|
847
|
+
# =========================================================================
|
|
848
|
+
# Helper methods
|
|
849
|
+
# =========================================================================
|
|
850
|
+
|
|
851
|
+
def _serialize_embedding(self, embedding) -> Optional[bytes]:
|
|
852
|
+
"""Serialize numpy array or list to bytes"""
|
|
853
|
+
if embedding is None:
|
|
854
|
+
return None
|
|
855
|
+
# Handle both numpy array and list
|
|
856
|
+
if isinstance(embedding, np.ndarray):
|
|
857
|
+
return embedding.tobytes()
|
|
858
|
+
elif isinstance(embedding, (list, tuple)):
|
|
859
|
+
# Convert list to numpy array first
|
|
860
|
+
arr = np.array(embedding, dtype=np.float32)
|
|
861
|
+
return arr.tobytes()
|
|
862
|
+
else:
|
|
863
|
+
# Try to convert to numpy array
|
|
864
|
+
arr = np.array(embedding, dtype=np.float32)
|
|
865
|
+
return arr.tobytes()
|
|
866
|
+
|
|
867
|
+
def _deserialize_embedding(self, data: bytes) -> Optional[np.ndarray]:
|
|
868
|
+
"""Deserialize bytes to numpy array"""
|
|
869
|
+
if not data:
|
|
870
|
+
return None
|
|
871
|
+
return np.frombuffer(data, dtype=np.float32)
|