aiecs 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiecs/__init__.py +72 -0
- aiecs/__main__.py +41 -0
- aiecs/aiecs_client.py +469 -0
- aiecs/application/__init__.py +10 -0
- aiecs/application/executors/__init__.py +10 -0
- aiecs/application/executors/operation_executor.py +363 -0
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +100 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
- aiecs/application/knowledge_graph/search/reranker.py +295 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +484 -0
- aiecs/config/__init__.py +16 -0
- aiecs/config/config.py +498 -0
- aiecs/config/graph_config.py +137 -0
- aiecs/config/registry.py +23 -0
- aiecs/core/__init__.py +46 -0
- aiecs/core/interface/__init__.py +34 -0
- aiecs/core/interface/execution_interface.py +152 -0
- aiecs/core/interface/storage_interface.py +171 -0
- aiecs/domain/__init__.py +289 -0
- aiecs/domain/agent/__init__.py +189 -0
- aiecs/domain/agent/base_agent.py +697 -0
- aiecs/domain/agent/exceptions.py +103 -0
- aiecs/domain/agent/graph_aware_mixin.py +559 -0
- aiecs/domain/agent/hybrid_agent.py +490 -0
- aiecs/domain/agent/integration/__init__.py +26 -0
- aiecs/domain/agent/integration/context_compressor.py +222 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
- aiecs/domain/agent/integration/retry_policy.py +219 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +646 -0
- aiecs/domain/agent/lifecycle.py +296 -0
- aiecs/domain/agent/llm_agent.py +300 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +197 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +160 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
- aiecs/domain/agent/models.py +317 -0
- aiecs/domain/agent/observability.py +407 -0
- aiecs/domain/agent/persistence.py +289 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +161 -0
- aiecs/domain/agent/prompts/formatters.py +189 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +260 -0
- aiecs/domain/agent/tool_agent.py +257 -0
- aiecs/domain/agent/tools/__init__.py +12 -0
- aiecs/domain/agent/tools/schema_generator.py +221 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +477 -0
- aiecs/domain/community/analytics.py +481 -0
- aiecs/domain/community/collaborative_workflow.py +642 -0
- aiecs/domain/community/communication_hub.py +645 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +800 -0
- aiecs/domain/community/community_manager.py +813 -0
- aiecs/domain/community/decision_engine.py +879 -0
- aiecs/domain/community/exceptions.py +225 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +268 -0
- aiecs/domain/community/resource_manager.py +457 -0
- aiecs/domain/community/shared_context_manager.py +603 -0
- aiecs/domain/context/__init__.py +58 -0
- aiecs/domain/context/context_engine.py +989 -0
- aiecs/domain/context/conversation_models.py +354 -0
- aiecs/domain/context/graph_memory.py +467 -0
- aiecs/domain/execution/__init__.py +12 -0
- aiecs/domain/execution/model.py +57 -0
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +130 -0
- aiecs/domain/knowledge_graph/models/evidence.py +194 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
- aiecs/domain/knowledge_graph/models/path.py +179 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
- aiecs/domain/knowledge_graph/models/query.py +272 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
- aiecs/domain/knowledge_graph/models/relation.py +136 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
- aiecs/domain/task/__init__.py +13 -0
- aiecs/domain/task/dsl_processor.py +613 -0
- aiecs/domain/task/model.py +62 -0
- aiecs/domain/task/task_context.py +268 -0
- aiecs/infrastructure/__init__.py +24 -0
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +601 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
- aiecs/infrastructure/graph_storage/cache.py +429 -0
- aiecs/infrastructure/graph_storage/distributed.py +226 -0
- aiecs/infrastructure/graph_storage/error_handling.py +390 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +514 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
- aiecs/infrastructure/graph_storage/metrics.py +357 -0
- aiecs/infrastructure/graph_storage/migration.py +413 -0
- aiecs/infrastructure/graph_storage/pagination.py +471 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
- aiecs/infrastructure/graph_storage/postgres.py +871 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +623 -0
- aiecs/infrastructure/graph_storage/streaming.py +495 -0
- aiecs/infrastructure/messaging/__init__.py +13 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
- aiecs/infrastructure/messaging/websocket_manager.py +298 -0
- aiecs/infrastructure/monitoring/__init__.py +34 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
- aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
- aiecs/infrastructure/monitoring/structured_logger.py +48 -0
- aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
- aiecs/infrastructure/persistence/__init__.py +24 -0
- aiecs/infrastructure/persistence/context_engine_client.py +187 -0
- aiecs/infrastructure/persistence/database_manager.py +333 -0
- aiecs/infrastructure/persistence/file_storage.py +754 -0
- aiecs/infrastructure/persistence/redis_client.py +220 -0
- aiecs/llm/__init__.py +86 -0
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/callbacks/custom_callbacks.py +264 -0
- aiecs/llm/client_factory.py +420 -0
- aiecs/llm/clients/__init__.py +33 -0
- aiecs/llm/clients/base_client.py +193 -0
- aiecs/llm/clients/googleai_client.py +181 -0
- aiecs/llm/clients/openai_client.py +131 -0
- aiecs/llm/clients/vertex_client.py +437 -0
- aiecs/llm/clients/xai_client.py +184 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +275 -0
- aiecs/llm/config/config_validator.py +236 -0
- aiecs/llm/config/model_config.py +151 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +91 -0
- aiecs/main.py +363 -0
- aiecs/scripts/__init__.py +3 -0
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/version_manager.py +215 -0
- aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
- aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
- aiecs/scripts/dependance_check/__init__.py +17 -0
- aiecs/scripts/dependance_check/dependency_checker.py +938 -0
- aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
- aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
- aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
- aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
- aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
- aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
- aiecs/scripts/tools_develop/README.md +449 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
- aiecs/scripts/tools_develop/verify_tools.py +356 -0
- aiecs/tasks/__init__.py +1 -0
- aiecs/tasks/worker.py +172 -0
- aiecs/tools/__init__.py +299 -0
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +303 -0
- aiecs/tools/apisource/providers/__init__.py +115 -0
- aiecs/tools/apisource/providers/base.py +664 -0
- aiecs/tools/apisource/providers/census.py +401 -0
- aiecs/tools/apisource/providers/fred.py +564 -0
- aiecs/tools/apisource/providers/newsapi.py +412 -0
- aiecs/tools/apisource/providers/worldbank.py +357 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +375 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
- aiecs/tools/apisource/tool.py +850 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +338 -0
- aiecs/tools/base_tool.py +201 -0
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +599 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
- aiecs/tools/docs/content_insertion_tool.py +1333 -0
- aiecs/tools/docs/document_creator_tool.py +1317 -0
- aiecs/tools/docs/document_layout_tool.py +1166 -0
- aiecs/tools/docs/document_parser_tool.py +994 -0
- aiecs/tools/docs/document_writer_tool.py +1818 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
- aiecs/tools/langchain_adapter.py +542 -0
- aiecs/tools/schema_generator.py +275 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +589 -0
- aiecs/tools/search_tool/cache.py +260 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +216 -0
- aiecs/tools/search_tool/core.py +749 -0
- aiecs/tools/search_tool/deduplicator.py +123 -0
- aiecs/tools/search_tool/error_handler.py +271 -0
- aiecs/tools/search_tool/metrics.py +371 -0
- aiecs/tools/search_tool/rate_limiter.py +178 -0
- aiecs/tools/search_tool/schemas.py +277 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
- aiecs/tools/statistics/data_loader_tool.py +564 -0
- aiecs/tools/statistics/data_profiler_tool.py +658 -0
- aiecs/tools/statistics/data_transformer_tool.py +573 -0
- aiecs/tools/statistics/data_visualizer_tool.py +495 -0
- aiecs/tools/statistics/model_trainer_tool.py +487 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
- aiecs/tools/task_tools/__init__.py +86 -0
- aiecs/tools/task_tools/chart_tool.py +732 -0
- aiecs/tools/task_tools/classfire_tool.py +922 -0
- aiecs/tools/task_tools/image_tool.py +447 -0
- aiecs/tools/task_tools/office_tool.py +684 -0
- aiecs/tools/task_tools/pandas_tool.py +635 -0
- aiecs/tools/task_tools/report_tool.py +635 -0
- aiecs/tools/task_tools/research_tool.py +392 -0
- aiecs/tools/task_tools/scraper_tool.py +715 -0
- aiecs/tools/task_tools/stats_tool.py +688 -0
- aiecs/tools/temp_file_manager.py +130 -0
- aiecs/tools/tool_executor/__init__.py +37 -0
- aiecs/tools/tool_executor/tool_executor.py +881 -0
- aiecs/utils/LLM_output_structor.py +445 -0
- aiecs/utils/__init__.py +34 -0
- aiecs/utils/base_callback.py +47 -0
- aiecs/utils/cache_provider.py +695 -0
- aiecs/utils/execution_utils.py +184 -0
- aiecs/utils/logging.py +1 -0
- aiecs/utils/prompt_loader.py +14 -0
- aiecs/utils/token_usage_repository.py +323 -0
- aiecs/ws/__init__.py +0 -0
- aiecs/ws/socket_server.py +52 -0
- aiecs-1.5.1.dist-info/METADATA +608 -0
- aiecs-1.5.1.dist-info/RECORD +302 -0
- aiecs-1.5.1.dist-info/WHEEL +5 -0
- aiecs-1.5.1.dist-info/entry_points.txt +10 -0
- aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
- aiecs-1.5.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,623 @@
|
|
|
1
|
+
"""
|
|
2
|
+
SQLite Graph Storage Backend
|
|
3
|
+
|
|
4
|
+
Provides file-based persistent graph storage using SQLite.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import aiosqlite
|
|
9
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from contextlib import asynccontextmanager
|
|
12
|
+
|
|
13
|
+
from aiecs.domain.knowledge_graph.models.entity import Entity
|
|
14
|
+
from aiecs.domain.knowledge_graph.models.relation import Relation
|
|
15
|
+
from aiecs.infrastructure.graph_storage.base import GraphStore
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# SQL schema for the SQLite graph store.
#
# Two tables back the graph:
#   * entities  - one row per node; `properties` holds a JSON document and
#     `embedding` an optional serialized vector.
#   * relations - one row per directed edge from `source_id` to `target_id`.
#     Both columns reference entities(id) with ON DELETE CASCADE, so deleting
#     an entity also deletes its edges. SQLite only enforces foreign keys
#     when `PRAGMA foreign_keys = ON` has been issued on the connection.
#
# Every statement uses IF NOT EXISTS, so the script can be executed again
# against a database that already contains the schema.
SCHEMA_SQL = """
-- Entities table
CREATE TABLE IF NOT EXISTS entities (
    id TEXT PRIMARY KEY,
    entity_type TEXT NOT NULL,
    properties TEXT NOT NULL, -- JSON
    embedding BLOB, -- Vector embedding (serialized)
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- Relations table
CREATE TABLE IF NOT EXISTS relations (
    id TEXT PRIMARY KEY,
    relation_type TEXT NOT NULL,
    source_id TEXT NOT NULL,
    target_id TEXT NOT NULL,
    properties TEXT NOT NULL, -- JSON
    weight REAL DEFAULT 1.0,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (source_id) REFERENCES entities(id) ON DELETE CASCADE,
    FOREIGN KEY (target_id) REFERENCES entities(id) ON DELETE CASCADE
);

-- Indexes for performance
CREATE INDEX IF NOT EXISTS idx_entities_type ON entities(entity_type);
CREATE INDEX IF NOT EXISTS idx_relations_type ON relations(relation_type);
CREATE INDEX IF NOT EXISTS idx_relations_source ON relations(source_id);
CREATE INDEX IF NOT EXISTS idx_relations_target ON relations(target_id);
CREATE INDEX IF NOT EXISTS idx_relations_source_target ON relations(source_id, target_id);
"""
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class SQLiteGraphStore(GraphStore):
|
|
54
|
+
"""
|
|
55
|
+
SQLite-based graph storage implementation
|
|
56
|
+
|
|
57
|
+
Provides persistent file-based graph storage with:
|
|
58
|
+
- ACID transactions
|
|
59
|
+
- SQL-optimized queries
|
|
60
|
+
- Optional recursive CTEs for traversal
|
|
61
|
+
- A single shared aiosqlite connection (no pooling)
|
|
62
|
+
|
|
63
|
+
Features:
|
|
64
|
+
- File-based persistence (single .db file)
|
|
65
|
+
- Automatic schema initialization
|
|
66
|
+
- Efficient SQL queries for graph operations
|
|
67
|
+
- Optional Tier 2 optimizations
|
|
68
|
+
|
|
69
|
+
Example:
|
|
70
|
+
```python
|
|
71
|
+
store = SQLiteGraphStore("knowledge_graph.db")
|
|
72
|
+
await store.initialize()
|
|
73
|
+
|
|
74
|
+
entity = Entity(id="e1", entity_type="Person", properties={"name": "Alice"})
|
|
75
|
+
await store.add_entity(entity)
|
|
76
|
+
|
|
77
|
+
await store.close()
|
|
78
|
+
```
|
|
79
|
+
"""
|
|
80
|
+
|
|
81
|
+
def __init__(self, db_path: str = ":memory:", **kwargs):
    """
    Create an unconnected SQLite graph store

    No database I/O happens here; the connection is opened later by
    ``initialize()``.

    Args:
        db_path: Location of the SQLite database file, or ":memory:" for
            an in-memory database
        **kwargs: Extra keyword arguments kept for the later
            ``aiosqlite.connect`` call
    """
    super().__init__()

    # Connection settings, consumed when the store is initialized
    self.db_path, self.conn_kwargs = db_path, kwargs

    # Runtime state: nothing is open and no transaction is active yet
    self._in_transaction = False
    self._is_initialized = False
    self.conn: Optional[aiosqlite.Connection] = None
|
|
95
|
+
|
|
96
|
+
async def initialize(self):
    """
    Open the SQLite database and create the schema

    For file-backed databases the parent directory is created first. The
    new connection is only published on ``self.conn`` once foreign keys are
    enabled and the schema script has been applied, so a failure part-way
    through never leaves a half-configured connection on the store.

    Calling this on a store that already holds a connection closes that
    connection first instead of orphaning it.

    Raises:
        Exception: Whatever ``aiosqlite`` raises while connecting or
            applying the schema; the new connection is closed before the
            error propagates.
    """
    # Re-initialising must not leak the previous connection
    if self.conn is not None:
        await self.close()

    # Create directory if needed
    if self.db_path != ":memory:":
        Path(self.db_path).parent.mkdir(parents=True, exist_ok=True)

    connection = await aiosqlite.connect(self.db_path, **self.conn_kwargs)
    try:
        # Foreign keys are off by default in SQLite; the schema's
        # ON DELETE CASCADE clauses depend on this pragma
        await connection.execute("PRAGMA foreign_keys = ON")
        await connection.executescript(SCHEMA_SQL)
        await connection.commit()
    except BaseException:
        # Includes cancellation: never leave an unreachable open connection
        await connection.close()
        raise

    self.conn = connection
    self._is_initialized = True
|
|
113
|
+
|
|
114
|
+
async def close(self):
    """
    Close the database connection

    Does nothing when no connection is held. Otherwise the connection is
    closed, ``self.conn`` is cleared and the store is marked uninitialized.
    """
    connection = self.conn
    if not connection:
        # Never initialized, or already closed
        return

    await connection.close()
    self.conn, self._is_initialized = None, False
|
|
120
|
+
|
|
121
|
+
@asynccontextmanager
async def transaction(self):
    """
    Transaction context manager for atomic operations

    Usage:
        ```python
        async with store.transaction():
            await store.add_entity(entity1)
            await store.add_entity(entity2)
            # Both entities added atomically
        ```

    While the context is active the store's write methods skip their own
    commit. Everything is committed once when the block exits normally and
    rolled back when it exits with any exception, including task
    cancellation.

    Raises:
        RuntimeError: If the store is not initialized, or if a transaction
            is already active on this store (nesting is not supported).
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    # Refuse to nest before touching any state. Issuing a second BEGIN would
    # fail inside SQLite, and the cleanup below would then roll back the
    # outer transaction and clear its flag.
    if self._in_transaction:
        raise RuntimeError("Nested transactions are not supported")

    # Track transaction state to prevent auto-commits in operations
    self._in_transaction = True
    try:
        await self.conn.execute("BEGIN")
        yield
        # Commit on success
        await self.conn.commit()
    except BaseException:
        # BaseException rather than Exception: asyncio.CancelledError is not
        # an Exception subclass. Without a rollback here a cancelled block
        # would leave its writes pending, and the next auto-committing
        # operation would persist them.
        await self.conn.rollback()
        raise
    finally:
        self._in_transaction = False
|
|
154
|
+
|
|
155
|
+
# =========================================================================
|
|
156
|
+
# Tier 1: Basic Interface (SQL-optimized implementations)
|
|
157
|
+
# =========================================================================
|
|
158
|
+
|
|
159
|
+
async def add_entity(self, entity: Entity) -> None:
    """
    Insert a new entity row

    Args:
        entity: Entity to persist; its properties are stored as JSON and
            its embedding, when truthy, via ``_serialize_embedding``

    Raises:
        RuntimeError: If the store has not been initialized
        ValueError: If an entity with the same ID is already stored
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    # Reject duplicates up front with a ValueError
    lookup = await self.conn.execute("SELECT id FROM entities WHERE id = ?", (entity.id,))
    if await lookup.fetchone():
        raise ValueError(f"Entity with ID '{entity.id}' already exists")

    row_values = (
        entity.id,
        entity.entity_type,
        json.dumps(entity.properties),
        self._serialize_embedding(entity.embedding) if entity.embedding else None,
    )
    insert_sql = """
        INSERT INTO entities (id, entity_type, properties, embedding)
        VALUES (?, ?, ?, ?)
        """
    await self.conn.execute(insert_sql, row_values)

    # Inside transaction() the commit is left to the context manager
    if self._in_transaction:
        return
    await self.conn.commit()
|
|
184
|
+
|
|
185
|
+
async def get_entity(self, entity_id: str) -> Optional[Entity]:
    """
    Look up a single entity by ID

    Args:
        entity_id: ID of the entity to load

    Returns:
        The entity built by ``_row_to_entity``, or None when no row matches

    Raises:
        RuntimeError: If the store has not been initialized
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    select_sql = """
        SELECT id, entity_type, properties, embedding
        FROM entities
        WHERE id = ?
        """
    result = await self.conn.execute(select_sql, (entity_id,))
    record = await result.fetchone()
    return self._row_to_entity(record) if record else None
|
|
204
|
+
|
|
205
|
+
async def update_entity(self, entity: Entity) -> Entity:
    """
    Overwrite the stored type, properties and embedding of an entity

    The row's ``updated_at`` column is set to the current timestamp.

    Args:
        entity: Entity carrying the ID to update and the new values

    Returns:
        The same ``entity`` object that was passed in

    Raises:
        RuntimeError: If the store has not been initialized
        ValueError: If no entity with that ID is stored
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    # The entity must already be present
    if not await self.get_entity(entity.id):
        raise ValueError(f"Entity with ID '{entity.id}' does not exist")

    new_values = (
        entity.entity_type,
        json.dumps(entity.properties),
        self._serialize_embedding(entity.embedding) if entity.embedding else None,
        entity.id,
    )
    update_sql = """
        UPDATE entities
        SET entity_type = ?, properties = ?, embedding = ?, updated_at = CURRENT_TIMESTAMP
        WHERE id = ?
        """
    await self.conn.execute(update_sql, new_values)

    # Inside transaction() the commit is left to the context manager
    if not self._in_transaction:
        await self.conn.commit()
    return entity
|
|
232
|
+
|
|
233
|
+
async def delete_entity(self, entity_id: str):
    """
    Delete an entity row by ID

    No existence check is made before the DELETE is issued. Relations that
    reference the entity are left to the schema's ON DELETE CASCADE foreign
    keys rather than being deleted explicitly here.

    Args:
        entity_id: ID of the entity to delete

    Raises:
        RuntimeError: If the store has not been initialized
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    delete_sql = "DELETE FROM entities WHERE id = ?"
    await self.conn.execute(delete_sql, (entity_id,))

    # Inside transaction() the commit is left to the context manager
    if self._in_transaction:
        return
    await self.conn.commit()
|
|
242
|
+
|
|
243
|
+
async def add_relation(self, relation: Relation) -> None:
    """
    Insert a new relation row between two stored entities

    Args:
        relation: Relation to persist; its properties are stored as JSON

    Raises:
        RuntimeError: If the store has not been initialized
        ValueError: If the relation ID is already stored, or if the source
            or target entity cannot be found
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    # Reject duplicate relation IDs
    lookup = await self.conn.execute("SELECT id FROM relations WHERE id = ?", (relation.id,))
    if await lookup.fetchone():
        raise ValueError(f"Relation with ID '{relation.id}' already exists")

    # Both endpoints are loaded before either is checked
    source_entity = await self.get_entity(relation.source_id)
    target_entity = await self.get_entity(relation.target_id)
    if not source_entity:
        raise ValueError(f"Source entity '{relation.source_id}' does not exist")
    if not target_entity:
        raise ValueError(f"Target entity '{relation.target_id}' does not exist")

    row_values = (
        relation.id,
        relation.relation_type,
        relation.source_id,
        relation.target_id,
        json.dumps(relation.properties),
        relation.weight,
    )
    insert_sql = """
        INSERT INTO relations (id, relation_type, source_id, target_id, properties, weight)
        VALUES (?, ?, ?, ?, ?, ?)
        """
    await self.conn.execute(insert_sql, row_values)

    # Inside transaction() the commit is left to the context manager
    if self._in_transaction:
        return
    await self.conn.commit()
|
|
282
|
+
|
|
283
|
+
async def get_relation(self, relation_id: str) -> Optional[Relation]:
    """
    Look up a single relation by ID

    Args:
        relation_id: ID of the relation to load

    Returns:
        The relation built by ``_row_to_relation``, or None when no row
        matches

    Raises:
        RuntimeError: If the store has not been initialized
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    select_sql = """
        SELECT id, relation_type, source_id, target_id, properties, weight
        FROM relations
        WHERE id = ?
        """
    result = await self.conn.execute(select_sql, (relation_id,))
    record = await result.fetchone()
    return self._row_to_relation(record) if record else None
|
|
302
|
+
|
|
303
|
+
async def update_relation(self, relation: Relation) -> Relation:
    """
    Overwrite the stored fields of a relation

    Type, endpoints, properties and weight are all replaced, and the row's
    ``updated_at`` column is set to the current timestamp.

    Args:
        relation: Relation carrying the ID to update and the new values

    Returns:
        The same ``relation`` object that was passed in

    Raises:
        RuntimeError: If the store has not been initialized
        ValueError: If no relation with that ID is stored
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    # The relation must already be present
    if not await self.get_relation(relation.id):
        raise ValueError(f"Relation with ID '{relation.id}' does not exist")

    new_values = (
        relation.relation_type,
        relation.source_id,
        relation.target_id,
        json.dumps(relation.properties),
        relation.weight,
        relation.id,
    )
    update_sql = """
        UPDATE relations
        SET relation_type = ?, source_id = ?, target_id = ?, properties = ?,
        weight = ?, updated_at = CURRENT_TIMESTAMP
        WHERE id = ?
        """
    await self.conn.execute(update_sql, new_values)

    # Inside transaction() the commit is left to the context manager
    if not self._in_transaction:
        await self.conn.commit()
    return relation
|
|
337
|
+
|
|
338
|
+
async def delete_relation(self, relation_id: str):
    """
    Delete a relation row by ID

    No existence check is made before the DELETE is issued.

    Args:
        relation_id: ID of the relation to delete

    Raises:
        RuntimeError: If the store has not been initialized
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    delete_sql = "DELETE FROM relations WHERE id = ?"
    await self.conn.execute(delete_sql, (relation_id,))

    # Inside transaction() the commit is left to the context manager
    if self._in_transaction:
        return
    await self.conn.commit()
|
|
346
|
+
|
|
347
|
+
async def get_neighbors(
    self,
    entity_id: str,
    relation_type: Optional[str] = None,
    direction: str = "outgoing",
) -> List[Entity]:
    """
    Return the entities adjacent to ``entity_id``

    Implements the base GraphStore interface.

    Args:
        entity_id: ID of the entity whose neighbors are wanted
        relation_type: When given, only relations of this type are followed
        direction: "outgoing" (the entity is the source), "incoming" (the
            entity is the target) or "both"; any other value runs no query

    Returns:
        Outgoing neighbors followed by incoming neighbors. The queries join
        relations to entities without de-duplication, so an entity reached
        through several relations appears once per relation.

    Raises:
        RuntimeError: If the store has not been initialized
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    # Bind values shared by both queries: the anchor entity, then the
    # optional relation type
    bind_values = [entity_id]
    type_filter = ""
    if relation_type:
        type_filter = "AND r.relation_type = ?"
        bind_values.append(relation_type)

    # Collect the statements to run, outgoing before incoming
    statements = []
    if direction in ("outgoing", "both"):
        statements.append(
            f"""
            SELECT e.id, e.entity_type, e.properties, e.embedding
            FROM relations r
            JOIN entities e ON r.target_id = e.id
            WHERE r.source_id = ? {type_filter}
            """
        )
    if direction in ("incoming", "both"):
        statements.append(
            f"""
            SELECT e.id, e.entity_type, e.properties, e.embedding
            FROM relations r
            JOIN entities e ON r.source_id = e.id
            WHERE r.target_id = ? {type_filter}
            """
        )

    found = []
    for sql in statements:
        cursor = await self.conn.execute(sql, bind_values)
        found.extend([self._row_to_entity(row) for row in await cursor.fetchall()])
    return found
|
|
415
|
+
|
|
416
|
+
# =========================================================================
|
|
417
|
+
# Tier 2: Advanced Interface (SQL-optimized overrides)
|
|
418
|
+
# =========================================================================
|
|
419
|
+
|
|
420
|
+
async def vector_search(
    self,
    query_embedding: List[float],
    entity_type: Optional[str] = None,
    max_results: int = 10,
    score_threshold: float = 0.0,
) -> List[Tuple[Entity, float]]:
    """
    Rank stored entities by similarity to a query vector.

    Every row of the ``entities`` table (optionally restricted to one
    entity type) is loaded, converted with ``_row_to_entity`` and scored in
    Python with ``_cosine_similarity``. Nothing is pushed down to SQLite
    beyond the optional type filter, so the cost grows with the number of
    stored entities.

    For production scale, consider:
    - pgvector extension (PostgreSQL)
    - Dedicated vector database (Qdrant, Milvus)
    - Pre-computed ANN indexes

    Args:
        query_embedding: Query vector; must be non-empty.
        entity_type: When truthy, only entities of this type are scored.
        max_results: Upper bound on the number of returned pairs (applied
            as a slice after sorting).
        score_threshold: Minimum score a candidate needs to be kept.

    Returns:
        List of (entity, similarity_score) tuples, highest score first.

    Raises:
        RuntimeError: If the store has not been initialized.
        ValueError: If ``query_embedding`` is empty.
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    if not query_embedding:
        raise ValueError("Query embedding cannot be empty")

    # The entity type is bound as a parameter; only the fixed WHERE
    # fragment is interpolated into the SQL text.
    if entity_type:
        where_clause = "WHERE entity_type = ?"
        bind_values = [entity_type]
    else:
        where_clause = ""
        bind_values = []

    sql = f"""
        SELECT id, entity_type, properties, embedding
        FROM entities
        {where_clause}
    """

    result_cursor = await self.conn.execute(sql, bind_values)
    records = await result_cursor.fetchall()

    matches = []
    for record in records:
        candidate = self._row_to_entity(record)

        # Entities stored without an embedding cannot be scored.
        if not candidate.embedding:
            continue

        score = self._cosine_similarity(query_embedding, candidate.embedding)
        if score >= score_threshold:
            matches.append((candidate, score))

    # Stable descending sort, then keep the leading max_results pairs.
    ranked = sorted(matches, key=lambda pair: pair[1], reverse=True)
    return ranked[:max_results]
|
|
485
|
+
|
|
486
|
+
async def traverse(
    self,
    start_entity_id: str,
    relation_type: Optional[str] = None,
    max_depth: int = 3,
    max_results: int = 100,
) -> List[Path]:
    """
    Collect paths reachable from a start entity.

    Although this method sits in the SQL-optimized tier, it does not issue
    a recursive CTE: after the initialization check it forwards all four
    arguments, positionally and unchanged, to ``_default_traverse_bfs`` and
    returns whatever that helper produces.

    Args:
        start_entity_id: Identifier of the entity where traversal begins.
        relation_type: Optional relation type forwarded to the helper.
        max_depth: Depth limit forwarded to the helper.
        max_results: Result limit forwarded to the helper.

    Returns:
        The value returned by ``_default_traverse_bfs``.

    Raises:
        RuntimeError: If the store has not been initialized.
    """
    if self._is_initialized:
        # Building full Path objects from a recursive CTE is complex, so the
        # breadth-first default is reused here. Backends with a native graph
        # query language (e.g., Neo4j with Cypher) should override this for
        # better performance.
        bfs_arguments = (start_entity_id, relation_type, max_depth, max_results)
        return await self._default_traverse_bfs(*bfs_arguments)

    raise RuntimeError("GraphStore not initialized")
|
|
510
|
+
|
|
511
|
+
# =========================================================================
|
|
512
|
+
# Helper Methods
|
|
513
|
+
# =========================================================================
|
|
514
|
+
|
|
515
|
+
def _row_to_entity(self, row: tuple) -> Entity:
    """
    Build an Entity from a four-column database row.

    Args:
        row: ``(id, entity_type, properties, embedding)`` where
            ``properties`` is JSON text and ``embedding`` is either a
            packed byte string or a falsy value when none is stored.

    Returns:
        Entity populated from the row; ``embedding`` is None when the
        stored embedding column is falsy.
    """
    identifier, kind, raw_properties, raw_embedding = row

    decoded_properties = json.loads(raw_properties)

    # A NULL (or empty) blob means the entity was stored without a vector.
    vector = None
    if raw_embedding:
        vector = self._deserialize_embedding(raw_embedding)

    return Entity(
        id=identifier,
        entity_type=kind,
        properties=decoded_properties,
        embedding=vector,
    )
|
|
528
|
+
|
|
529
|
+
def _row_to_relation(self, row: tuple) -> Relation:
    """
    Build a Relation from a six-column database row.

    Args:
        row: ``(id, relation_type, source_id, target_id, properties,
            weight)`` where ``properties`` is JSON text.

    Returns:
        Relation populated from the row.
    """
    relation_id, kind, origin_id, destination_id, raw_properties, edge_weight = row

    relation_fields = {
        "id": relation_id,
        "relation_type": kind,
        "source_id": origin_id,
        "target_id": destination_id,
        # Properties are persisted as JSON text and decoded on read.
        "properties": json.loads(raw_properties),
        "weight": edge_weight,
    }
    return Relation(**relation_fields)
|
|
543
|
+
|
|
544
|
+
def _serialize_embedding(self, embedding: List[float]) -> bytes:
    """
    Pack an embedding vector into a byte string.

    Each component is written with the ``struct`` format code ``f`` using
    the module's default (native) byte order, so values are stored as C
    floats rather than Python's double-precision floats.

    Args:
        embedding: Vector components to pack.

    Returns:
        The packed bytes; an empty vector yields ``b""``.
    """
    import struct

    layout = struct.Struct("%df" % len(embedding))
    return layout.pack(*embedding)
|
|
549
|
+
|
|
550
|
+
def _deserialize_embedding(self, blob: bytes) -> List[float]:
    """
    Unpack a byte string produced with the ``f`` struct format into floats.

    Args:
        blob: Packed vector; its length is expected to be a multiple of
            four bytes (one C float per component).

    Returns:
        The decoded components as a list; empty bytes yield ``[]``.

    Raises:
        struct.error: If the blob length does not match a whole number of
            four-byte values.
    """
    import struct

    bytes_per_value = 4  # 4 bytes per float
    value_count = len(blob) // bytes_per_value
    return [*struct.unpack("%df" % value_count, blob)]
|
|
556
|
+
|
|
557
|
+
def _cosine_similarity(self, vec1: List[float], vec2: List[float]) -> float:
    """
    Compute cosine similarity between two vectors, rescaled to 0.0-1.0.

    The raw cosine lies between -1 and 1, where 1 means identical
    direction. It is mapped linearly onto 0-1, so opposite vectors score
    0.0, orthogonal vectors 0.5 and identical directions 1.0.

    Vectors of different length, and vectors with zero magnitude, score
    0.0 instead of raising.

    Args:
        vec1: First vector
        vec2: Second vector

    Returns:
        Cosine similarity (0.0-1.0)
    """
    if len(vec1) != len(vec2):
        return 0.0

    dot_product = sum(a * b for a, b in zip(vec1, vec2))
    magnitude1 = sum(a * a for a in vec1) ** 0.5
    magnitude2 = sum(b * b for b in vec2) ** 0.5

    # A zero vector has no direction; avoid dividing by zero.
    if magnitude1 == 0 or magnitude2 == 0:
        return 0.0

    # Cosine similarity ranges from -1 to 1, normalize to 0 to 1
    similarity = dot_product / (magnitude1 * magnitude2)
    score = (similarity + 1) / 2

    # Floating-point rounding can leave the score a hair outside the
    # documented range (e.g. for a vector compared with itself), so clamp.
    # Explicit comparisons keep a NaN score unchanged.
    if score > 1.0:
        return 1.0
    if score < 0.0:
        return 0.0
    return score
|
|
584
|
+
|
|
585
|
+
async def get_stats(self) -> Dict[str, Any]:
    """
    Get statistics about the SQLite graph store.

    Returns:
        Dictionary with the entity and relation row counts, the storage
        type (``"sqlite"``), the configured database path, the database
        file size in bytes (0 for ``":memory:"`` or when the size cannot
        be read) and the initialization flag.

    Raises:
        RuntimeError: If the store has not been initialized.
    """
    # Imported locally so the size lookup does not rely on the bare name
    # ``Path``, which this file also uses as a graph-path type annotation.
    import os

    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    # Count entities
    cursor = await self.conn.execute("SELECT COUNT(*) FROM entities")
    entity_count = (await cursor.fetchone())[0]

    # Count relations
    cursor = await self.conn.execute("SELECT COUNT(*) FROM relations")
    relation_count = (await cursor.fetchone())[0]

    # Database file size; an in-memory database has no file to measure.
    file_size = 0
    if self.db_path != ":memory:":
        try:
            file_size = os.path.getsize(self.db_path)
        except (OSError, ValueError):
            # Best effort: a missing or unreadable file is reported as 0
            # bytes rather than failing the whole statistics call.
            file_size = 0

    return {
        "entity_count": entity_count,
        "relation_count": relation_count,
        "storage_type": "sqlite",
        "db_path": self.db_path,
        "db_size_bytes": file_size,
        "is_initialized": self._is_initialized,
    }
|
|
614
|
+
|
|
615
|
+
async def clear(self):
    """
    Delete every row from the ``relations`` and ``entities`` tables.

    The deletion is committed immediately unless the store is currently
    inside a transaction, in which case committing is left to whoever
    owns that transaction.

    Raises:
        RuntimeError: If the store has not been initialized.
    """
    if not self._is_initialized:
        raise RuntimeError("GraphStore not initialized")

    # Relations are removed before entities, matching the original order
    # (presumably because relations reference entities; confirm against
    # the schema).
    for table_name in ("relations", "entities"):
        await self.conn.execute("DELETE FROM " + table_name)

    if self._in_transaction:
        return
    await self.conn.commit()
|